Diffstat (limited to 'compiler')
 compiler/Android.bp                                  |   2
 compiler/debug/elf_debug_info_writer.h               |   3
 compiler/debug/elf_debug_line_writer.h               |   4
 compiler/optimizing/code_generator_arm64.cc          |  29
 compiler/optimizing/code_generator_arm_vixl.cc       |  28
 compiler/optimizing/code_generator_mips.cc           |  23
 compiler/optimizing/code_generator_mips64.cc         |  24
 compiler/optimizing/code_generator_vector_x86.cc     |  56
 compiler/optimizing/code_generator_vector_x86_64.cc  |  58
 compiler/optimizing/code_generator_x86.cc            |  25
 compiler/optimizing/code_generator_x86_64.cc         |  23
 compiler/optimizing/instruction_builder.cc           |  36
 compiler/optimizing/instruction_simplifier_x86.cc    | 149
 compiler/optimizing/instruction_simplifier_x86.h     |  44
 compiler/optimizing/loop_analysis.cc                 |  56
 compiler/optimizing/loop_analysis.h                  |  53
 compiler/optimizing/loop_optimization.cc             | 118
 compiler/optimizing/loop_optimization.h              |  15
 compiler/optimizing/nodes.cc                         |  16
 compiler/optimizing/nodes.h                          |   1
 compiler/optimizing/nodes_vector.h                   |   4
 compiler/optimizing/optimization.cc                  |   8
 compiler/optimizing/optimization.h                   |   1
 compiler/optimizing/optimizing_compiler.cc           |  80
 compiler/optimizing/optimizing_compiler_stats.h      |   1
 compiler/optimizing/optimizing_unit_test.h           |  78
 compiler/optimizing/select_generator.cc              |  12
 compiler/optimizing/select_generator_test.cc         |  96
 compiler/optimizing/ssa_builder.cc                   |  79
 compiler/optimizing/ssa_builder.h                    |   9
 compiler/optimizing/stack_map_stream.cc              |  10
 compiler/optimizing/stack_map_test.cc                |  78
 compiler/optimizing/superblock_cloner_test.cc        |  66
 compiler/utils/arm/jni_macro_assembler_arm_vixl.cc   |   5
 compiler/utils/arm64/jni_macro_assembler_arm64.cc    |   5
 compiler/utils/assembler_thumb_test_expected.cc.inc  |   2
 compiler/utils/managed_register.h                    |   4
 compiler/utils/mips/assembler_mips.cc                |   5
 compiler/utils/mips64/assembler_mips64.cc            |   5
 compiler/utils/x86/assembler_x86.cc                  | 145
 compiler/utils/x86/assembler_x86.h                   |  11
 compiler/utils/x86/jni_macro_assembler_x86.cc        |   3
 compiler/utils/x86_64/assembler_x86_64.cc            | 142
 compiler/utils/x86_64/assembler_x86_64.h             |  15
 compiler/utils/x86_64/jni_macro_assembler_x86_64.cc  |   3
 45 files changed, 1196 insertions(+), 434 deletions(-)
diff --git a/compiler/Android.bp b/compiler/Android.bp
index 11521e68d0..e1d382f6f4 100644
--- a/compiler/Android.bp
+++ b/compiler/Android.bp
@@ -161,6 +161,7 @@ art_cc_defaults {
"utils/x86/assembler_x86.cc",
"utils/x86/jni_macro_assembler_x86.cc",
"utils/x86/managed_register_x86.cc",
+ "optimizing/instruction_simplifier_x86.cc",
],
},
x86_64: {
@@ -346,6 +347,7 @@ art_cc_test {
"optimizing/parallel_move_test.cc",
"optimizing/pretty_printer_test.cc",
"optimizing/reference_type_propagation_test.cc",
+ "optimizing/select_generator_test.cc",
"optimizing/side_effects_test.cc",
"optimizing/ssa_liveness_analysis_test.cc",
"optimizing/ssa_test.cc",
diff --git a/compiler/debug/elf_debug_info_writer.h b/compiler/debug/elf_debug_info_writer.h
index f2a942f34a..bda7108c74 100644
--- a/compiler/debug/elf_debug_info_writer.h
+++ b/compiler/debug/elf_debug_info_writer.h
@@ -208,8 +208,7 @@ class ElfCompilationUnitWriter {
std::vector<DexRegisterMap> dex_reg_maps;
if (accessor.HasCodeItem() && mi->code_info != nullptr) {
code_info.reset(new CodeInfo(mi->code_info));
- for (size_t s = 0; s < code_info->GetNumberOfStackMaps(); ++s) {
- const StackMap stack_map = code_info->GetStackMapAt(s);
+ for (StackMap stack_map : code_info->GetStackMaps()) {
dex_reg_maps.push_back(code_info->GetDexRegisterMapOf(stack_map));
}
}
diff --git a/compiler/debug/elf_debug_line_writer.h b/compiler/debug/elf_debug_line_writer.h
index a7adab5506..3d78943cd0 100644
--- a/compiler/debug/elf_debug_line_writer.h
+++ b/compiler/debug/elf_debug_line_writer.h
@@ -101,9 +101,7 @@ class ElfDebugLineWriter {
// Use stack maps to create mapping table from pc to dex.
const CodeInfo code_info(mi->code_info);
pc2dex_map.reserve(code_info.GetNumberOfStackMaps());
- for (uint32_t s = 0; s < code_info.GetNumberOfStackMaps(); s++) {
- StackMap stack_map = code_info.GetStackMapAt(s);
- DCHECK(stack_map.IsValid());
+ for (StackMap stack_map : code_info.GetStackMaps()) {
const uint32_t pc = stack_map.GetNativePcOffset(isa);
const int32_t dex = stack_map.GetDexPc();
pc2dex_map.push_back({pc, dex});
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 26c9e9fa2b..d1c83ce625 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -5493,36 +5493,13 @@ void LocationsBuilderARM64::VisitNewInstance(HNewInstance* instruction) {
LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
instruction, LocationSummary::kCallOnMainOnly);
InvokeRuntimeCallingConvention calling_convention;
- if (instruction->IsStringAlloc()) {
- locations->AddTemp(LocationFrom(kArtMethodRegister));
- } else {
- locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
- }
+ locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference));
}
void InstructionCodeGeneratorARM64::VisitNewInstance(HNewInstance* instruction) {
- // Note: if heap poisoning is enabled, the entry point takes cares
- // of poisoning the reference.
- if (instruction->IsStringAlloc()) {
- // String is allocated through StringFactory. Call NewEmptyString entry point.
- Location temp = instruction->GetLocations()->GetTemp(0);
- MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize);
- __ Ldr(XRegisterFrom(temp), MemOperand(tr, QUICK_ENTRY_POINT(pNewEmptyString)));
- __ Ldr(lr, MemOperand(XRegisterFrom(temp), code_offset.Int32Value()));
-
- {
- // Ensure the pc position is recorded immediately after the `blr` instruction.
- ExactAssemblyScope eas(GetVIXLAssembler(),
- kInstructionSize,
- CodeBufferCheckScope::kExactSize);
- __ blr(lr);
- codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
- }
- } else {
- codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
- CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
- }
+ codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
+ CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
}
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 9e1ef4002e..deab239362 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -5479,34 +5479,14 @@ void InstructionCodeGeneratorARMVIXL::VisitUShr(HUShr* ushr) {
void LocationsBuilderARMVIXL::VisitNewInstance(HNewInstance* instruction) {
LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
instruction, LocationSummary::kCallOnMainOnly);
- if (instruction->IsStringAlloc()) {
- locations->AddTemp(LocationFrom(kMethodRegister));
- } else {
- InvokeRuntimeCallingConventionARMVIXL calling_convention;
- locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
- }
+ InvokeRuntimeCallingConventionARMVIXL calling_convention;
+ locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
locations->SetOut(LocationFrom(r0));
}
void InstructionCodeGeneratorARMVIXL::VisitNewInstance(HNewInstance* instruction) {
- // Note: if heap poisoning is enabled, the entry point takes cares
- // of poisoning the reference.
- if (instruction->IsStringAlloc()) {
- // String is allocated through StringFactory. Call NewEmptyString entry point.
- vixl32::Register temp = RegisterFrom(instruction->GetLocations()->GetTemp(0));
- MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize);
- GetAssembler()->LoadFromOffset(kLoadWord, temp, tr, QUICK_ENTRY_POINT(pNewEmptyString));
- GetAssembler()->LoadFromOffset(kLoadWord, lr, temp, code_offset.Int32Value());
- // blx in T32 has only 16bit encoding that's why a stricter check for the scope is used.
- ExactAssemblyScope aas(GetVIXLAssembler(),
- vixl32::k16BitT32InstructionSizeInBytes,
- CodeBufferCheckScope::kExactSize);
- __ blx(lr);
- codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
- } else {
- codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
- CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
- }
+ codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
+ CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 11);
}
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index f0ef30ee37..c7295e4db1 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -8701,30 +8701,13 @@ void LocationsBuilderMIPS::VisitNewInstance(HNewInstance* instruction) {
LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
instruction, LocationSummary::kCallOnMainOnly);
InvokeRuntimeCallingConvention calling_convention;
- if (instruction->IsStringAlloc()) {
- locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument));
- } else {
- locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
- }
+ locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference));
}
void InstructionCodeGeneratorMIPS::VisitNewInstance(HNewInstance* instruction) {
- // Note: if heap poisoning is enabled, the entry point takes care
- // of poisoning the reference.
- if (instruction->IsStringAlloc()) {
- // String is allocated through StringFactory. Call NewEmptyString entry point.
- Register temp = instruction->GetLocations()->GetTemp(0).AsRegister<Register>();
- MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMipsPointerSize);
- __ LoadFromOffset(kLoadWord, temp, TR, QUICK_ENTRY_POINT(pNewEmptyString));
- __ LoadFromOffset(kLoadWord, T9, temp, code_offset.Int32Value());
- __ Jalr(T9);
- __ NopIfNoReordering();
- codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
- } else {
- codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
- CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
- }
+ codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
+ CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
}
void LocationsBuilderMIPS::VisitNot(HNot* instruction) {
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 6e72727f59..ffde45e95e 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -6632,31 +6632,13 @@ void LocationsBuilderMIPS64::VisitNewInstance(HNewInstance* instruction) {
LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
instruction, LocationSummary::kCallOnMainOnly);
InvokeRuntimeCallingConvention calling_convention;
- if (instruction->IsStringAlloc()) {
- locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument));
- } else {
- locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
- }
+ locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference));
}
void InstructionCodeGeneratorMIPS64::VisitNewInstance(HNewInstance* instruction) {
- // Note: if heap poisoning is enabled, the entry point takes care
- // of poisoning the reference.
- if (instruction->IsStringAlloc()) {
- // String is allocated through StringFactory. Call NewEmptyString entry point.
- GpuRegister temp = instruction->GetLocations()->GetTemp(0).AsRegister<GpuRegister>();
- MemberOffset code_offset =
- ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMips64PointerSize);
- __ LoadFromOffset(kLoadDoubleword, temp, TR, QUICK_ENTRY_POINT(pNewEmptyString));
- __ LoadFromOffset(kLoadDoubleword, T9, temp, code_offset.Int32Value());
- __ Jalr(T9);
- __ Nop();
- codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
- } else {
- codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
- CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
- }
+ codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
+ CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
}
void LocationsBuilderMIPS64::VisitNot(HNot* instruction) {
diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc
index 086ae07a06..58808769e2 100644
--- a/compiler/optimizing/code_generator_vector_x86.cc
+++ b/compiler/optimizing/code_generator_vector_x86.cc
@@ -1125,13 +1125,59 @@ static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* in
}
}
-void LocationsBuilderX86::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
- CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
+void LocationsBuilderX86::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instr);
+ switch (instr->GetPackedType()) {
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ locations->SetInAt(
+ HVecMultiplyAccumulate::kInputAccumulatorIndex, Location::RequiresFpuRegister());
+ locations->SetInAt(
+ HVecMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresFpuRegister());
+ locations->SetInAt(
+ HVecMultiplyAccumulate::kInputMulRightIndex, Location::RequiresFpuRegister());
+ DCHECK_EQ(HVecMultiplyAccumulate::kInputAccumulatorIndex, 0);
+ locations->SetOut(Location::SameAsFirstInput());
+ break;
+ default:
+ // VecMultiplyAccumulate is supported only for single- and
+ // double-precision floating point; integral types are not
+ // converted yet.
+ LOG(FATAL) << "Unsupported SIMD type";
+ }
}
-void InstructionCodeGeneratorX86::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
- // TODO: pmaddwd?
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+void InstructionCodeGeneratorX86::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
+ LocationSummary* locations = instr->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ XmmRegister accumulator = locations->InAt(
+ HVecMultiplyAccumulate::kInputAccumulatorIndex).AsFpuRegister<XmmRegister>();
+ XmmRegister mul_left = locations->InAt(
+ HVecMultiplyAccumulate::kInputMulLeftIndex).AsFpuRegister<XmmRegister>();
+ XmmRegister mul_right = locations->InAt(
+ HVecMultiplyAccumulate::kInputMulRightIndex).AsFpuRegister<XmmRegister>();
+ switch (instr->GetPackedType()) {
+ case DataType::Type::kFloat32:
+ DCHECK_EQ(4u, instr->GetVectorLength());
+ if (instr->GetOpKind() == HInstruction::InstructionKind::kAdd)
+ __ vfmadd231ps(accumulator, mul_left, mul_right);
+ else
+ __ vfmsub231ps(accumulator, mul_left, mul_right);
+ break;
+ case DataType::Type::kFloat64:
+ DCHECK_EQ(2u, instr->GetVectorLength());
+ if (instr->GetOpKind() == HInstruction::InstructionKind::kAdd)
+ __ vfmadd231pd(accumulator, mul_left, mul_right);
+ else
+ __ vfmsub231pd(accumulator, mul_left, mul_right);
+ break;
+ default:
+ // VecMultiplyAccumulate is supported only for single- and
+ // double-precision floating point; integral types are not
+ // converted yet.
+ LOG(FATAL) << "Unsupported SIMD type";
+ }
}
void LocationsBuilderX86::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
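A note on numerics: the vfmadd231ps/vfmsub231ps instructions emitted above perform the multiply and the add/subtract with a single rounding, so fusing a separate VecMul + VecAdd pair into VecMultiplyAccumulate can perturb floating-point results by one rounding step. A minimal host-side sketch of the effect (plain C++ with scalar std::fma standing in for the vector instruction; illustrative only, not part of this change):

    #include <cmath>
    #include <cstdio>

    int main() {
      // Exactly, x * x = 1 + 2^-26 + 2^-54; a separate multiply rounds the
      // 2^-54 term away, but it survives inside a fused multiply-add.
      double x = 1.0 + std::ldexp(1.0, -27);
      double two_roundings = x * x - x * x;            // always 0.0
      double one_rounding = std::fma(x, x, -(x * x));  // 2^-54 residual
      std::printf("mul+add: %g, fma: %g\n", two_roundings, one_rounding);
      return 0;
    }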
diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc
index 4d31ab68d1..4795e86933 100644
--- a/compiler/optimizing/code_generator_vector_x86_64.cc
+++ b/compiler/optimizing/code_generator_vector_x86_64.cc
@@ -1098,13 +1098,61 @@ static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* in
}
}
-void LocationsBuilderX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
- CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
+void LocationsBuilderX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instr);
+ switch (instr->GetPackedType()) {
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ locations->SetInAt(
+ HVecMultiplyAccumulate::kInputAccumulatorIndex, Location::RequiresFpuRegister());
+ locations->SetInAt(
+ HVecMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresFpuRegister());
+ locations->SetInAt(
+ HVecMultiplyAccumulate::kInputMulRightIndex, Location::RequiresFpuRegister());
+ DCHECK_EQ(HVecMultiplyAccumulate::kInputAccumulatorIndex, 0);
+ locations->SetOut(Location::SameAsFirstInput());
+ break;
+ default:
+ // VecMultiplyAccumulate is supported only for single- and
+ // double-precision floating point; integral types are not
+ // converted yet.
+ LOG(FATAL) << "Unsupported SIMD type";
+ }
}
-void InstructionCodeGeneratorX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
- // TODO: pmaddwd?
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+
+void InstructionCodeGeneratorX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
+ LocationSummary* locations = instr->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ XmmRegister accumulator = locations->InAt(
+ HVecMultiplyAccumulate::kInputAccumulatorIndex).AsFpuRegister<XmmRegister>();
+ XmmRegister mul_left = locations->InAt(
+ HVecMultiplyAccumulate::kInputMulLeftIndex).AsFpuRegister<XmmRegister>();
+ XmmRegister mul_right = locations->InAt(
+ HVecMultiplyAccumulate::kInputMulRightIndex).AsFpuRegister<XmmRegister>();
+
+ switch (instr->GetPackedType()) {
+ case DataType::Type::kFloat32:
+ DCHECK_EQ(4u, instr->GetVectorLength());
+ if (instr->GetOpKind() == HInstruction::InstructionKind::kAdd)
+ __ vfmadd231ps(accumulator, mul_left, mul_right);
+ else
+ __ vfmsub231ps(accumulator, mul_left, mul_right);
+ break;
+ case DataType::Type::kFloat64:
+ DCHECK_EQ(2u, instr->GetVectorLength());
+ if (instr->GetOpKind() == HInstruction::InstructionKind::kAdd)
+ __ vfmadd231pd(accumulator, mul_left, mul_right);
+ else
+ __ vfmsub231pd(accumulator, mul_left, mul_right);
+ break;
+ default:
+ // VecMultiplyAccumulate is supported only for single- and
+ // double-precision floating point; integral types are not
+ // converted yet.
+ LOG(FATAL) << "Unsupported SIMD type";
+ }
}
void LocationsBuilderX86_64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index d189476a48..1c0d283ef6 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -4496,29 +4496,14 @@ void LocationsBuilderX86::VisitNewInstance(HNewInstance* instruction) {
LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
instruction, LocationSummary::kCallOnMainOnly);
locations->SetOut(Location::RegisterLocation(EAX));
- if (instruction->IsStringAlloc()) {
- locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument));
- } else {
- InvokeRuntimeCallingConvention calling_convention;
- locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
- }
+ InvokeRuntimeCallingConvention calling_convention;
+ locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
}
void InstructionCodeGeneratorX86::VisitNewInstance(HNewInstance* instruction) {
- // Note: if heap poisoning is enabled, the entry point takes cares
- // of poisoning the reference.
- if (instruction->IsStringAlloc()) {
- // String is allocated through StringFactory. Call NewEmptyString entry point.
- Register temp = instruction->GetLocations()->GetTemp(0).AsRegister<Register>();
- MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86PointerSize);
- __ fs()->movl(temp, Address::Absolute(QUICK_ENTRY_POINT(pNewEmptyString)));
- __ call(Address(temp, code_offset.Int32Value()));
- codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
- } else {
- codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
- CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
- DCHECK(!codegen_->IsLeafMethod());
- }
+ codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
+ CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
+ DCHECK(!codegen_->IsLeafMethod());
}
void LocationsBuilderX86::VisitNewArray(HNewArray* instruction) {
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index bea3da070a..3073be6ca7 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -4343,29 +4343,14 @@ void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) {
LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
instruction, LocationSummary::kCallOnMainOnly);
InvokeRuntimeCallingConvention calling_convention;
- if (instruction->IsStringAlloc()) {
- locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument));
- } else {
- locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
- }
+ locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
locations->SetOut(Location::RegisterLocation(RAX));
}
void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) {
- // Note: if heap poisoning is enabled, the entry point takes cares
- // of poisoning the reference.
- if (instruction->IsStringAlloc()) {
- // String is allocated through StringFactory. Call NewEmptyString entry point.
- CpuRegister temp = instruction->GetLocations()->GetTemp(0).AsRegister<CpuRegister>();
- MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64PointerSize);
- __ gs()->movq(temp, Address::Absolute(QUICK_ENTRY_POINT(pNewEmptyString), /* no_rip */ true));
- __ call(Address(temp, code_offset.SizeValue()));
- codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
- } else {
- codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
- CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
- DCHECK(!codegen_->IsLeafMethod());
- }
+ codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
+ CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
+ DCHECK(!codegen_->IsLeafMethod());
}
void LocationsBuilderX86_64::VisitNewArray(HNewArray* instruction) {
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
index 7d918c47ca..ba160e55f8 100644
--- a/compiler/optimizing/instruction_builder.cc
+++ b/compiler/optimizing/instruction_builder.cc
@@ -1075,6 +1075,10 @@ HNewInstance* HInstructionBuilder::BuildNewInstance(dex::TypeIndex type_index, u
if (load_class->NeedsAccessCheck() || klass->IsFinalizable() || !klass->IsInstantiable()) {
entrypoint = kQuickAllocObjectWithChecks;
}
+ // We will always be able to resolve the string class since it is in the BCP.
+ if (!klass.IsNull() && klass->IsStringClass()) {
+ entrypoint = kQuickAllocStringObject;
+ }
// Consider classes we haven't resolved as potentially finalizable.
bool finalizable = (klass == nullptr) || klass->IsFinalizable();
@@ -1308,29 +1312,25 @@ bool HInstructionBuilder::HandleStringInit(HInvoke* invoke,
HInstruction* arg_this = LoadLocal(orig_this_reg, DataType::Type::kReference);
// Replacing the NewInstance might render it redundant. Keep a list of these
- // to be visited once it is clear whether it is has remaining uses.
+ // to be visited once it is clear whether it has remaining uses.
if (arg_this->IsNewInstance()) {
ssa_builder_->AddUninitializedString(arg_this->AsNewInstance());
+ // Walk over all vregs and replace any occurrence of `arg_this` with `invoke`.
+ for (size_t vreg = 0, e = current_locals_->size(); vreg < e; ++vreg) {
+ if ((*current_locals_)[vreg] == arg_this) {
+ (*current_locals_)[vreg] = invoke;
+ }
+ }
} else {
- // The only reason a HPhi can flow in a String.<init> is when there is an
- // irreducible loop, which will create HPhi for all dex registers at loop entry.
DCHECK(arg_this->IsPhi());
- // TODO(b/109666561): Re-enable.
- // DCHECK(graph_->HasIrreducibleLoops());
- // Don't bother compiling a method in that situation. While we could look at all
- // phis related to the HNewInstance, it's not worth the trouble.
- MaybeRecordStat(compilation_stats_,
- MethodCompilationStat::kNotCompiledIrreducibleAndStringInit);
- return false;
+ // We can get a phi as input of a String.<init> if there is a loop between the
+ // allocation and the String.<init> call. As we don't know which other phis might alias
+ // with `arg_this`, we keep a record of these phis and will analyze their inputs and
+ // uses once the inputs and users are populated (in ssa_builder.cc).
+ // Note: we only do this for phis, as it is a somewhat more expensive operation than
+ // what we're doing above when the input is the `HNewInstance`.
+ ssa_builder_->AddUninitializedStringPhi(arg_this->AsPhi(), invoke);
}
-
- // Walk over all vregs and replace any occurrence of `arg_this` with `invoke`.
- for (size_t vreg = 0, e = current_locals_->size(); vreg < e; ++vreg) {
- if ((*current_locals_)[vreg] == arg_this) {
- (*current_locals_)[vreg] = invoke;
- }
- }
-
return true;
}
diff --git a/compiler/optimizing/instruction_simplifier_x86.cc b/compiler/optimizing/instruction_simplifier_x86.cc
new file mode 100644
index 0000000000..b3f67d6e84
--- /dev/null
+++ b/compiler/optimizing/instruction_simplifier_x86.cc
@@ -0,0 +1,149 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "instruction_simplifier_x86.h"
+#include "arch/x86/instruction_set_features_x86.h"
+#include "mirror/array-inl.h"
+#include "code_generator.h"
+
+namespace art {
+
+namespace x86 {
+
+class InstructionSimplifierX86Visitor : public HGraphVisitor {
+ public:
+ InstructionSimplifierX86Visitor(HGraph* graph,
+ CodeGeneratorX86* codegen,
+ OptimizingCompilerStats* stats)
+ : HGraphVisitor(graph), codegen_(codegen), stats_(stats) {}
+
+ private:
+ void RecordSimplification() {
+ MaybeRecordStat(stats_, MethodCompilationStat::kInstructionSimplificationsArch);
+ }
+
+ bool HasCpuFeatureFlag() {
+ return (codegen_->GetInstructionSetFeatures().HasAVX2());
+ }
+
+ /**
+ * This simplifier uses a special-purpose BB visitor.
+ * (1) No need to visit Phi nodes.
+ * (2) Since statements can be removed in a "forward" fashion,
+ * the visitor should test if each statement is still there.
+ */
+ void VisitBasicBlock(HBasicBlock* block) OVERRIDE {
+ // TODO: fragile iteration, provide more robust iterators?
+ for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+ HInstruction* instruction = it.Current();
+ if (instruction->IsInBlock()) {
+ instruction->Accept(this);
+ }
+ }
+ }
+
+ bool TryGenerateVecMultiplyAccumulate(HVecMul* mul);
+ void VisitVecMul(HVecMul* instruction) OVERRIDE;
+
+ CodeGeneratorX86* codegen_;
+ OptimizingCompilerStats* stats_;
+};
+
+/* Generic expressions for FMA:
+   a = (b * c) + a
+   a = (b * c) - a
+*/
+bool InstructionSimplifierX86Visitor::TryGenerateVecMultiplyAccumulate(HVecMul* mul) {
+ if (!(mul->GetPackedType() == DataType::Type::kFloat32 ||
+ mul->GetPackedType() == DataType::Type::kFloat64)) {
+ return false;
+ }
+ ArenaAllocator* allocator = mul->GetBlock()->GetGraph()->GetAllocator();
+ if (mul->HasOnlyOneNonEnvironmentUse()) {
+ HInstruction* use = mul->GetUses().front().GetUser();
+ if (use->IsVecAdd() || use->IsVecSub()) {
+ // Replace code looking like
+ // VECMUL tmp, x, y
+ // VECADD dst, acc, tmp or VECADD dst, tmp, acc
+ // or
+ // VECSUB dst, tmp, acc
+ // with
+ // VECMULACC dst, acc, x, y
+
+ // Note that we do not want to (unconditionally) perform the merge when the
+ // multiplication has multiple uses and it can be merged in all of them.
+ // Multiple uses could happen on the same control-flow path, and we would
+ // then increase the amount of work. In the future we could try to evaluate
+ // whether all uses are on different control-flow paths (using dominance and
+ // reverse-dominance information) and only perform the merge when they are.
+ HInstruction* accumulator = nullptr;
+ HVecBinaryOperation* binop = use->AsVecBinaryOperation();
+ HInstruction* binop_left = binop->GetLeft();
+ HInstruction* binop_right = binop->GetRight();
+ DCHECK_NE(binop_left, binop_right);
+ if (use->IsVecSub()) {
+ if (binop_left == mul) {
+ accumulator = binop_right;
+ }
+ } else {
+ // VecAdd
+ if (binop_right == mul) {
+ accumulator = binop_left;
+ } else {
+ DCHECK_EQ(binop_left, mul);
+ accumulator = binop_right;
+ }
+ }
+ HInstruction::InstructionKind kind =
+ use->IsVecAdd() ? HInstruction::kAdd : HInstruction::kSub;
+
+ if (accumulator != nullptr) {
+ HVecMultiplyAccumulate* mulacc =
+ new (allocator) HVecMultiplyAccumulate(allocator,
+ kind,
+ accumulator,
+ mul->GetLeft(),
+ mul->GetRight(),
+ binop->GetPackedType(),
+ binop->GetVectorLength(),
+ binop->GetDexPc());
+ binop->GetBlock()->ReplaceAndRemoveInstructionWith(binop, mulacc);
+ DCHECK(!mul->HasUses());
+ mul->GetBlock()->RemoveInstruction(mul);
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+void InstructionSimplifierX86Visitor::VisitVecMul(HVecMul* instruction) {
+ if (HasCpuFeatureFlag()) {
+ if (TryGenerateVecMultiplyAccumulate(instruction)) {
+ RecordSimplification();
+ }
+ }
+}
+
+bool InstructionSimplifierX86::Run() {
+ InstructionSimplifierX86Visitor visitor(graph_, codegen_, stats_);
+ visitor.VisitReversePostOrder();
+ return true;
+}
+
+} // namespace x86
+} // namespace art
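For context, the pattern this new pass rewrites is produced by the loop vectorizer for multiply-accumulate loops. A hedged sketch of the kind of loop (written in C++ for concreteness; in practice this is compiled dex code) that the vectorizer turns into VECMUL + VECADD and that this pass, on an AVX2-capable x86 device, fuses into a single VECMULACC:

    void madd(float* a, const float* b, const float* c, int n) {
      for (int i = 0; i < n; ++i) {
        // Vectorized form: VECMUL tmp, b, c; VECADD a, a, tmp.
        // The simplifier rewrites the pair to VECMULACC, which the x86
        // code generators above emit as vfmadd231ps.
        a[i] = b[i] * c[i] + a[i];
      }
    }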
diff --git a/compiler/optimizing/instruction_simplifier_x86.h b/compiler/optimizing/instruction_simplifier_x86.h
new file mode 100644
index 0000000000..1fb199f728
--- /dev/null
+++ b/compiler/optimizing/instruction_simplifier_x86.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_H_
+#define ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_H_
+
+#include "nodes.h"
+#include "optimization.h"
+#include "code_generator_x86.h"
+
+namespace art {
+namespace x86 {
+
+class InstructionSimplifierX86 : public HOptimization {
+ public:
+ InstructionSimplifierX86(HGraph* graph, CodeGenerator* codegen, OptimizingCompilerStats* stats)
+ : HOptimization(graph, kInstructionSimplifierX86PassName, stats),
+ codegen_(down_cast<CodeGeneratorX86*>(codegen)) {}
+
+ static constexpr const char* kInstructionSimplifierX86PassName = "instruction_simplifier_x86";
+
+ bool Run() OVERRIDE;
+
+ private:
+ CodeGeneratorX86* codegen_;
+};
+
+} // namespace x86
+} // namespace art
+
+#endif // ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_H_
diff --git a/compiler/optimizing/loop_analysis.cc b/compiler/optimizing/loop_analysis.cc
index a2124455e2..efb23e7d3e 100644
--- a/compiler/optimizing/loop_analysis.cc
+++ b/compiler/optimizing/loop_analysis.cc
@@ -17,19 +17,34 @@
#include "loop_analysis.h"
#include "base/bit_vector-inl.h"
+#include "induction_var_range.h"
namespace art {
void LoopAnalysis::CalculateLoopBasicProperties(HLoopInformation* loop_info,
- LoopAnalysisInfo* analysis_results) {
+ LoopAnalysisInfo* analysis_results,
+ int64_t trip_count) {
+ analysis_results->trip_count_ = trip_count;
+
for (HBlocksInLoopIterator block_it(*loop_info);
!block_it.Done();
block_it.Advance()) {
HBasicBlock* block = block_it.Current();
+ // Check whether one of the successors is a loop exit.
for (HBasicBlock* successor : block->GetSuccessors()) {
if (!loop_info->Contains(*successor)) {
analysis_results->exits_num_++;
+
+ // We track the number of invariant loop exits which correspond to an HIf instruction and
+ // can be eliminated by loop peeling; other control-flow instructions are ignored and will
+ // not cause loop peeling to happen, as they either cannot be inside a loop, or by
+ // definition cannot be loop exits (unconditional instructions), or are not beneficial for
+ // the optimization.
+ HIf* hif = block->GetLastInstruction()->AsIf();
+ if (hif != nullptr && !loop_info->Contains(*hif->InputAt(0)->GetBlock())) {
+ analysis_results->invariant_exits_num_++;
+ }
}
}
@@ -48,20 +63,13 @@ void LoopAnalysis::CalculateLoopBasicProperties(HLoopInformation* loop_info,
}
}
-bool LoopAnalysis::HasLoopAtLeastOneInvariantExit(HLoopInformation* loop_info) {
- HGraph* graph = loop_info->GetHeader()->GetGraph();
- for (uint32_t block_id : loop_info->GetBlocks().Indexes()) {
- HBasicBlock* block = graph->GetBlocks()[block_id];
- DCHECK(block != nullptr);
- if (block->EndsWithIf()) {
- HIf* hif = block->GetLastInstruction()->AsIf();
- HInstruction* input = hif->InputAt(0);
- if (IsLoopExit(loop_info, hif) && !loop_info->Contains(*input->GetBlock())) {
- return true;
- }
- }
+int64_t LoopAnalysis::GetLoopTripCount(HLoopInformation* loop_info,
+ const InductionVarRange* induction_range) {
+ int64_t trip_count;
+ if (!induction_range->HasKnownTripCount(loop_info, &trip_count)) {
+ trip_count = LoopAnalysisInfo::kUnknownTripCount;
}
- return false;
+ return trip_count;
}
// Default implementation of loop helper; used for all targets unless a custom implementation
@@ -77,18 +85,22 @@ class ArchDefaultLoopHelper : public ArchNoOptsLoopHelper {
// Loop's maximum basic block count. Loops with higher count will not be peeled/unrolled.
static constexpr uint32_t kScalarHeuristicMaxBodySizeBlocks = 6;
- bool IsLoopNonBeneficialForScalarOpts(LoopAnalysisInfo* loop_analysis_info) const OVERRIDE {
- return loop_analysis_info->HasLongTypeInstructions() ||
- IsLoopTooBig(loop_analysis_info,
+ bool IsLoopNonBeneficialForScalarOpts(LoopAnalysisInfo* analysis_info) const OVERRIDE {
+ return analysis_info->HasLongTypeInstructions() ||
+ IsLoopTooBig(analysis_info,
kScalarHeuristicMaxBodySizeInstr,
kScalarHeuristicMaxBodySizeBlocks);
}
- uint32_t GetScalarUnrollingFactor(HLoopInformation* loop_info ATTRIBUTE_UNUSED,
- uint64_t trip_count) const OVERRIDE {
+ uint32_t GetScalarUnrollingFactor(const LoopAnalysisInfo* analysis_info) const OVERRIDE {
+ int64_t trip_count = analysis_info->GetTripCount();
+ // Unroll only loops with known trip count.
+ if (trip_count == LoopAnalysisInfo::kUnknownTripCount) {
+ return LoopAnalysisInfo::kNoUnrollingFactor;
+ }
uint32_t desired_unrolling_factor = kScalarMaxUnrollFactor;
if (trip_count < desired_unrolling_factor || trip_count % desired_unrolling_factor != 0) {
- return kNoUnrollingFactor;
+ return LoopAnalysisInfo::kNoUnrollingFactor;
}
return desired_unrolling_factor;
@@ -136,12 +148,12 @@ class Arm64LoopHelper : public ArchDefaultLoopHelper {
// TODO: Unroll loops with unknown trip count.
DCHECK_NE(vector_length, 0u);
if (trip_count < (2 * vector_length + max_peel)) {
- return kNoUnrollingFactor;
+ return LoopAnalysisInfo::kNoUnrollingFactor;
}
// Don't unroll for large loop body size.
uint32_t instruction_count = block->GetInstructions().CountSize();
if (instruction_count >= kArm64SimdHeuristicMaxBodySizeInstr) {
- return kNoUnrollingFactor;
+ return LoopAnalysisInfo::kNoUnrollingFactor;
}
// Find a beneficial unroll factor with the following restrictions:
// - At least one iteration of the transformed loop should be executed.
diff --git a/compiler/optimizing/loop_analysis.h b/compiler/optimizing/loop_analysis.h
index 7f321b73c8..bcb7b70494 100644
--- a/compiler/optimizing/loop_analysis.h
+++ b/compiler/optimizing/loop_analysis.h
@@ -21,26 +21,33 @@
namespace art {
+class InductionVarRange;
class LoopAnalysis;
-// No loop unrolling factor (just one copy of the loop-body).
-static constexpr uint32_t kNoUnrollingFactor = 1;
-
// Class to hold cached information on properties of the loop.
class LoopAnalysisInfo : public ValueObject {
public:
+ // No loop unrolling factor (just one copy of the loop-body).
+ static constexpr uint32_t kNoUnrollingFactor = 1;
+ // Used for unknown and non-constant trip counts (see InductionVarRange::HasKnownTripCount).
+ static constexpr int64_t kUnknownTripCount = -1;
+
explicit LoopAnalysisInfo(HLoopInformation* loop_info)
- : bb_num_(0),
+ : trip_count_(kUnknownTripCount),
+ bb_num_(0),
instr_num_(0),
exits_num_(0),
+ invariant_exits_num_(0),
has_instructions_preventing_scalar_peeling_(false),
has_instructions_preventing_scalar_unrolling_(false),
has_long_type_instructions_(false),
loop_info_(loop_info) {}
+ int64_t GetTripCount() const { return trip_count_; }
size_t GetNumberOfBasicBlocks() const { return bb_num_; }
size_t GetNumberOfInstructions() const { return instr_num_; }
size_t GetNumberOfExits() const { return exits_num_; }
+ size_t GetNumberOfInvariantExits() const { return invariant_exits_num_; }
bool HasInstructionsPreventingScalarPeeling() const {
return has_instructions_preventing_scalar_peeling_;
@@ -50,19 +57,27 @@ class LoopAnalysisInfo : public ValueObject {
return has_instructions_preventing_scalar_unrolling_;
}
+ bool HasInstructionsPreventingScalarOpts() const {
+ return HasInstructionsPreventingScalarPeeling() || HasInstructionsPreventingScalarUnrolling();
+ }
+
bool HasLongTypeInstructions() const {
return has_long_type_instructions_;
}
- const HLoopInformation* GetLoopInfo() const { return loop_info_; }
+ HLoopInformation* GetLoopInfo() const { return loop_info_; }
private:
+ // Trip count of the loop if known, kUnknownTripCount otherwise.
+ int64_t trip_count_;
// Number of basic blocks in the loop body.
size_t bb_num_;
// Number of instructions in the loop body.
size_t instr_num_;
// Number of loop's exits.
size_t exits_num_;
+ // Number of "if" loop exits (with HIf instruction) whose condition is loop-invariant.
+ size_t invariant_exits_num_;
// Whether the loop has instructions which make scalar loop peeling non-beneficial.
bool has_instructions_preventing_scalar_peeling_;
// Whether the loop has instructions which make scalar loop unrolling non-beneficial.
@@ -72,7 +87,7 @@ class LoopAnalysisInfo : public ValueObject {
bool has_long_type_instructions_;
// Corresponding HLoopInformation.
- const HLoopInformation* loop_info_;
+ HLoopInformation* loop_info_;
friend class LoopAnalysis;
};
@@ -84,20 +99,12 @@ class LoopAnalysis : public ValueObject {
// Calculates loops basic properties like body size, exits number, etc. and fills
// 'analysis_results' with this information.
static void CalculateLoopBasicProperties(HLoopInformation* loop_info,
- LoopAnalysisInfo* analysis_results);
+ LoopAnalysisInfo* analysis_results,
+ int64_t trip_count);
- // Returns whether the loop has at least one loop invariant exit.
- static bool HasLoopAtLeastOneInvariantExit(HLoopInformation* loop_info);
-
- // Returns whether HIf's true or false successor is outside the specified loop.
- //
- // Prerequisite: HIf must be in the specified loop.
- static bool IsLoopExit(HLoopInformation* loop_info, const HIf* hif) {
- DCHECK(loop_info->Contains(*hif->GetBlock()));
- HBasicBlock* true_succ = hif->IfTrueSuccessor();
- HBasicBlock* false_succ = hif->IfFalseSuccessor();
- return (!loop_info->Contains(*true_succ) || !loop_info->Contains(*false_succ));
- }
+ // Returns the trip count of the loop if it is known and kUnknownTripCount otherwise.
+ static int64_t GetLoopTripCount(HLoopInformation* loop_info,
+ const InductionVarRange* induction_range);
private:
// Returns whether an instruction makes scalar loop peeling/unrolling non-beneficial.
@@ -143,9 +150,9 @@ class ArchNoOptsLoopHelper : public ArenaObject<kArenaAllocOptimization> {
// Returns optimal scalar unrolling factor for the loop.
//
// Returns kNoUnrollingFactor by default, should be overridden by particular target loop helper.
- virtual uint32_t GetScalarUnrollingFactor(HLoopInformation* loop_info ATTRIBUTE_UNUSED,
- uint64_t trip_count ATTRIBUTE_UNUSED) const {
- return kNoUnrollingFactor;
+ virtual uint32_t GetScalarUnrollingFactor(
+ const LoopAnalysisInfo* analysis_info ATTRIBUTE_UNUSED) const {
+ return LoopAnalysisInfo::kNoUnrollingFactor;
}
// Returns whether scalar loop peeling is enabled,
@@ -160,7 +167,7 @@ class ArchNoOptsLoopHelper : public ArenaObject<kArenaAllocOptimization> {
int64_t trip_count ATTRIBUTE_UNUSED,
uint32_t max_peel ATTRIBUTE_UNUSED,
uint32_t vector_length ATTRIBUTE_UNUSED) const {
- return kNoUnrollingFactor;
+ return LoopAnalysisInfo::kNoUnrollingFactor;
}
};
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 72aa25302e..440cd3351e 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -744,64 +744,74 @@ bool HLoopOptimization::TryOptimizeInnerLoopFinite(LoopNode* node) {
}
bool HLoopOptimization::OptimizeInnerLoop(LoopNode* node) {
- return TryOptimizeInnerLoopFinite(node) ||
- TryPeelingForLoopInvariantExitsElimination(node) ||
- TryUnrollingForBranchPenaltyReduction(node);
+ return TryOptimizeInnerLoopFinite(node) || TryPeelingAndUnrolling(node);
}
//
-// Loop unrolling: generic part methods.
+// Scalar loop peeling and unrolling: generic part methods.
//
-bool HLoopOptimization::TryUnrollingForBranchPenaltyReduction(LoopNode* node) {
- // Don't run peeling/unrolling if compiler_options_ is nullptr (i.e., running under tests)
- // as InstructionSet is needed.
- if (compiler_options_ == nullptr) {
+bool HLoopOptimization::TryUnrollingForBranchPenaltyReduction(LoopAnalysisInfo* analysis_info,
+ bool generate_code) {
+ if (analysis_info->GetNumberOfExits() > 1) {
return false;
}
- HLoopInformation* loop_info = node->loop_info;
- int64_t trip_count = 0;
- // Only unroll loops with a known tripcount.
- if (!induction_range_.HasKnownTripCount(loop_info, &trip_count)) {
+ uint32_t unrolling_factor = arch_loop_helper_->GetScalarUnrollingFactor(analysis_info);
+ if (unrolling_factor == LoopAnalysisInfo::kNoUnrollingFactor) {
return false;
}
- uint32_t unrolling_factor = arch_loop_helper_->GetScalarUnrollingFactor(loop_info, trip_count);
- if (unrolling_factor == kNoUnrollingFactor) {
- return false;
- }
+ if (generate_code) {
+ // TODO: support other unrolling factors.
+ DCHECK_EQ(unrolling_factor, 2u);
- LoopAnalysisInfo loop_analysis_info(loop_info);
- LoopAnalysis::CalculateLoopBasicProperties(loop_info, &loop_analysis_info);
+ // Perform unrolling.
+ HLoopInformation* loop_info = analysis_info->GetLoopInfo();
+ PeelUnrollSimpleHelper helper(loop_info);
+ helper.DoUnrolling();
- // Check "IsLoopClonable" last as it can be time-consuming.
- if (loop_analysis_info.HasInstructionsPreventingScalarUnrolling() ||
- arch_loop_helper_->IsLoopNonBeneficialForScalarOpts(&loop_analysis_info) ||
- (loop_analysis_info.GetNumberOfExits() > 1) ||
- !PeelUnrollHelper::IsLoopClonable(loop_info)) {
- return false;
+ // Remove the redundant loop check after unrolling.
+ HIf* copy_hif =
+ helper.GetBasicBlockMap()->Get(loop_info->GetHeader())->GetLastInstruction()->AsIf();
+ int32_t constant = loop_info->Contains(*copy_hif->IfTrueSuccessor()) ? 1 : 0;
+ copy_hif->ReplaceInput(graph_->GetIntConstant(constant), 0u);
}
+ return true;
+}
- // TODO: support other unrolling factors.
- DCHECK_EQ(unrolling_factor, 2u);
+bool HLoopOptimization::TryPeelingForLoopInvariantExitsElimination(LoopAnalysisInfo* analysis_info,
+ bool generate_code) {
+ HLoopInformation* loop_info = analysis_info->GetLoopInfo();
+ if (!arch_loop_helper_->IsLoopPeelingEnabled()) {
+ return false;
+ }
- // Perform unrolling.
- PeelUnrollSimpleHelper helper(loop_info);
- helper.DoUnrolling();
+ if (analysis_info->GetNumberOfInvariantExits() == 0) {
+ return false;
+ }
- // Remove the redundant loop check after unrolling.
- HIf* copy_hif =
- helper.GetBasicBlockMap()->Get(loop_info->GetHeader())->GetLastInstruction()->AsIf();
- int32_t constant = loop_info->Contains(*copy_hif->IfTrueSuccessor()) ? 1 : 0;
- copy_hif->ReplaceInput(graph_->GetIntConstant(constant), 0u);
+ if (generate_code) {
+ // Perform peeling.
+ PeelUnrollSimpleHelper helper(loop_info);
+ helper.DoPeeling();
+
+ // Statically evaluate loop check after peeling for loop invariant condition.
+ const SuperblockCloner::HInstructionMap* hir_map = helper.GetInstructionMap();
+ for (auto entry : *hir_map) {
+ HInstruction* copy = entry.second;
+ if (copy->IsIf()) {
+ TryToEvaluateIfCondition(copy->AsIf(), graph_);
+ }
+ }
+ }
return true;
}
-bool HLoopOptimization::TryPeelingForLoopInvariantExitsElimination(LoopNode* node) {
+bool HLoopOptimization::TryPeelingAndUnrolling(LoopNode* node) {
// Don't run peeling/unrolling if compiler_options_ is nullptr (i.e., running under tests)
// as InstructionSet is needed.
if (compiler_options_ == nullptr) {
@@ -809,35 +819,27 @@ bool HLoopOptimization::TryPeelingForLoopInvariantExitsElimination(LoopNode* nod
}
HLoopInformation* loop_info = node->loop_info;
- // Check 'IsLoopClonable' the last as it might be time-consuming.
- if (!arch_loop_helper_->IsLoopPeelingEnabled()) {
+ int64_t trip_count = LoopAnalysis::GetLoopTripCount(loop_info, &induction_range_);
+ LoopAnalysisInfo analysis_info(loop_info);
+ LoopAnalysis::CalculateLoopBasicProperties(loop_info, &analysis_info, trip_count);
+
+ if (analysis_info.HasInstructionsPreventingScalarOpts() ||
+ arch_loop_helper_->IsLoopNonBeneficialForScalarOpts(&analysis_info)) {
return false;
}
- LoopAnalysisInfo loop_analysis_info(loop_info);
- LoopAnalysis::CalculateLoopBasicProperties(loop_info, &loop_analysis_info);
-
- // Check "IsLoopClonable" last as it can be time-consuming.
- if (loop_analysis_info.HasInstructionsPreventingScalarPeeling() ||
- arch_loop_helper_->IsLoopNonBeneficialForScalarOpts(&loop_analysis_info) ||
- !LoopAnalysis::HasLoopAtLeastOneInvariantExit(loop_info) ||
- !PeelUnrollHelper::IsLoopClonable(loop_info)) {
+ if (!TryPeelingForLoopInvariantExitsElimination(&analysis_info, /*generate_code*/ false) &&
+ !TryUnrollingForBranchPenaltyReduction(&analysis_info, /*generate_code*/ false)) {
return false;
}
- // Perform peeling.
- PeelUnrollSimpleHelper helper(loop_info);
- helper.DoPeeling();
-
- const SuperblockCloner::HInstructionMap* hir_map = helper.GetInstructionMap();
- for (auto entry : *hir_map) {
- HInstruction* copy = entry.second;
- if (copy->IsIf()) {
- TryToEvaluateIfCondition(copy->AsIf(), graph_);
- }
+ // Run 'IsLoopClonable' last, as it might be time-consuming.
+ if (!PeelUnrollHelper::IsLoopClonable(loop_info)) {
+ return false;
}
- return true;
+ return TryPeelingForLoopInvariantExitsElimination(&analysis_info) ||
+ TryUnrollingForBranchPenaltyReduction(&analysis_info);
}
//
@@ -1076,7 +1078,7 @@ void HLoopOptimization::Vectorize(LoopNode* node,
vector_index_,
ptc,
graph_->GetConstant(induc_type, 1),
- kNoUnrollingFactor);
+ LoopAnalysisInfo::kNoUnrollingFactor);
}
// Generate vector loop, possibly further unrolled:
@@ -1103,7 +1105,7 @@ void HLoopOptimization::Vectorize(LoopNode* node,
vector_index_,
stc,
graph_->GetConstant(induc_type, 1),
- kNoUnrollingFactor);
+ LoopAnalysisInfo::kNoUnrollingFactor);
}
// Link reductions to their final uses.
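The reworked TryPeelingAndUnrolling driver above computes LoopAnalysisInfo once, dry-runs both transforms with generate_code=false, defers the costly IsLoopClonable check until a transform is known to apply, and only then mutates the graph. A sketch of the loop shape that peeling for invariant-exit elimination targets (C++ for concreteness; the heuristic matches the equivalent HIR):

    int sum(const int* a, int n, bool flag) {
      int s = 0;
      for (int i = 0; i < n; ++i) {
        if (flag) return -1;  // HIf exit with a loop-invariant condition
        s += a[i];
      }
      return s;
    }

After one iteration is peeled, the check inside the remaining loop is reached only when 'flag' is already known to be false, so TryToEvaluateIfCondition can fold it away.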
diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h
index 9743b25259..bc4792458b 100644
--- a/compiler/optimizing/loop_optimization.h
+++ b/compiler/optimizing/loop_optimization.h
@@ -144,12 +144,19 @@ class HLoopOptimization : public HOptimization {
bool OptimizeInnerLoop(LoopNode* node);
// Tries to apply loop unrolling for branch penalty reduction and better instruction scheduling
- // opportunities. Returns whether transformation happened.
- bool TryUnrollingForBranchPenaltyReduction(LoopNode* loop_node);
+ // opportunities. Returns whether the transformation happened. 'generate_code' determines
+ // whether the optimization should actually be applied.
+ bool TryUnrollingForBranchPenaltyReduction(LoopAnalysisInfo* analysis_info,
+ bool generate_code = true);
// Tries to apply loop peeling for loop invariant exits elimination. Returns whether
- // transformation happened.
- bool TryPeelingForLoopInvariantExitsElimination(LoopNode* loop_node);
+ // transformation happened. 'generate_code' determines whether the optimization should
+ // actually be applied.
+ bool TryPeelingForLoopInvariantExitsElimination(LoopAnalysisInfo* analysis_info,
+ bool generate_code = true);
+
+ // Tries to apply scalar loop peeling and unrolling.
+ bool TryPeelingAndUnrolling(LoopNode* node);
//
// Vectorization analysis and synthesis.
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 661f66a34c..50ce7559f5 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -1305,6 +1305,19 @@ void HInstruction::ReplaceUsesDominatedBy(HInstruction* dominator, HInstruction*
}
}
+void HInstruction::ReplaceEnvUsesDominatedBy(HInstruction* dominator, HInstruction* replacement) {
+ const HUseList<HEnvironment*>& uses = GetEnvUses();
+ for (auto it = uses.begin(), end = uses.end(); it != end; /* ++it below */) {
+ HEnvironment* user = it->GetUser();
+ size_t index = it->GetIndex();
+ // Increment `it` now because `*it` may disappear thanks to user->ReplaceInput().
+ ++it;
+ if (dominator->StrictlyDominates(user->GetHolder())) {
+ user->ReplaceInput(replacement, index);
+ }
+ }
+}
+
void HInstruction::ReplaceInput(HInstruction* replacement, size_t index) {
HUserRecord<HInstruction*> input_use = InputRecordAt(index);
if (input_use.GetInstruction() == replacement) {
@@ -2879,8 +2892,7 @@ void HInvoke::SetIntrinsic(Intrinsics intrinsic,
}
bool HNewInstance::IsStringAlloc() const {
- ScopedObjectAccess soa(Thread::Current());
- return GetReferenceTypeInfo().IsStringClass();
+ return GetEntrypoint() == kQuickAllocStringObject;
}
bool HInvoke::NeedsEnvironment() const {
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 825779989c..cd8d07a17a 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -2217,6 +2217,7 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> {
void ReplaceWith(HInstruction* instruction);
void ReplaceUsesDominatedBy(HInstruction* dominator, HInstruction* replacement);
+ void ReplaceEnvUsesDominatedBy(HInstruction* dominator, HInstruction* replacement);
void ReplaceInput(HInstruction* replacement, size_t index);
// This is almost the same as doing `ReplaceWith()`. But in this helper, the
diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h
index c5e9a8d036..b4f9993ad6 100644
--- a/compiler/optimizing/nodes_vector.h
+++ b/compiler/optimizing/nodes_vector.h
@@ -958,6 +958,10 @@ class HVecMultiplyAccumulate FINAL : public HVecOperation {
SetRawInputAt(2, mul_right);
}
+ static constexpr int kInputAccumulatorIndex = 0;
+ static constexpr int kInputMulLeftIndex = 1;
+ static constexpr int kInputMulRightIndex = 2;
+
bool CanBeMoved() const OVERRIDE { return true; }
bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
diff --git a/compiler/optimizing/optimization.cc b/compiler/optimizing/optimization.cc
index 142ddb5fbb..3c803ab627 100644
--- a/compiler/optimizing/optimization.cc
+++ b/compiler/optimizing/optimization.cc
@@ -28,6 +28,7 @@
#endif
#ifdef ART_ENABLE_CODEGEN_x86
#include "pc_relative_fixups_x86.h"
+#include "instruction_simplifier_x86.h"
#endif
#if defined(ART_ENABLE_CODEGEN_x86) || defined(ART_ENABLE_CODEGEN_x86_64)
#include "x86_memory_gen.h"
@@ -121,6 +122,8 @@ const char* OptimizationPassName(OptimizationPass pass) {
#if defined(ART_ENABLE_CODEGEN_x86) || defined(ART_ENABLE_CODEGEN_x86_64)
case OptimizationPass::kX86MemoryOperandGeneration:
return x86::X86MemoryOperandGeneration::kX86MemoryOperandGenerationPassName;
+ case OptimizationPass::kInstructionSimplifierX86:
+ return x86::InstructionSimplifierX86::kInstructionSimplifierX86PassName;
#endif
case OptimizationPass::kNone:
LOG(FATAL) << "kNone does not represent an actual pass";
@@ -163,6 +166,7 @@ OptimizationPass OptimizationPassByName(const std::string& pass_name) {
#ifdef ART_ENABLE_CODEGEN_x86
X(OptimizationPass::kPcRelativeFixupsX86);
X(OptimizationPass::kX86MemoryOperandGeneration);
+ X(OptimizationPass::kInstructionSimplifierX86);
#endif
LOG(FATAL) << "Cannot find optimization " << pass_name;
UNREACHABLE();
@@ -323,6 +327,10 @@ ArenaVector<HOptimization*> ConstructOptimizations(
DCHECK(alt_name == nullptr) << "arch-specific pass does not support alternative name";
opt = new (allocator) x86::X86MemoryOperandGeneration(graph, codegen, stats);
break;
+ case OptimizationPass::kInstructionSimplifierX86:
+ DCHECK(alt_name == nullptr) << "arch-specific pass does not support alternative name";
+ opt = new (allocator) x86::InstructionSimplifierX86(graph, codegen, stats);
+ break;
#endif
case OptimizationPass::kNone:
LOG(FATAL) << "kNone does not represent an actual pass";
diff --git a/compiler/optimizing/optimization.h b/compiler/optimizing/optimization.h
index 88b283cebf..a9fafa0864 100644
--- a/compiler/optimizing/optimization.h
+++ b/compiler/optimizing/optimization.h
@@ -101,6 +101,7 @@ enum class OptimizationPass {
#endif
#if defined(ART_ENABLE_CODEGEN_x86) || defined(ART_ENABLE_CODEGEN_x86_64)
kX86MemoryOperandGeneration,
+ kInstructionSimplifierX86,
#endif
kNone,
kLast = kNone
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 84863e4357..f4bafcbef0 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -61,6 +61,7 @@
#include "ssa_builder.h"
#include "ssa_liveness_analysis.h"
#include "ssa_phi_elimination.h"
+#include "stack_map_stream.h"
#include "utils/assembler.h"
#include "verifier/verifier_compiler_binding.h"
@@ -530,7 +531,8 @@ bool OptimizingCompiler::RunArchOptimizations(HGraph* graph,
OptDef(OptimizationPass::kSideEffectsAnalysis),
OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"),
OptDef(OptimizationPass::kPcRelativeFixupsX86),
- OptDef(OptimizationPass::kX86MemoryOperandGeneration)
+ OptDef(OptimizationPass::kX86MemoryOperandGeneration),
+ OptDef(OptimizationPass::kInstructionSimplifierX86)
};
return RunOptimizations(graph,
codegen,
@@ -545,7 +547,8 @@ bool OptimizingCompiler::RunArchOptimizations(HGraph* graph,
OptimizationDef x86_64_optimizations[] = {
OptDef(OptimizationPass::kSideEffectsAnalysis),
OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"),
- OptDef(OptimizationPass::kX86MemoryOperandGeneration)
+ OptDef(OptimizationPass::kX86MemoryOperandGeneration),
+ OptDef(OptimizationPass::kInstructionSimplifierX86)
};
return RunOptimizations(graph,
codegen,
@@ -846,23 +849,23 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator,
case kAnalysisSkipped: {
MaybeRecordStat(compilation_stats_.get(),
MethodCompilationStat::kNotCompiledSkipped);
- }
break;
+ }
case kAnalysisInvalidBytecode: {
MaybeRecordStat(compilation_stats_.get(),
MethodCompilationStat::kNotCompiledInvalidBytecode);
- }
break;
+ }
case kAnalysisFailThrowCatchLoop: {
MaybeRecordStat(compilation_stats_.get(),
MethodCompilationStat::kNotCompiledThrowCatchLoop);
- }
break;
+ }
case kAnalysisFailAmbiguousArrayOp: {
MaybeRecordStat(compilation_stats_.get(),
MethodCompilationStat::kNotCompiledAmbiguousArrayOp);
- }
break;
+ }
case kAnalysisSuccess:
UNREACHABLE();
}
@@ -1104,14 +1107,35 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
return compiled_method;
}
+static void CreateJniStackMap(ArenaStack* arena_stack,
+ const JniCompiledMethod& jni_compiled_method,
+ /* out */ ArenaVector<uint8_t>* stack_map,
+ /* out */ ArenaVector<uint8_t>* method_info) {
+ ScopedArenaAllocator allocator(arena_stack);
+ StackMapStream stack_map_stream(&allocator, jni_compiled_method.GetInstructionSet());
+ stack_map_stream.BeginMethod(
+ jni_compiled_method.GetFrameSize(),
+ jni_compiled_method.GetCoreSpillMask(),
+ jni_compiled_method.GetFpSpillMask(),
+ /* num_dex_registers */ 0);
+ stack_map_stream.EndMethod();
+ stack_map->resize(stack_map_stream.PrepareForFillIn());
+ method_info->resize(stack_map_stream.ComputeMethodInfoSize());
+ stack_map_stream.FillInCodeInfo(MemoryRegion(stack_map->data(), stack_map->size()));
+ stack_map_stream.FillInMethodInfo(MemoryRegion(method_info->data(), method_info->size()));
+}
+
CompiledMethod* OptimizingCompiler::JniCompile(uint32_t access_flags,
uint32_t method_idx,
const DexFile& dex_file,
Handle<mirror::DexCache> dex_cache) const {
+ Runtime* runtime = Runtime::Current();
+ ArenaAllocator allocator(runtime->GetArenaPool());
+ ArenaStack arena_stack(runtime->GetArenaPool());
+
const CompilerOptions& compiler_options = GetCompilerDriver()->GetCompilerOptions();
if (compiler_options.IsBootImage()) {
ScopedObjectAccess soa(Thread::Current());
- Runtime* runtime = Runtime::Current();
ArtMethod* method = runtime->GetClassLinker()->LookupResolvedMethod(
method_idx, dex_cache.Get(), /* class_loader */ nullptr);
if (method != nullptr && UNLIKELY(method->IsIntrinsic())) {
@@ -1126,8 +1150,6 @@ CompiledMethod* OptimizingCompiler::JniCompile(uint32_t access_flags,
access_flags,
/* verified_method */ nullptr,
dex_cache);
- ArenaAllocator allocator(runtime->GetArenaPool());
- ArenaStack arena_stack(runtime->GetArenaPool());
CodeVectorAllocator code_allocator(&allocator);
VariableSizedHandleScope handles(soa.Self());
// Go to native so that we don't block GC during compilation.
@@ -1153,6 +1175,10 @@ CompiledMethod* OptimizingCompiler::JniCompile(uint32_t access_flags,
JniCompiledMethod jni_compiled_method = ArtQuickJniCompileMethod(
compiler_options, access_flags, method_idx, dex_file);
MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kCompiledNativeStub);
+
+ ArenaVector<uint8_t> stack_map(allocator.Adapter(kArenaAllocStackMaps));
+ ArenaVector<uint8_t> method_info(allocator.Adapter(kArenaAllocStackMaps));
+ CreateJniStackMap(&arena_stack, jni_compiled_method, &stack_map, &method_info);
return CompiledMethod::SwapAllocCompiledMethod(
GetCompilerDriver(),
jni_compiled_method.GetInstructionSet(),
@@ -1160,8 +1186,8 @@ CompiledMethod* OptimizingCompiler::JniCompile(uint32_t access_flags,
jni_compiled_method.GetFrameSize(),
jni_compiled_method.GetCoreSpillMask(),
jni_compiled_method.GetFpSpillMask(),
- /* method_info */ ArrayRef<const uint8_t>(),
- /* vmap_table */ ArrayRef<const uint8_t>(),
+ ArrayRef<const uint8_t>(method_info),
+ ArrayRef<const uint8_t>(stack_map),
jni_compiled_method.GetCfi(),
/* patches */ ArrayRef<const linker::LinkerPatch>());
}
@@ -1221,18 +1247,42 @@ bool OptimizingCompiler::JitCompile(Thread* self,
ScopedNullHandle<mirror::ObjectArray<mirror::Object>> roots;
ArenaSet<ArtMethod*, std::less<ArtMethod*>> cha_single_implementation_list(
allocator.Adapter(kArenaAllocCHA));
+ ArenaVector<uint8_t> stack_map(allocator.Adapter(kArenaAllocStackMaps));
+ ArenaVector<uint8_t> method_info(allocator.Adapter(kArenaAllocStackMaps));
+ ArenaStack arena_stack(runtime->GetJitArenaPool());
+ // StackMapStream is large and does not fit on this stack frame, so we need a helper method.
+ // TODO: Try to avoid the extra memory copy that results from this.
+ CreateJniStackMap(&arena_stack, jni_compiled_method, &stack_map, &method_info);
+ uint8_t* stack_map_data = nullptr;
+ uint8_t* method_info_data = nullptr;
+ uint8_t* roots_data = nullptr;
+ uint32_t data_size = code_cache->ReserveData(self,
+ stack_map.size(),
+ method_info.size(),
+ /* number_of_roots */ 0,
+ method,
+ &stack_map_data,
+ &method_info_data,
+ &roots_data);
+ if (stack_map_data == nullptr || roots_data == nullptr) {
+ MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kJitOutOfMemoryForCommit);
+ return false;
+ }
+ memcpy(stack_map_data, stack_map.data(), stack_map.size());
+ memcpy(method_info_data, method_info.data(), method_info.size());
+
const void* code = code_cache->CommitCode(
self,
method,
- /* stack_map_data */ nullptr,
- /* method_info_data */ nullptr,
- /* roots_data */ nullptr,
+ stack_map_data,
+ method_info_data,
+ roots_data,
jni_compiled_method.GetFrameSize(),
jni_compiled_method.GetCoreSpillMask(),
jni_compiled_method.GetFpSpillMask(),
jni_compiled_method.GetCode().data(),
jni_compiled_method.GetCode().size(),
- /* data_size */ 0u,
+ data_size,
osr,
roots,
/* has_should_deoptimize_flag */ false,
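Note on the JNI stack-map plumbing above: both the AOT path (JniCompile) and the JIT path (JitCompile) now funnel through the shared CreateJniStackMap helper instead of passing null stack-map data. A condensed sketch of the shared pattern, using only calls that appear in the hunks above (not a complete function):

    // Sketch of the call pattern from the hunks above.
    ArenaVector<uint8_t> stack_map(allocator.Adapter(kArenaAllocStackMaps));
    ArenaVector<uint8_t> method_info(allocator.Adapter(kArenaAllocStackMaps));
    // StackMapStream is built on a separate ScopedArenaAllocator inside the
    // helper because the stream object is too large for the caller's frame.
    CreateJniStackMap(&arena_stack, jni_compiled_method, &stack_map, &method_info);
    // AOT: wrap as ArrayRef<const uint8_t>(method_info) / (stack_map) for
    // CompiledMethod::SwapAllocCompiledMethod.
    // JIT: memcpy into the buffers reserved via code_cache->ReserveData().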
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index f246228074..9a26f2f6c4 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -50,7 +50,6 @@ enum class MethodCompilationStat {
kNotCompiledThrowCatchLoop,
kNotCompiledAmbiguousArrayOp,
kNotCompiledHugeMethod,
- kNotCompiledIrreducibleAndStringInit,
kNotCompiledLargeMethodNoBranches,
kNotCompiledMalformedOpcode,
kNotCompiledNoCodegen,
diff --git a/compiler/optimizing/optimizing_unit_test.h b/compiler/optimizing/optimizing_unit_test.h
index a627f65ed4..f903f82d50 100644
--- a/compiler/optimizing/optimizing_unit_test.h
+++ b/compiler/optimizing/optimizing_unit_test.h
@@ -29,6 +29,7 @@
#include "dex/dex_instruction.h"
#include "dex/standard_dex_file.h"
#include "driver/dex_compilation_unit.h"
+#include "graph_checker.h"
#include "handle_scope-inl.h"
#include "mirror/class_loader.h"
#include "mirror/dex_cache.h"
@@ -129,10 +130,12 @@ class OptimizingUnitTestHelper {
// Create the dex file based on the fake data. Call the constructor so that we can use virtual
// functions. Don't use the arena for the StandardDexFile otherwise the dex location leaks.
dex_files_.emplace_back(new StandardDexFile(
- std::make_unique<NonOwningDexFileContainer>(dex_data, sizeof(StandardDexFile::Header)),
+ dex_data,
+ sizeof(StandardDexFile::Header),
"no_location",
/*location_checksum*/ 0,
- /*oat_dex_file*/ nullptr));
+ /*oat_dex_file*/ nullptr,
+ /*container*/ nullptr));
return new (allocator) HGraph(
allocator,
@@ -185,6 +188,77 @@ class OptimizingUnitTestHelper {
class OptimizingUnitTest : public CommonCompilerTest, public OptimizingUnitTestHelper {};
+// OptimizingUnitTest with some handy functions to ease graph creation.
+class ImprovedOptimizingUnitTest : public OptimizingUnitTest {
+ public:
+ ImprovedOptimizingUnitTest() : graph_(CreateGraph()),
+ entry_block_(nullptr),
+ return_block_(nullptr),
+ exit_block_(nullptr),
+ parameter_(nullptr) {}
+
+ virtual ~ImprovedOptimizingUnitTest() {}
+
+ void InitGraph() {
+ entry_block_ = new (GetAllocator()) HBasicBlock(graph_);
+ graph_->AddBlock(entry_block_);
+ graph_->SetEntryBlock(entry_block_);
+
+ return_block_ = new (GetAllocator()) HBasicBlock(graph_);
+ graph_->AddBlock(return_block_);
+
+ exit_block_ = new (GetAllocator()) HBasicBlock(graph_);
+ graph_->AddBlock(exit_block_);
+ graph_->SetExitBlock(exit_block_);
+
+ entry_block_->AddSuccessor(return_block_);
+ return_block_->AddSuccessor(exit_block_);
+
+ parameter_ = new (GetAllocator()) HParameterValue(graph_->GetDexFile(),
+ dex::TypeIndex(0),
+ 0,
+ DataType::Type::kInt32);
+ entry_block_->AddInstruction(parameter_);
+ return_block_->AddInstruction(new (GetAllocator()) HReturnVoid());
+ exit_block_->AddInstruction(new (GetAllocator()) HExit());
+ }
+
+ bool CheckGraph() {
+ GraphChecker checker(graph_);
+ checker.Run();
+ if (!checker.IsValid()) {
+ for (const std::string& error : checker.GetErrors()) {
+ std::cout << error << std::endl;
+ }
+ return false;
+ }
+ return true;
+ }
+
+ HEnvironment* ManuallyBuildEnvFor(HInstruction* instruction,
+ ArenaVector<HInstruction*>* current_locals) {
+ HEnvironment* environment = new (GetAllocator()) HEnvironment(
+ GetAllocator(),
+ current_locals->size(),
+ graph_->GetArtMethod(),
+ instruction->GetDexPc(),
+ instruction);
+
+ environment->CopyFrom(ArrayRef<HInstruction* const>(*current_locals));
+ instruction->SetRawEnvironment(environment);
+ return environment;
+ }
+
+ protected:
+ HGraph* graph_;
+
+ HBasicBlock* entry_block_;
+ HBasicBlock* return_block_;
+ HBasicBlock* exit_block_;
+
+ HInstruction* parameter_;
+};
+
// Naive string diff data type.
typedef std::list<std::pair<std::string, std::string>> diff_t;
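A minimal sketch of how a test can build on the new fixture; the test and pass names here are illustrative placeholders, not part of this change:

    // Hypothetical subclass; InitGraph(), CheckGraph() and graph_ come from the fixture.
    class MyPassTest : public ImprovedOptimizingUnitTest {};

    TEST_F(MyPassTest, RunsOnTrivialGraph) {
      InitGraph();                // entry -> return -> exit, one int32 parameter.
      graph_->BuildDominatorTree();
      EXPECT_TRUE(CheckGraph());  // GraphChecker-backed validation.
    }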
diff --git a/compiler/optimizing/select_generator.cc b/compiler/optimizing/select_generator.cc
index 0d0f7cc748..dcc7f77fc2 100644
--- a/compiler/optimizing/select_generator.cc
+++ b/compiler/optimizing/select_generator.cc
@@ -45,7 +45,9 @@ static bool IsSimpleBlock(HBasicBlock* block) {
HInstruction* instruction = it.Current();
if (instruction->IsControlFlow()) {
return instruction->IsGoto() || instruction->IsReturn();
- } else if (instruction->CanBeMoved() && !instruction->HasSideEffects()) {
+ } else if (instruction->CanBeMoved() &&
+ !instruction->HasSideEffects() &&
+ !instruction->CanThrow()) {
if (instruction->IsSelect() &&
instruction->AsSelect()->GetCondition()->GetBlock() == block) {
// Count one HCondition and HSelect in the same block as a single instruction.
@@ -119,10 +121,14 @@ bool HSelectGenerator::Run() {
// TODO(dbrazdil): This puts an instruction between If and its condition.
// Implement moving of conditions to first users if possible.
while (!true_block->IsSingleGoto() && !true_block->IsSingleReturn()) {
- true_block->GetFirstInstruction()->MoveBefore(if_instruction);
+ HInstruction* instr = true_block->GetFirstInstruction();
+ DCHECK(!instr->CanThrow());
+ instr->MoveBefore(if_instruction);
}
while (!false_block->IsSingleGoto() && !false_block->IsSingleReturn()) {
- false_block->GetFirstInstruction()->MoveBefore(if_instruction);
+ HInstruction* instr = false_block->GetFirstInstruction();
+ DCHECK(!instr->CanThrow());
+ instr->MoveBefore(if_instruction);
}
DCHECK(true_block->IsSingleGoto() || true_block->IsSingleReturn());
DCHECK(false_block->IsSingleGoto() || false_block->IsSingleReturn());
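The new CanThrow() guard exists because select generation hoists every instruction of both arms above the branch, making them unconditional. An illustrative source-level shape (hypothetical example, not taken from this change):

    int Select(bool b, int p, int q) {
      int v;
      if (b) {
        v = p / q;  // HDivZeroCheck guards q here, under the condition.
      } else {
        v = 1;
      }
      return v;
    }
    // Rewriting the diamond to v = b ? (p / q) : 1 moves p / q, and with it
    // the HDivZeroCheck, above the branch; the zero check would then execute
    // even when b is false and could throw where the original program could
    // not. Hence instructions that CanThrow() now block the transform.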
diff --git a/compiler/optimizing/select_generator_test.cc b/compiler/optimizing/select_generator_test.cc
new file mode 100644
index 0000000000..6e6549737c
--- /dev/null
+++ b/compiler/optimizing/select_generator_test.cc
@@ -0,0 +1,96 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "select_generator.h"
+
+#include "base/arena_allocator.h"
+#include "builder.h"
+#include "nodes.h"
+#include "optimizing_unit_test.h"
+#include "side_effects_analysis.h"
+
+namespace art {
+
+class SelectGeneratorTest : public ImprovedOptimizingUnitTest {
+ public:
+ void ConstructBasicGraphForSelect(HInstruction* instr) {
+ HBasicBlock* if_block = new (GetAllocator()) HBasicBlock(graph_);
+ HBasicBlock* then_block = new (GetAllocator()) HBasicBlock(graph_);
+ HBasicBlock* else_block = new (GetAllocator()) HBasicBlock(graph_);
+
+ graph_->AddBlock(if_block);
+ graph_->AddBlock(then_block);
+ graph_->AddBlock(else_block);
+
+ entry_block_->ReplaceSuccessor(return_block_, if_block);
+
+ if_block->AddSuccessor(then_block);
+ if_block->AddSuccessor(else_block);
+ then_block->AddSuccessor(return_block_);
+ else_block->AddSuccessor(return_block_);
+
+ HParameterValue* bool_param = new (GetAllocator()) HParameterValue(graph_->GetDexFile(),
+ dex::TypeIndex(0),
+ 1,
+ DataType::Type::kBool);
+ entry_block_->AddInstruction(bool_param);
+ HIntConstant* const1 = graph_->GetIntConstant(1);
+
+ if_block->AddInstruction(new (GetAllocator()) HIf(bool_param));
+
+ then_block->AddInstruction(instr);
+ then_block->AddInstruction(new (GetAllocator()) HGoto());
+
+ else_block->AddInstruction(new (GetAllocator()) HGoto());
+
+ HPhi* phi = new (GetAllocator()) HPhi(GetAllocator(), 0, 0, DataType::Type::kInt32);
+ return_block_->AddPhi(phi);
+ phi->AddInput(instr);
+ phi->AddInput(const1);
+ }
+
+ bool CheckGraphAndTrySelectGenerator() {
+ graph_->BuildDominatorTree();
+ EXPECT_TRUE(CheckGraph());
+
+ SideEffectsAnalysis side_effects(graph_);
+ side_effects.Run();
+ return HSelectGenerator(graph_, /*handles*/ nullptr, /*stats*/ nullptr).Run();
+ }
+};
+
+// HDivZeroCheck can throw, so it must not be hoisted out of the conditional block
+// and made unconditional.
+TEST_F(SelectGeneratorTest, testZeroCheck) {
+ InitGraph();
+ HDivZeroCheck* instr = new (GetAllocator()) HDivZeroCheck(parameter_, 0);
+ ConstructBasicGraphForSelect(instr);
+
+ ArenaVector<HInstruction*> current_locals({parameter_, graph_->GetIntConstant(1)},
+ GetAllocator()->Adapter(kArenaAllocInstruction));
+ ManuallyBuildEnvFor(instr, &current_locals);
+
+ EXPECT_FALSE(CheckGraphAndTrySelectGenerator());
+}
+
+// Test that SelectGenerator succeeds with HAdd.
+TEST_F(SelectGeneratorTest, testAdd) {
+ InitGraph();
+ HAdd* instr = new (GetAllocator()) HAdd(DataType::Type::kInt32, parameter_, parameter_, 0);
+ ConstructBasicGraphForSelect(instr);
+ EXPECT_TRUE(CheckGraphAndTrySelectGenerator());
+}
+
+} // namespace art
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index dd54468217..dda29a1b4b 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -440,6 +440,62 @@ static bool HasAliasInEnvironments(HInstruction* instruction) {
return false;
}
+void SsaBuilder::ReplaceUninitializedStringPhis() {
+ ScopedArenaHashSet<HInstruction*> seen_instructions(
+ local_allocator_->Adapter(kArenaAllocGraphBuilder));
+ ScopedArenaVector<HInstruction*> worklist(local_allocator_->Adapter(kArenaAllocGraphBuilder));
+
+ // Iterate over all inputs and uses of the phi, recursively, until all related instructions
+ // have been visited.
+ for (const auto& pair : uninitialized_string_phis_) {
+ HPhi* string_phi = pair.first;
+ HInvoke* invoke = pair.second;
+ worklist.push_back(string_phi);
+ HNewInstance* found_instance = nullptr;
+ do {
+ HInstruction* current = worklist.back();
+ worklist.pop_back();
+ if (seen_instructions.find(current) != seen_instructions.end()) {
+ continue;
+ }
+ seen_instructions.insert(current);
+ if (current->IsNewInstance()) {
+ // If it is the first time we see the allocation, replace its uses. We don't register
+ // it through `RemoveRedundantUninitializedStrings`, as that method makes assumption about
+ // aliasing and environment uses that don't hold when the string escapes to phis.
+ // Note that this also means we will keep the (useless) allocation.
+ if (found_instance == nullptr) {
+ found_instance = current->AsNewInstance();
+ } else {
+ DCHECK(found_instance == current);
+ }
+ } else if (current->IsPhi()) {
+ // Push all inputs to the worklist. Those should be Phis or NewInstance.
+ for (HInstruction* input : current->GetInputs()) {
+ DCHECK(input->IsPhi() || input->IsNewInstance()) << input->DebugName();
+ worklist.push_back(input);
+ }
+ } else {
+ // The verifier prevents any other DEX uses of the uninitialized string.
+ DCHECK(current->IsEqual() || current->IsNotEqual());
+ continue;
+ }
+ current->ReplaceUsesDominatedBy(invoke, invoke);
+ current->ReplaceEnvUsesDominatedBy(invoke, invoke);
+ // Push all users to the worklist. Now that we have replaced
+ // the uses dominated by the invoke, the remaining users should only
+ // be Phis, or Equal/NotEqual.
+ for (const HUseListNode<HInstruction*>& use : current->GetUses()) {
+ HInstruction* user = use.GetUser();
+ DCHECK(user->IsPhi() || user->IsEqual() || user->IsNotEqual()) << user->DebugName();
+ worklist.push_back(user);
+ }
+ } while (!worklist.empty());
+ seen_instructions.clear();
+ DCHECK(found_instance != nullptr);
+ }
+}
+
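The worklist above handles dex code where an uninitialized String escapes into a phi before its constructor runs. Roughly, the shape being repaired (an illustrative model; this does not come out of normal javac output):

    // Two uninitialized allocations merge in a phi, then <init> runs once:
    //   v0 = NewInstance String      v1 = NewInstance String
    //   v2 = Phi(v0, v1)
    //   invoke StringFactory (the String.<init> replacement) on v2
    // All phi/NewInstance aliases (plus any ==/!= uses the verifier allows)
    // are redirected to the invoke's result via ReplaceUsesDominatedBy and
    // ReplaceEnvUsesDominatedBy, as in the function above.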
void SsaBuilder::RemoveRedundantUninitializedStrings() {
if (graph_->IsDebuggable()) {
// Do not perform the optimization for consistency with the interpreter
@@ -488,27 +544,32 @@ void SsaBuilder::RemoveRedundantUninitializedStrings() {
GraphAnalysisResult SsaBuilder::BuildSsa() {
DCHECK(!graph_->IsInSsaForm());
- // 1) Propagate types of phis. At this point, phis are typed void in the general
+ // Replace Phis that feed in a String.<init>, as well as their aliases, with
+ // the actual String allocation invocation. We do this first, as the phis stored in
+ // the data structure might get removed from the graph in later stages during `BuildSsa`.
+ ReplaceUninitializedStringPhis();
+
+ // Propagate types of phis. At this point, phis are typed void in the general
// case, or float/double/reference if we created an equivalent phi. So we need
// to propagate the types across phis to give them a correct type. If a type
// conflict is detected in this stage, the phi is marked dead.
RunPrimitiveTypePropagation();
- // 2) Now that the correct primitive types have been assigned, we can get rid
+ // Now that the correct primitive types have been assigned, we can get rid
// of redundant phis. Note that we cannot do this phase before type propagation,
// otherwise we could get rid of phi equivalents, whose presence is a requirement
// for the type propagation phase. Note that this is to satisfy statement (a)
// of the SsaBuilder (see ssa_builder.h).
SsaRedundantPhiElimination(graph_).Run();
- // 3) Fix the type for null constants which are part of an equality comparison.
+ // Fix the type for null constants which are part of an equality comparison.
// We need to do this after redundant phi elimination, to ensure the only cases
// that we can see are reference comparison against 0. The redundant phi
// elimination ensures we do not see a phi taking two 0 constants in a HEqual
// or HNotEqual.
FixNullConstantType();
- // 4) Compute type of reference type instructions. The pass assumes that
+ // Compute type of reference type instructions. The pass assumes that
// NullConstant has been fixed up.
ReferenceTypePropagation(graph_,
class_loader_,
@@ -516,7 +577,7 @@ GraphAnalysisResult SsaBuilder::BuildSsa() {
handles_,
/* is_first_run */ true).Run();
- // 5) HInstructionBuilder duplicated ArrayGet instructions with ambiguous type
+ // HInstructionBuilder duplicated ArrayGet instructions with ambiguous type
// (int/float or long/double) and marked ArraySets with ambiguous input type.
// Now that RTP computed the type of the array input, the ambiguity can be
// resolved and the correct equivalents kept.
@@ -524,13 +585,13 @@ GraphAnalysisResult SsaBuilder::BuildSsa() {
return kAnalysisFailAmbiguousArrayOp;
}
- // 6) Mark dead phis. This will mark phis which are not used by instructions
+ // Mark dead phis. This will mark phis which are not used by instructions
// or other live phis. If compiling as debuggable code, phis will also be kept
// live if they have an environment use.
SsaDeadPhiElimination dead_phi_elimimation(graph_);
dead_phi_elimimation.MarkDeadPhis();
- // 7) Make sure environments use the right phi equivalent: a phi marked dead
+ // Make sure environments use the right phi equivalent: a phi marked dead
// can have a phi equivalent that is not dead. In that case we have to replace
// it with the live equivalent because deoptimization and try/catch rely on
// environments containing values of all live vregs at that point. Note that
@@ -539,14 +600,14 @@ GraphAnalysisResult SsaBuilder::BuildSsa() {
// environments to just reference one.
FixEnvironmentPhis();
- // 8) Now that the right phis are used for the environments, we can eliminate
+ // Now that the right phis are used for the environments, we can eliminate
// phis we do not need. Regardless of the debuggable status, this phase is
// necessary for statement (b) of the SsaBuilder (see ssa_builder.h), as well
// as for the code generation, which does not deal with phis of conflicting
// input types.
dead_phi_elimimation.EliminateDeadPhis();
- // 9) HInstructionBuidler replaced uses of NewInstances of String with the
+ // HInstructionBuilder replaced uses of NewInstances of String with the
// results of their corresponding StringFactory calls. Unless the String
// objects are used before they are initialized, they can be replaced with
// NullConstant. Note that this optimization is valid only if unsimplified
diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h
index 60831a9e6a..765544508e 100644
--- a/compiler/optimizing/ssa_builder.h
+++ b/compiler/optimizing/ssa_builder.h
@@ -61,7 +61,8 @@ class SsaBuilder : public ValueObject {
local_allocator_(local_allocator),
ambiguous_agets_(local_allocator->Adapter(kArenaAllocGraphBuilder)),
ambiguous_asets_(local_allocator->Adapter(kArenaAllocGraphBuilder)),
- uninitialized_strings_(local_allocator->Adapter(kArenaAllocGraphBuilder)) {
+ uninitialized_strings_(local_allocator->Adapter(kArenaAllocGraphBuilder)),
+ uninitialized_string_phis_(local_allocator->Adapter(kArenaAllocGraphBuilder)) {
graph_->InitializeInexactObjectRTI(handles);
}
@@ -96,6 +97,10 @@ class SsaBuilder : public ValueObject {
}
}
+ void AddUninitializedStringPhi(HPhi* phi, HInvoke* invoke) {
+ uninitialized_string_phis_.push_back(std::make_pair(phi, invoke));
+ }
+
private:
void SetLoopHeaderPhiInputs();
void FixEnvironmentPhis();
@@ -118,6 +123,7 @@ class SsaBuilder : public ValueObject {
HArrayGet* GetFloatOrDoubleEquivalentOfArrayGet(HArrayGet* aget);
void RemoveRedundantUninitializedStrings();
+ void ReplaceUninitializedStringPhis();
HGraph* const graph_;
Handle<mirror::ClassLoader> class_loader_;
@@ -131,6 +137,7 @@ class SsaBuilder : public ValueObject {
ScopedArenaVector<HArrayGet*> ambiguous_agets_;
ScopedArenaVector<HArraySet*> ambiguous_asets_;
ScopedArenaVector<HNewInstance*> uninitialized_strings_;
+ ScopedArenaVector<std::pair<HPhi*, HInvoke*>> uninitialized_string_phis_;
DISALLOW_COPY_AND_ASSIGN(SsaBuilder);
};
diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc
index 5d361953ba..3e1a36dc9b 100644
--- a/compiler/optimizing/stack_map_stream.cc
+++ b/compiler/optimizing/stack_map_stream.cc
@@ -151,7 +151,7 @@ void StackMapStream::EndStackMapEntry() {
StackMap stack_map = code_info.GetStackMapAt(stack_map_index);
CHECK_EQ(stack_map.HasDexRegisterMap(), (num_dex_registers != 0));
CHECK_EQ(stack_map.HasInlineInfo(), (inlining_depth != 0));
- CHECK_EQ(code_info.GetInlineDepthOf(stack_map), inlining_depth);
+ CHECK_EQ(code_info.GetInlineInfosOf(stack_map).size(), inlining_depth);
});
}
}
@@ -209,7 +209,7 @@ void StackMapStream::BeginInlineInfoEntry(ArtMethod* method,
size_t depth = current_inline_infos_.size() - 1;
dchecks_.emplace_back([=](const CodeInfo& code_info) {
StackMap stack_map = code_info.GetStackMapAt(stack_map_index);
- InlineInfo inline_info = code_info.GetInlineInfoAtDepth(stack_map, depth);
+ InlineInfo inline_info = code_info.GetInlineInfosOf(stack_map)[depth];
CHECK_EQ(inline_info.GetDexPc(), dex_pc);
bool encode_art_method = EncodeArtMethodInInlineInfo(method);
CHECK_EQ(inline_info.EncodesArtMethod(), encode_art_method);
@@ -275,7 +275,6 @@ void StackMapStream::CreateDexRegisterMap() {
if (kVerifyStackMaps) {
size_t stack_map_index = stack_maps_.size();
- uint32_t depth = current_inline_infos_.size();
// We need to make copy of the current registers for later (when the check is run).
auto expected_dex_registers = std::make_shared<dchecked_vector<DexRegisterLocation>>(
current_dex_registers_.begin(), current_dex_registers_.end());
@@ -285,8 +284,9 @@ void StackMapStream::CreateDexRegisterMap() {
for (DexRegisterLocation reg : code_info.GetDexRegisterMapOf(stack_map)) {
CHECK_EQ((*expected_dex_registers)[expected_reg++], reg);
}
- for (uint32_t d = 0; d < depth; d++) {
- for (DexRegisterLocation reg : code_info.GetDexRegisterMapAtDepth(d, stack_map)) {
+ for (InlineInfo inline_info : code_info.GetInlineInfosOf(stack_map)) {
+ DexRegisterMap map = code_info.GetInlineDexRegisterMapOf(stack_map, inline_info);
+ for (DexRegisterLocation reg : map) {
CHECK_EQ((*expected_dex_registers)[expected_reg++], reg);
}
}
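The verification hunks above track a CodeInfo API change: the per-depth accessors are replaced by GetInlineInfosOf, which returns a view that is both indexable and iterable. Sketch of the two idioms (the old calls are the ones being deleted here and in the tests below):

    // Old:
    //   uint32_t depth = code_info.GetInlineDepthOf(stack_map);
    //   InlineInfo info = code_info.GetInlineInfoAtDepth(stack_map, d);
    //   DexRegisterMap map = code_info.GetDexRegisterMapAtDepth(d, stack_map);
    // New:
    for (InlineInfo inline_info : code_info.GetInlineInfosOf(stack_map)) {
      DexRegisterMap map = code_info.GetInlineDexRegisterMapOf(stack_map, inline_info);
      // ... inspect the per-inline-frame register locations ...
    }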
diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc
index 6241e0c25a..9ed90a4839 100644
--- a/compiler/optimizing/stack_map_test.cc
+++ b/compiler/optimizing/stack_map_test.cc
@@ -193,13 +193,12 @@ TEST(StackMapTest, Test2) {
ASSERT_EQ(-2, location1.GetValue());
ASSERT_TRUE(stack_map.HasInlineInfo());
- InlineInfo inline_info0 = code_info.GetInlineInfoAtDepth(stack_map, 0);
- InlineInfo inline_info1 = code_info.GetInlineInfoAtDepth(stack_map, 1);
- ASSERT_EQ(2u, code_info.GetInlineDepthOf(stack_map));
- ASSERT_EQ(3u, inline_info0.GetDexPc());
- ASSERT_EQ(2u, inline_info1.GetDexPc());
- ASSERT_TRUE(inline_info0.EncodesArtMethod());
- ASSERT_TRUE(inline_info1.EncodesArtMethod());
+ auto inline_infos = code_info.GetInlineInfosOf(stack_map);
+ ASSERT_EQ(2u, inline_infos.size());
+ ASSERT_EQ(3u, inline_infos[0].GetDexPc());
+ ASSERT_EQ(2u, inline_infos[1].GetDexPc());
+ ASSERT_TRUE(inline_infos[0].EncodesArtMethod());
+ ASSERT_TRUE(inline_infos[1].EncodesArtMethod());
}
// Second stack map.
@@ -614,19 +613,18 @@ TEST(StackMapTest, InlineTest) {
ASSERT_EQ(0, dex_registers0[0].GetStackOffsetInBytes());
ASSERT_EQ(4, dex_registers0[1].GetConstant());
- InlineInfo if0_0 = ci.GetInlineInfoAtDepth(sm0, 0);
- InlineInfo if0_1 = ci.GetInlineInfoAtDepth(sm0, 1);
- ASSERT_EQ(2u, ci.GetInlineDepthOf(sm0));
- ASSERT_EQ(2u, if0_0.GetDexPc());
- ASSERT_TRUE(if0_0.EncodesArtMethod());
- ASSERT_EQ(3u, if0_1.GetDexPc());
- ASSERT_TRUE(if0_1.EncodesArtMethod());
+ auto inline_infos = ci.GetInlineInfosOf(sm0);
+ ASSERT_EQ(2u, inline_infos.size());
+ ASSERT_EQ(2u, inline_infos[0].GetDexPc());
+ ASSERT_TRUE(inline_infos[0].EncodesArtMethod());
+ ASSERT_EQ(3u, inline_infos[1].GetDexPc());
+ ASSERT_TRUE(inline_infos[1].EncodesArtMethod());
- DexRegisterMap dex_registers1 = ci.GetDexRegisterMapAtDepth(0, sm0);
+ DexRegisterMap dex_registers1 = ci.GetInlineDexRegisterMapOf(sm0, inline_infos[0]);
ASSERT_EQ(1u, dex_registers1.size());
ASSERT_EQ(8, dex_registers1[0].GetStackOffsetInBytes());
- DexRegisterMap dex_registers2 = ci.GetDexRegisterMapAtDepth(1, sm0);
+ DexRegisterMap dex_registers2 = ci.GetInlineDexRegisterMapOf(sm0, inline_infos[1]);
ASSERT_EQ(3u, dex_registers2.size());
ASSERT_EQ(16, dex_registers2[0].GetStackOffsetInBytes());
ASSERT_EQ(20, dex_registers2[1].GetConstant());
@@ -642,22 +640,20 @@ TEST(StackMapTest, InlineTest) {
ASSERT_EQ(56, dex_registers0[0].GetStackOffsetInBytes());
ASSERT_EQ(0, dex_registers0[1].GetConstant());
- InlineInfo if1_0 = ci.GetInlineInfoAtDepth(sm1, 0);
- InlineInfo if1_1 = ci.GetInlineInfoAtDepth(sm1, 1);
- InlineInfo if1_2 = ci.GetInlineInfoAtDepth(sm1, 2);
- ASSERT_EQ(3u, ci.GetInlineDepthOf(sm1));
- ASSERT_EQ(2u, if1_0.GetDexPc());
- ASSERT_TRUE(if1_0.EncodesArtMethod());
- ASSERT_EQ(3u, if1_1.GetDexPc());
- ASSERT_TRUE(if1_1.EncodesArtMethod());
- ASSERT_EQ(5u, if1_2.GetDexPc());
- ASSERT_TRUE(if1_2.EncodesArtMethod());
-
- DexRegisterMap dex_registers1 = ci.GetDexRegisterMapAtDepth(0, sm1);
+ auto inline_infos = ci.GetInlineInfosOf(sm1);
+ ASSERT_EQ(3u, inline_infos.size());
+ ASSERT_EQ(2u, inline_infos[0].GetDexPc());
+ ASSERT_TRUE(inline_infos[0].EncodesArtMethod());
+ ASSERT_EQ(3u, inline_infos[1].GetDexPc());
+ ASSERT_TRUE(inline_infos[1].EncodesArtMethod());
+ ASSERT_EQ(5u, inline_infos[2].GetDexPc());
+ ASSERT_TRUE(inline_infos[2].EncodesArtMethod());
+
+ DexRegisterMap dex_registers1 = ci.GetInlineDexRegisterMapOf(sm1, inline_infos[0]);
ASSERT_EQ(1u, dex_registers1.size());
ASSERT_EQ(12, dex_registers1[0].GetStackOffsetInBytes());
- DexRegisterMap dex_registers2 = ci.GetDexRegisterMapAtDepth(1, sm1);
+ DexRegisterMap dex_registers2 = ci.GetInlineDexRegisterMapOf(sm1, inline_infos[1]);
ASSERT_EQ(3u, dex_registers2.size());
ASSERT_EQ(80, dex_registers2[0].GetStackOffsetInBytes());
ASSERT_EQ(10, dex_registers2[1].GetConstant());
@@ -684,22 +680,20 @@ TEST(StackMapTest, InlineTest) {
ASSERT_EQ(56, dex_registers0[0].GetStackOffsetInBytes());
ASSERT_EQ(0, dex_registers0[1].GetConstant());
- InlineInfo if2_0 = ci.GetInlineInfoAtDepth(sm3, 0);
- InlineInfo if2_1 = ci.GetInlineInfoAtDepth(sm3, 1);
- InlineInfo if2_2 = ci.GetInlineInfoAtDepth(sm3, 2);
- ASSERT_EQ(3u, ci.GetInlineDepthOf(sm3));
- ASSERT_EQ(2u, if2_0.GetDexPc());
- ASSERT_TRUE(if2_0.EncodesArtMethod());
- ASSERT_EQ(5u, if2_1.GetDexPc());
- ASSERT_TRUE(if2_1.EncodesArtMethod());
- ASSERT_EQ(10u, if2_2.GetDexPc());
- ASSERT_TRUE(if2_2.EncodesArtMethod());
-
- DexRegisterMap dex_registers1 = ci.GetDexRegisterMapAtDepth(1, sm3);
+ auto inline_infos = ci.GetInlineInfosOf(sm3);
+ ASSERT_EQ(3u, inline_infos.size());
+ ASSERT_EQ(2u, inline_infos[0].GetDexPc());
+ ASSERT_TRUE(inline_infos[0].EncodesArtMethod());
+ ASSERT_EQ(5u, inline_infos[1].GetDexPc());
+ ASSERT_TRUE(inline_infos[1].EncodesArtMethod());
+ ASSERT_EQ(10u, inline_infos[2].GetDexPc());
+ ASSERT_TRUE(inline_infos[2].EncodesArtMethod());
+
+ DexRegisterMap dex_registers1 = ci.GetInlineDexRegisterMapOf(sm3, inline_infos[1]);
ASSERT_EQ(1u, dex_registers1.size());
ASSERT_EQ(2, dex_registers1[0].GetMachineRegister());
- DexRegisterMap dex_registers2 = ci.GetDexRegisterMapAtDepth(2, sm3);
+ DexRegisterMap dex_registers2 = ci.GetInlineDexRegisterMapOf(sm3, inline_infos[2]);
ASSERT_EQ(2u, dex_registers2.size());
ASSERT_FALSE(dex_registers2[0].IsLive());
ASSERT_EQ(3, dex_registers2[1].GetMachineRegister());
diff --git a/compiler/optimizing/superblock_cloner_test.cc b/compiler/optimizing/superblock_cloner_test.cc
index 6f3bcdac47..31114b6dcc 100644
--- a/compiler/optimizing/superblock_cloner_test.cc
+++ b/compiler/optimizing/superblock_cloner_test.cc
@@ -30,38 +30,8 @@ using HEdgeSet = SuperblockCloner::HEdgeSet;
// This class provides methods and helpers for testing various cloning and copying routines:
// individual instruction cloning and cloning of the more coarse-grain structures.
-class SuperblockClonerTest : public OptimizingUnitTest {
+class SuperblockClonerTest : public ImprovedOptimizingUnitTest {
public:
- SuperblockClonerTest() : graph_(CreateGraph()),
- entry_block_(nullptr),
- return_block_(nullptr),
- exit_block_(nullptr),
- parameter_(nullptr) {}
-
- void InitGraph() {
- entry_block_ = new (GetAllocator()) HBasicBlock(graph_);
- graph_->AddBlock(entry_block_);
- graph_->SetEntryBlock(entry_block_);
-
- return_block_ = new (GetAllocator()) HBasicBlock(graph_);
- graph_->AddBlock(return_block_);
-
- exit_block_ = new (GetAllocator()) HBasicBlock(graph_);
- graph_->AddBlock(exit_block_);
- graph_->SetExitBlock(exit_block_);
-
- entry_block_->AddSuccessor(return_block_);
- return_block_->AddSuccessor(exit_block_);
-
- parameter_ = new (GetAllocator()) HParameterValue(graph_->GetDexFile(),
- dex::TypeIndex(0),
- 0,
- DataType::Type::kInt32);
- entry_block_->AddInstruction(parameter_);
- return_block_->AddInstruction(new (GetAllocator()) HReturnVoid());
- exit_block_->AddInstruction(new (GetAllocator()) HExit());
- }
-
void CreateBasicLoopControlFlow(HBasicBlock* position,
HBasicBlock* successor,
/* out */ HBasicBlock** header_p,
@@ -137,40 +107,6 @@ class SuperblockClonerTest : public OptimizingUnitTest {
null_check->CopyEnvironmentFrom(env);
bounds_check->CopyEnvironmentFrom(env);
}
-
- HEnvironment* ManuallyBuildEnvFor(HInstruction* instruction,
- ArenaVector<HInstruction*>* current_locals) {
- HEnvironment* environment = new (GetAllocator()) HEnvironment(
- (GetAllocator()),
- current_locals->size(),
- graph_->GetArtMethod(),
- instruction->GetDexPc(),
- instruction);
-
- environment->CopyFrom(ArrayRef<HInstruction* const>(*current_locals));
- instruction->SetRawEnvironment(environment);
- return environment;
- }
-
- bool CheckGraph() {
- GraphChecker checker(graph_);
- checker.Run();
- if (!checker.IsValid()) {
- for (const std::string& error : checker.GetErrors()) {
- std::cout << error << std::endl;
- }
- return false;
- }
- return true;
- }
-
- HGraph* graph_;
-
- HBasicBlock* entry_block_;
- HBasicBlock* return_block_;
- HBasicBlock* exit_block_;
-
- HInstruction* parameter_;
};
TEST_F(SuperblockClonerTest, IndividualInstrCloner) {
diff --git a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
index 2c428fac7e..c6c764e3a9 100644
--- a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
+++ b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
@@ -120,11 +120,10 @@ void ArmVIXLJNIMacroAssembler::BuildFrame(size_t frame_size,
// Write out entry spills.
int32_t offset = frame_size + kFramePointerSize;
- for (size_t i = 0; i < entry_spills.size(); ++i) {
- ArmManagedRegister reg = entry_spills.at(i).AsArm();
+ for (const ManagedRegisterSpill& spill : entry_spills) {
+ ArmManagedRegister reg = spill.AsArm();
if (reg.IsNoRegister()) {
// only increment stack offset.
- ManagedRegisterSpill spill = entry_spills.at(i);
offset += spill.getSize();
} else if (reg.IsCoreRegister()) {
asm_.StoreToOffset(kStoreWord, AsVIXLRegister(reg), sp, offset);
diff --git a/compiler/utils/arm64/jni_macro_assembler_arm64.cc b/compiler/utils/arm64/jni_macro_assembler_arm64.cc
index a5aa1c12b3..d6ce03387c 100644
--- a/compiler/utils/arm64/jni_macro_assembler_arm64.cc
+++ b/compiler/utils/arm64/jni_macro_assembler_arm64.cc
@@ -719,11 +719,10 @@ void Arm64JNIMacroAssembler::BuildFrame(size_t frame_size,
// Write out entry spills
int32_t offset = frame_size + static_cast<size_t>(kArm64PointerSize);
- for (size_t i = 0; i < entry_spills.size(); ++i) {
- Arm64ManagedRegister reg = entry_spills.at(i).AsArm64();
+ for (const ManagedRegisterSpill& spill : entry_spills) {
+ Arm64ManagedRegister reg = spill.AsArm64();
if (reg.IsNoRegister()) {
// only increment stack offset.
- ManagedRegisterSpill spill = entry_spills.at(i);
offset += spill.getSize();
} else if (reg.IsXRegister()) {
StoreToOffset(reg.AsXRegister(), SP, offset);
diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc
index e76e98a2a3..85e4326494 100644
--- a/compiler/utils/assembler_thumb_test_expected.cc.inc
+++ b/compiler/utils/assembler_thumb_test_expected.cc.inc
@@ -153,7 +153,7 @@ const char* const VixlJniHelpersResults[] = {
" 21c: f8d9 8034 ldr.w r8, [r9, #52] ; 0x34\n",
" 220: 4770 bx lr\n",
" 222: 4660 mov r0, ip\n",
- " 224: f8d9 c2d0 ldr.w ip, [r9, #720] ; 0x2d0\n",
+ " 224: f8d9 c2d4 ldr.w ip, [r9, #724] ; 0x2d4\n",
" 228: 47e0 blx ip\n",
nullptr
};
diff --git a/compiler/utils/managed_register.h b/compiler/utils/managed_register.h
index 2b7b2aa7ce..db9c36cc75 100644
--- a/compiler/utils/managed_register.h
+++ b/compiler/utils/managed_register.h
@@ -101,11 +101,11 @@ class ManagedRegisterSpill : public ManagedRegister {
ManagedRegisterSpill(const ManagedRegister& other, int32_t size)
: ManagedRegister(other), size_(size), spill_offset_(-1) { }
- int32_t getSpillOffset() {
+ int32_t getSpillOffset() const {
return spill_offset_;
}
- int32_t getSize() {
+ int32_t getSize() const {
return size_;
}
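Making the getters const is what lets the range-based loops in the assemblers above bind each element as const ManagedRegisterSpill&. A standalone illustration of the C++ rule involved (hypothetical types, not ART code):

    #include <vector>

    struct Spill {
      int getSize() { return size_; }         // non-const getter
      int getSizeConst() const { return size_; }
      int size_ = 4;
    };

    int Sum(const std::vector<Spill>& spills) {
      int total = 0;
      for (const Spill& s : spills) {
        // total += s.getSize();    // error: non-const member on a const ref
        total += s.getSizeConst();  // OK once the getter is const-qualified
      }
      return total;
    }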
diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc
index dce5b95fec..c0b6f988d4 100644
--- a/compiler/utils/mips/assembler_mips.cc
+++ b/compiler/utils/mips/assembler_mips.cc
@@ -4801,10 +4801,9 @@ void MipsAssembler::BuildFrame(size_t frame_size,
// Write out entry spills.
int32_t offset = frame_size + kFramePointerSize;
- for (size_t i = 0; i < entry_spills.size(); ++i) {
- MipsManagedRegister reg = entry_spills.at(i).AsMips();
+ for (const ManagedRegisterSpill& spill : entry_spills) {
+ MipsManagedRegister reg = spill.AsMips();
if (reg.IsNoRegister()) {
- ManagedRegisterSpill spill = entry_spills.at(i);
offset += spill.getSize();
} else if (reg.IsCoreRegister()) {
StoreToOffset(kStoreWord, reg.AsCoreRegister(), SP, offset);
diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc
index bb1bb82fa5..5b1c5d9e01 100644
--- a/compiler/utils/mips64/assembler_mips64.cc
+++ b/compiler/utils/mips64/assembler_mips64.cc
@@ -3633,9 +3633,8 @@ void Mips64Assembler::BuildFrame(size_t frame_size,
// Write out entry spills.
int32_t offset = frame_size + kFramePointerSize;
- for (size_t i = 0; i < entry_spills.size(); ++i) {
- Mips64ManagedRegister reg = entry_spills[i].AsMips64();
- ManagedRegisterSpill spill = entry_spills.at(i);
+ for (const ManagedRegisterSpill& spill : entry_spills) {
+ Mips64ManagedRegister reg = spill.AsMips64();
int32_t size = spill.getSize();
if (reg.IsNoRegister()) {
// only increment stack offset.
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 86f9010ea3..c2ce03b1f2 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -525,6 +525,58 @@ void X86Assembler::divss(XmmRegister dst, const Address& src) {
EmitOperand(dst, src);
}
+void X86Assembler::vfmadd231ps(XmmRegister acc, XmmRegister mul_left, XmmRegister mul_right) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ uint8_t byte_zero = EmitVexByteZero(false /*is_two_byte*/);
+ uint8_t byte_one = EmitVexByte1(false, false, false, 2);
+ uint8_t byte_two = EmitVexByte2(false, 128, X86ManagedRegister::FromXmmRegister(mul_left), 1);
+ EmitUint8(byte_zero);
+ EmitUint8(byte_one);
+ EmitUint8(byte_two);
+ // Opcode field.
+ EmitUint8(0xB8);
+ EmitXmmRegisterOperand(acc, mul_right);
+}
+
+void X86Assembler::vfmsub231ps(XmmRegister acc, XmmRegister mul_left, XmmRegister mul_right) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ uint8_t byte_zero = EmitVexByteZero(false /*is_two_byte*/);
+ uint8_t byte_one = EmitVexByte1(false, false, false, 2);
+ uint8_t byte_two = EmitVexByte2(false, 128, X86ManagedRegister::FromXmmRegister(mul_left), 1);
+ EmitUint8(byte_zero);
+ EmitUint8(byte_one);
+ EmitUint8(byte_two);
+ // Opcode field.
+ EmitUint8(0xBA);
+ EmitXmmRegisterOperand(acc, mul_right);
+}
+
+void X86Assembler::vfmadd231pd(XmmRegister acc, XmmRegister mul_left, XmmRegister mul_right) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ uint8_t byte_zero = EmitVexByteZero(false /*is_two_byte*/);
+ uint8_t byte_one = EmitVexByte1(false, false, false, 2);
+ uint8_t byte_two = EmitVexByte2(true, 128, X86ManagedRegister::FromXmmRegister(mul_left), 1);
+ EmitUint8(byte_zero);
+ EmitUint8(byte_one);
+ EmitUint8(byte_two);
+ // Opcode field.
+ EmitUint8(0xB8);
+ EmitXmmRegisterOperand(acc, mul_right);
+}
+
+void X86Assembler::vfmsub231pd(XmmRegister acc, XmmRegister mul_left, XmmRegister mul_right) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ uint8_t byte_zero = EmitVexByteZero(false /*is_two_byte*/);
+ uint8_t byte_one = EmitVexByte1(false, false, false, 2);
+ uint8_t byte_two = EmitVexByte2(true, 128, X86ManagedRegister::FromXmmRegister(mul_left), 1);
+ EmitUint8(byte_zero);
+ EmitUint8(byte_one);
+ EmitUint8(byte_two);
+ // Opcode field.
+ EmitUint8(0xBA);
+ EmitXmmRegisterOperand(acc, mul_right);
+}
+
void X86Assembler::addps(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
@@ -2898,6 +2950,99 @@ void X86Assembler::EmitLabelLink(NearLabel* label) {
}
+uint8_t X86Assembler::EmitVexByteZero(bool is_two_byte) {
+ uint8_t vex_zero = 0xC0;
+ if (!is_two_byte) {
+ vex_zero |= 0xC4;
+ } else {
+ vex_zero |= 0xC5;
+ }
+ return vex_zero;
+}
+
+uint8_t X86Assembler::EmitVexByte1(bool r, bool x, bool b, int mmmmm) {
+ // VEX Byte 1.
+ uint8_t vex_prefix = 0;
+ if (!r) {
+ vex_prefix |= 0x80; // VEX.R .
+ }
+ if (!x) {
+ vex_prefix |= 0x40; // VEX.X .
+ }
+ if (!b) {
+ vex_prefix |= 0x20; // VEX.B .
+ }
+
+ // VEX.mmmmm.
+ switch (mmmmm) {
+ case 1:
+ // Implied 0F leading opcode byte.
+ vex_prefix |= 0x01;
+ break;
+ case 2:
+ // Implied leading 0F 38 opcode byte.
+ vex_prefix |= 0x02;
+ break;
+ case 3:
+ // Implied leading 0F 3A opcode byte.
+ vex_prefix |= 0x03;
+ break;
+ default:
+ LOG(FATAL) << "unknown opcode bytes";
+ }
+ return vex_prefix;
+}
+
+uint8_t X86Assembler::EmitVexByte2(bool w, int l, X86ManagedRegister operand, int pp) {
+ uint8_t vex_prefix = 0;
+ // VEX Byte 2.
+ if (w) {
+ vex_prefix |= 0x80;
+ }
+
+ // VEX.vvvv.
+ if (operand.IsXmmRegister()) {
+ XmmRegister vvvv = operand.AsXmmRegister();
+ int inverted_reg = 15 - static_cast<int>(vvvv);
+ uint8_t reg = static_cast<uint8_t>(inverted_reg);
+ vex_prefix |= ((reg & 0x0F) << 3);
+ } else if (operand.IsCpuRegister()) {
+ Register vvvv = operand.AsCpuRegister();
+ int inverted_reg = 15 - static_cast<int>(vvvv);
+ uint8_t reg = static_cast<uint8_t>(inverted_reg);
+ vex_prefix |= ((reg & 0x0F) << 3);
+ }
+
+ // VEX.L.
+ if (l == 256) {
+ vex_prefix |= 0x04;
+ }
+
+ // VEX.pp.
+ switch (pp) {
+ case 0:
+ // SIMD Prefix - None.
+ vex_prefix |= 0x00;
+ break;
+ case 1:
+ // SIMD Prefix - 66.
+ vex_prefix |= 0x01;
+ break;
+ case 2:
+ // SIMD Prefix - F3.
+ vex_prefix |= 0x02;
+ break;
+ case 3:
+ // SIMD Prefix - F2.
+ vex_prefix |= 0x03;
+ break;
+ default:
+ LOG(FATAL) << "unknown SIMD Prefix";
+ }
+
+ return vex_prefix;
+}
+
void X86Assembler::EmitGenericShift(int reg_or_opcode,
const Operand& operand,
const Immediate& imm) {
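A worked example of the three prefix bytes these helpers emit, for vfmadd231ps xmm1, xmm2, xmm3 (VEX.128.66.0F38.W0 B8 /r); the byte values follow directly from the bit layout in the code above:

    // byte 0: EmitVexByteZero(/*is_two_byte=*/ false)         -> 0xC4
    // byte 1: EmitVexByte1(false, false, false, /*mmmmm=*/ 2) -> 0x80|0x40|0x20|0x02 = 0xE2
    // byte 2: EmitVexByte2(false, 128, xmm2, /*pp=*/ 1):
    //           vvvv = (15 - 2) << 3 = 0x68; L = 0 (l == 128); pp = 0x01 -> 0x69
    // opcode: 0xB8; ModRM: 0xC0 | (xmm1 << 3) | xmm3 = 0xCB
    // Full encoding: C4 E2 69 B8 CB == vfmadd231ps xmm1, xmm2, xmm3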
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index e42c4c986a..8c9ce82687 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -397,6 +397,12 @@ class X86Assembler FINAL : public Assembler {
void divss(XmmRegister dst, XmmRegister src);
void divss(XmmRegister dst, const Address& src);
+ // FMA MAC (multiply-accumulate) instructions.
+ void vfmadd231ps(XmmRegister dst, XmmRegister src1, XmmRegister src2);
+ void vfmadd231pd(XmmRegister dst, XmmRegister src1, XmmRegister src2);
+ void vfmsub231ps(XmmRegister dst, XmmRegister src1, XmmRegister src2);
+ void vfmsub231pd(XmmRegister dst, XmmRegister src1, XmmRegister src2);
+
void addps(XmmRegister dst, XmmRegister src); // no addr variant (for now)
void subps(XmmRegister dst, XmmRegister src);
void mulps(XmmRegister dst, XmmRegister src);
@@ -834,6 +840,11 @@ class X86Assembler FINAL : public Assembler {
void EmitLabelLink(Label* label);
void EmitLabelLink(NearLabel* label);
+ // Emit a 3-byte VEX prefix.
+ uint8_t EmitVexByteZero(bool is_two_byte);
+ uint8_t EmitVexByte1(bool r, bool x, bool b, int mmmmm);
+ uint8_t EmitVexByte2(bool w, int l, X86ManagedRegister vvv, int pp);
+
void EmitGenericShift(int rm, const Operand& operand, const Immediate& imm);
void EmitGenericShift(int rm, const Operand& operand, Register shifter);
diff --git a/compiler/utils/x86/jni_macro_assembler_x86.cc b/compiler/utils/x86/jni_macro_assembler_x86.cc
index 7e29c4aa26..dd99f03aa7 100644
--- a/compiler/utils/x86/jni_macro_assembler_x86.cc
+++ b/compiler/utils/x86/jni_macro_assembler_x86.cc
@@ -67,8 +67,7 @@ void X86JNIMacroAssembler::BuildFrame(size_t frame_size,
cfi().AdjustCFAOffset(kFramePointerSize);
DCHECK_EQ(static_cast<size_t>(cfi().GetCurrentCFAOffset()), frame_size);
- for (size_t i = 0; i < entry_spills.size(); ++i) {
- ManagedRegisterSpill spill = entry_spills.at(i);
+ for (const ManagedRegisterSpill& spill : entry_spills) {
if (spill.AsX86().IsCpuRegister()) {
int offset = frame_size + spill.getSpillOffset();
__ movl(Address(ESP, offset), spill.AsX86().AsCpuRegister());
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index bd31561937..9983eaeeea 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -603,6 +603,56 @@ void X86_64Assembler::divss(XmmRegister dst, const Address& src) {
}
+void X86_64Assembler::vfmadd231ps(XmmRegister acc, XmmRegister mul_left, XmmRegister mul_right) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ uint8_t byte_zero = EmitVexByteZero(false /*is_two_byte*/);
+ uint8_t byte_one = EmitVexByte1(acc.NeedsRex(), false, mul_right.NeedsRex(), 2);
+ uint8_t byte_two = EmitVexByte2(false, 128, X86_64ManagedRegister::FromXmmRegister(mul_left.AsFloatRegister()), 1);
+ EmitUint8(byte_zero);
+ EmitUint8(byte_one);
+ EmitUint8(byte_two);
+ // Opcode field.
+ EmitUint8(0xB8);
+ EmitXmmRegisterOperand(acc.LowBits(), mul_right);
+}
+
+void X86_64Assembler::vfmsub231ps(XmmRegister acc, XmmRegister mul_left, XmmRegister mul_right) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ uint8_t byte_zero = EmitVexByteZero(false /*is_two_byte*/);
+ uint8_t byte_one = EmitVexByte1(acc.NeedsRex(), false, mul_right.NeedsRex(), 2);
+ uint8_t byte_two = EmitVexByte2(false, 128, X86_64ManagedRegister::FromXmmRegister(mul_left.AsFloatRegister()), 1);
+ EmitUint8(byte_zero);
+ EmitUint8(byte_one);
+ EmitUint8(byte_two);
+ // Opcode field
+ EmitUint8(0xBA);
+ EmitXmmRegisterOperand(acc.LowBits(), mul_right);
+}
+
+void X86_64Assembler::vfmadd231pd(XmmRegister acc, XmmRegister mul_left, XmmRegister mul_right) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ uint8_t byte_zero = EmitVexByteZero(false /*is_two_byte*/);
+ uint8_t byte_one = EmitVexByte1(acc.NeedsRex(), false, mul_right.NeedsRex(), 2);
+ uint8_t byte_two = EmitVexByte2(true, 128, X86_64ManagedRegister::FromXmmRegister(mul_left.AsFloatRegister()), 1);
+ EmitUint8(byte_zero);
+ EmitUint8(byte_one);
+ EmitUint8(byte_two);
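+ // Opcode field.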
+ EmitUint8(0xB8);
+ EmitXmmRegisterOperand(acc.LowBits(), mul_right);
+}
+
+void X86_64Assembler::vfmsub231pd(XmmRegister acc, XmmRegister mul_left, XmmRegister mul_right) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ uint8_t byte_zero = EmitVexByteZero(false /*is_two_byte*/);
+ uint8_t byte_one = EmitVexByte1(acc.NeedsRex(), false, mul_right.NeedsRex(), 2);
+ uint8_t byte_two = EmitVexByte2(true, 128, X86_64ManagedRegister::FromXmmRegister(mul_left.AsFloatRegister()), 1);
+ EmitUint8(byte_zero);
+ EmitUint8(byte_one);
+ EmitUint8(byte_two);
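+ // Opcode field.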
+ EmitUint8(0xBA);
+ EmitXmmRegisterOperand(acc.LowBits(), mul_right);
+}
+
void X86_64Assembler::addps(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitOptionalRex32(dst, src);
@@ -3544,6 +3594,98 @@ void X86_64Assembler::EmitLabelLink(NearLabel* label) {
label->LinkTo(position);
}
+uint8_t X86_64Assembler::EmitVexByteZero(bool is_two_byte) {
+ uint8_t vex_zero = 0xC0;
+ if (!is_two_byte) {
+ vex_zero |= 0xC4;
+ } else {
+ vex_zero |= 0xC5;
+ }
+ return vex_zero;
+}
+
+uint8_t X86_64Assembler::EmitVexByte1(bool r, bool x, bool b, int mmmmm) {
+ // VEX Byte 1.
+ uint8_t vex_prefix = 0;
+ if (!r) {
+ vex_prefix |= 0x80; // VEX.R .
+ }
+ if (!x) {
+ vex_prefix |= 0x40; // VEX.X .
+ }
+ if (!b) {
+ vex_prefix |= 0x20; // VEX.B .
+ }
+
+ // VEX.mmmmm.
+ switch (mmmmm) {
+ case 1:
+ // Implied 0F leading opcode byte.
+ vex_prefix |= 0x01;
+ break;
+ case 2:
+ // Implied leading 0F 38 opcode byte.
+ vex_prefix |= 0x02;
+ break;
+ case 3:
+ // Implied leading 0F 3A opcode byte.
+ vex_prefix |= 0x03;
+ break;
+ default:
+ LOG(FATAL) << "unknown opcode bytes";
+ }
+
+ return vex_prefix;
+}
+
+uint8_t X86_64Assembler::EmitVexByte2(bool w, int l, X86_64ManagedRegister operand, int pp) {
+ // VEX Byte 2.
+ uint8_t vex_prefix = 0;
+ if (w) {
+ vex_prefix |= 0x80;
+ }
+ // VEX.vvvv.
+ if (operand.IsXmmRegister()) {
+ XmmRegister vvvv = operand.AsXmmRegister();
+ int inverted_reg = 15 - static_cast<int>(vvvv.AsFloatRegister());
+ uint8_t reg = static_cast<uint8_t>(inverted_reg);
+ vex_prefix |= ((reg & 0x0F) << 3);
+ } else if (operand.IsCpuRegister()) {
+ CpuRegister vvvv = operand.AsCpuRegister();
+ int inverted_reg = 15 - static_cast<int>(vvvv.AsRegister());
+ uint8_t reg = static_cast<uint8_t>(inverted_reg);
+ vex_prefix |= ((reg & 0x0F) << 3);
+ }
+
+ // VEX.L.
+ if (l == 256) {
+ vex_prefix |= 0x04;
+ }
+
+ // VEX.pp.
+ switch (pp) {
+ case 0:
+ // SIMD Prefix - None.
+ vex_prefix |= 0x00;
+ break;
+ case 1:
+ // SIMD Prefix - 66.
+ vex_prefix |= 0x01;
+ break;
+ case 2:
+ // SIMD Prefix - F3.
+ vex_prefix |= 0x02;
+ break;
+ case 3:
+ // SIMD Prefix - F2.
+ vex_prefix |= 0x03;
+ break;
+ default:
+ LOG(FATAL) << "unknown SIMD Prefix";
+ }
+
+ return vex_prefix;
+}
+
void X86_64Assembler::EmitGenericShift(bool wide,
int reg_or_opcode,
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index e4d72a7ba2..d5779aa786 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -436,6 +436,16 @@ class X86_64Assembler FINAL : public Assembler {
void divss(XmmRegister dst, XmmRegister src);
void divss(XmmRegister dst, const Address& src);
+ // MAC (multiply-accumulate) instructions.
+ // For reference, see the Intel instruction set reference, Volume 2C.
+ // The URL below is split across two lines:
+ // https://www.intel.com/content/www/us/en/architecture-and-technology/
+ // 64-ia-32-architectures-software-developer-vol-2c-manual.html
+ void vfmadd231ps(XmmRegister acc, XmmRegister left, XmmRegister right);
+ void vfmadd231pd(XmmRegister acc, XmmRegister left, XmmRegister right);
+ void vfmsub231ps(XmmRegister acc, XmmRegister left, XmmRegister right);
+ void vfmsub231pd(XmmRegister acc, XmmRegister left, XmmRegister right);
+
void addps(XmmRegister dst, XmmRegister src); // no addr variant (for now)
void subps(XmmRegister dst, XmmRegister src);
void mulps(XmmRegister dst, XmmRegister src);
@@ -921,6 +931,11 @@ class X86_64Assembler FINAL : public Assembler {
void EmitLabelLink(Label* label);
void EmitLabelLink(NearLabel* label);
+ // Emit a 3-byte VEX prefix.
+ uint8_t EmitVexByteZero(bool is_two_byte);
+ uint8_t EmitVexByte1(bool r, bool x, bool b, int mmmmm);
+ uint8_t EmitVexByte2(bool w, int l, X86_64ManagedRegister operand, int pp);
+
void EmitGenericShift(bool wide, int rm, CpuRegister reg, const Immediate& imm);
void EmitGenericShift(bool wide, int rm, CpuRegister operand, CpuRegister shifter);
diff --git a/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc b/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc
index 9486cb44c5..f6b2f9df34 100644
--- a/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc
+++ b/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc
@@ -75,8 +75,7 @@ void X86_64JNIMacroAssembler::BuildFrame(size_t frame_size,
__ movq(Address(CpuRegister(RSP), 0), method_reg.AsX86_64().AsCpuRegister());
- for (size_t i = 0; i < entry_spills.size(); ++i) {
- ManagedRegisterSpill spill = entry_spills.at(i);
+ for (const ManagedRegisterSpill& spill : entry_spills) {
if (spill.AsX86_64().IsCpuRegister()) {
if (spill.getSize() == 8) {
__ movq(Address(CpuRegister(RSP), frame_size + spill.getSpillOffset()),