From fb8d279bc011b31d0765dc7ca59afea324fd0d0c Mon Sep 17 00:00:00 2001 From: Mark Mendell Date: Tue, 31 Mar 2015 22:16:59 -0400 Subject: [optimizing] Implement x86/x86_64 math intrinsics Implement floor/ceil/round/RoundFloat on x86 and x86_64. Implement RoundDouble on x86_64. Add support for roundss and roundsd on both architectures. Support them in the disassembler as well. Add the instruction set features for x86, as the 'round' instruction is only supported if SSE4.1 is supported. Fix the tests to handle the addition of passing the instruction set features to x86 and x86_64. Add assembler tests for roundsd and roundss to x86_64 assembler tests. Change-Id: I9742d5930befb0bbc23f3d6c83ce0183ed9fe04f Signed-off-by: Mark Mendell --- compiler/optimizing/code_generator.cc | 8 +- compiler/optimizing/code_generator_x86.cc | 9 +- compiler/optimizing/code_generator_x86.h | 9 +- compiler/optimizing/code_generator_x86_64.cc | 11 +- compiler/optimizing/code_generator_x86_64.h | 9 +- compiler/optimizing/codegen_test.cc | 18 +- compiler/optimizing/constant_folding_test.cc | 5 +- compiler/optimizing/dead_code_elimination_test.cc | 5 +- compiler/optimizing/intrinsics_x86.cc | 152 +++++++++++++++- compiler/optimizing/intrinsics_x86.h | 3 +- compiler/optimizing/intrinsics_x86_64.cc | 208 +++++++++++++++++++++- compiler/optimizing/intrinsics_x86_64.h | 3 +- compiler/optimizing/linearize_test.cc | 5 +- compiler/optimizing/live_ranges_test.cc | 25 ++- compiler/optimizing/liveness_test.cc | 5 +- compiler/optimizing/register_allocator_test.cc | 65 +++++-- compiler/utils/assembler_test.h | 61 +++++++ compiler/utils/x86/assembler_x86.cc | 22 +++ compiler/utils/x86/assembler_x86.h | 3 + compiler/utils/x86_64/assembler_x86_64.cc | 24 +++ compiler/utils/x86_64/assembler_x86_64.h | 3 + compiler/utils/x86_64/assembler_x86_64_test.cc | 8 + 22 files changed, 609 insertions(+), 52 deletions(-) (limited to 'compiler') diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index bd6e943bf0..9b1ef17274 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -378,10 +378,14 @@ CodeGenerator* CodeGenerator::Create(HGraph* graph, case kMips: return nullptr; case kX86: { - return new x86::CodeGeneratorX86(graph, compiler_options); + return new x86::CodeGeneratorX86(graph, + *isa_features.AsX86InstructionSetFeatures(), + compiler_options); } case kX86_64: { - return new x86_64::CodeGeneratorX86_64(graph, compiler_options); + return new x86_64::CodeGeneratorX86_64(graph, + *isa_features.AsX86_64InstructionSetFeatures(), + compiler_options); } default: return nullptr; diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 0d5fe49c1d..224be0f25b 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -339,7 +339,9 @@ size_t CodeGeneratorX86::RestoreFloatingPointRegister(size_t stack_index, uint32 return GetFloatingPointSpillSlotSize(); } -CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, const CompilerOptions& compiler_options) +CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, + const X86InstructionSetFeatures& isa_features, + const CompilerOptions& compiler_options) : CodeGenerator(graph, kNumberOfCpuRegisters, kNumberOfXmmRegisters, @@ -352,7 +354,8 @@ CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, const CompilerOptions& compile block_labels_(graph->GetArena(), 0), location_builder_(graph, this), instruction_visitor_(graph, this), - move_resolver_(graph->GetArena(), this) { + move_resolver_(graph->GetArena(), this), + isa_features_(isa_features) { // Use a fake return address register to mimic Quick. AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister)); } @@ -1110,7 +1113,7 @@ void InstructionCodeGeneratorX86::VisitReturn(HReturn* ret) { } void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { - IntrinsicLocationsBuilderX86 intrinsic(GetGraph()->GetArena()); + IntrinsicLocationsBuilderX86 intrinsic(codegen_); if (intrinsic.TryDispatch(invoke)) { return; } diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 6a4d42dd01..7cdbd628cb 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -185,7 +185,9 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor { class CodeGeneratorX86 : public CodeGenerator { public: - CodeGeneratorX86(HGraph* graph, const CompilerOptions& compiler_options); + CodeGeneratorX86(HGraph* graph, + const X86InstructionSetFeatures& isa_features, + const CompilerOptions& compiler_options); virtual ~CodeGeneratorX86() {} void GenerateFrameEntry() OVERRIDE; @@ -271,6 +273,10 @@ class CodeGeneratorX86 : public CodeGenerator { Label* GetFrameEntryLabel() { return &frame_entry_label_; } + const X86InstructionSetFeatures& GetInstructionSetFeatures() const { + return isa_features_; + } + private: // Labels for each block that will be compiled. GrowableArray