diff options
Diffstat (limited to 'compiler')
20 files changed, 454 insertions, 15 deletions
diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc index 586891a3ff..fc8cd528fa 100644 --- a/compiler/common_compiler_test.cc +++ b/compiler/common_compiler_test.cc @@ -328,6 +328,8 @@ void CommonCompilerTest::ReserveImageSpace() { (size_t)120 * 1024 * 1024, // 120MB PROT_NONE, false /* no need for 4gb flag with fixed mmap */, + /*reuse=*/ false, + /*reservation=*/ nullptr, &error_msg); CHECK(image_reservation_.IsValid()) << error_msg; } diff --git a/compiler/debug/elf_debug_info_writer.h b/compiler/debug/elf_debug_info_writer.h index fe05992960..bb550b3060 100644 --- a/compiler/debug/elf_debug_info_writer.h +++ b/compiler/debug/elf_debug_info_writer.h @@ -372,10 +372,10 @@ class ElfCompilationUnitWriter { } // Base class. - mirror::Class* base_class = type->GetSuperClass(); + ObjPtr<mirror::Class> base_class = type->GetSuperClass(); if (base_class != nullptr) { info_.StartTag(DW_TAG_inheritance); - base_class_references.emplace(info_.size(), base_class); + base_class_references.emplace(info_.size(), base_class.Ptr()); info_.WriteRef4(DW_AT_type, 0); info_.WriteUdata(DW_AT_data_member_location, 0); info_.WriteSdata(DW_AT_accessibility, DW_ACCESS_public); diff --git a/compiler/dex/verification_results.cc b/compiler/dex/verification_results.cc index 1e0b94de81..dd947d90b7 100644 --- a/compiler/dex/verification_results.cc +++ b/compiler/dex/verification_results.cc @@ -79,7 +79,7 @@ void VerificationResults::ProcessVerifiedMethod(verifier::MethodVerifier* method if (inserted) { // Successfully added, release the unique_ptr since we no longer have ownership. DCHECK_EQ(GetVerifiedMethod(ref), verified_method.get()); - verified_method.release(); + verified_method.release(); // NOLINT b/117926937 } else { // TODO: Investigate why are we doing the work again for this method and try to avoid it. LOG(WARNING) << "Method processed more than once: " << ref.PrettyMethod(); @@ -117,7 +117,7 @@ void VerificationResults::CreateVerifiedMethodFor(MethodReference ref) { /*expected*/ nullptr, verified_method.get()) == AtomicMap::InsertResult::kInsertResultSuccess) { - verified_method.release(); + verified_method.release(); // NOLINT b/117926937 } } diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc index c5416d5a3d..df6e8a83e1 100644 --- a/compiler/driver/compiler_driver.cc +++ b/compiler/driver/compiler_driver.cc @@ -708,25 +708,46 @@ void CompilerDriver::Resolve(jobject class_loader, } } -static void ResolveConstStrings(CompilerDriver* driver, - const std::vector<const DexFile*>& dex_files, - TimingLogger* timings) { +void CompilerDriver::ResolveConstStrings(const std::vector<const DexFile*>& dex_files, + bool only_startup_strings, + TimingLogger* timings) { ScopedObjectAccess soa(Thread::Current()); StackHandleScope<1> hs(soa.Self()); ClassLinker* const class_linker = Runtime::Current()->GetClassLinker(); MutableHandle<mirror::DexCache> dex_cache(hs.NewHandle<mirror::DexCache>(nullptr)); + size_t num_instructions = 0u; for (const DexFile* dex_file : dex_files) { dex_cache.Assign(class_linker->FindDexCache(soa.Self(), *dex_file)); + if (only_startup_strings) { + // When resolving startup strings, create the preresolved strings array. + dex_cache->AddPreResolvedStringsArray(); + } TimingLogger::ScopedTiming t("Resolve const-string Strings", timings); for (ClassAccessor accessor : dex_file->GetClasses()) { - if (!driver->IsClassToCompile(accessor.GetDescriptor())) { + if (!IsClassToCompile(accessor.GetDescriptor())) { // Compilation is skipped, do not resolve const-string in code of this class. // FIXME: Make sure that inlining honors this. b/26687569 continue; } + + const bool is_startup_class = + profile_compilation_info_ != nullptr && + profile_compilation_info_->ContainsClass(*dex_file, accessor.GetClassIdx()); + for (const ClassAccessor::Method& method : accessor.GetMethods()) { + const bool is_clinit = (method.GetAccessFlags() & kAccConstructor) != 0 && + (method.GetAccessFlags() & kAccStatic) != 0; + const bool is_startup_clinit = is_startup_class && is_clinit; + + if (only_startup_strings && + profile_compilation_info_ != nullptr && + (!profile_compilation_info_->GetMethodHotness(method.GetReference()).IsStartup() && + !is_startup_clinit)) { + continue; + } + // Resolve const-strings in the code. Done to have deterministic allocation behavior. Right // now this is single-threaded for simplicity. // TODO: Collect the relevant string indices in parallel, then allocate them sequentially @@ -740,6 +761,11 @@ static void ResolveConstStrings(CompilerDriver* driver, : inst->VRegB_31c()); ObjPtr<mirror::String> string = class_linker->ResolveString(string_index, dex_cache); CHECK(string != nullptr) << "Could not allocate a string when forcing determinism"; + if (only_startup_strings) { + dex_cache->GetPreResolvedStrings()[string_index.index_] = + GcRoot<mirror::String>(string); + } + ++num_instructions; break; } @@ -750,6 +776,7 @@ static void ResolveConstStrings(CompilerDriver* driver, } } } + VLOG(compiler) << "Resolved " << num_instructions << " const string instructions"; } // Initialize type check bit strings for check-cast and instance-of in the code. Done to have @@ -897,8 +924,10 @@ void CompilerDriver::PreCompile(jobject class_loader, if (GetCompilerOptions().IsForceDeterminism() && GetCompilerOptions().IsBootImage()) { // Resolve strings from const-string. Do this now to have a deterministic image. - ResolveConstStrings(this, dex_files, timings); + ResolveConstStrings(dex_files, /*only_startup_strings=*/ false, timings); VLOG(compiler) << "Resolve const-strings: " << GetMemoryUsageString(false); + } else if (GetCompilerOptions().ResolveStartupConstStrings()) { + ResolveConstStrings(dex_files, /*only_startup_strings=*/ true, timings); } Verify(class_loader, dex_files, timings); @@ -1146,7 +1175,7 @@ static void MaybeAddToImageClasses(Thread* self, if (klass->IsArrayClass()) { MaybeAddToImageClasses(self, klass->GetComponentType(), image_classes); } - klass.Assign(klass->GetSuperClass()); + klass = klass->GetSuperClass(); } } diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h index 343f67c6d5..9a83e55c96 100644 --- a/compiler/driver/compiler_driver.h +++ b/compiler/driver/compiler_driver.h @@ -430,6 +430,12 @@ class CompilerDriver { typedef AtomicDexRefMap<MethodReference, CompiledMethod*> MethodTable; private: + // Resolve const string literals that are loaded from dex code. If only_startup_strings is + // specified, only methods that are marked startup in the profile are resolved. + void ResolveConstStrings(const std::vector<const DexFile*>& dex_files, + bool only_startup_strings, + /*inout*/ TimingLogger* timings); + // All method references that this compiler has compiled. MethodTable compiled_methods_; diff --git a/compiler/driver/compiler_options.cc b/compiler/driver/compiler_options.cc index 3ab9afc5d6..6b0e45629b 100644 --- a/compiler/driver/compiler_options.cc +++ b/compiler/driver/compiler_options.cc @@ -69,6 +69,7 @@ CompilerOptions::CompilerOptions() force_determinism_(false), deduplicate_code_(true), count_hotness_in_compiled_code_(false), + resolve_startup_const_strings_(false), register_allocation_strategy_(RegisterAllocator::kRegisterAllocatorDefault), passes_to_run_(nullptr) { } diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h index e9cbf74428..4a6bbfaae6 100644 --- a/compiler/driver/compiler_options.h +++ b/compiler/driver/compiler_options.h @@ -313,6 +313,10 @@ class CompilerOptions final { return count_hotness_in_compiled_code_; } + bool ResolveStartupConstStrings() const { + return resolve_startup_const_strings_; + } + private: bool ParseDumpInitFailures(const std::string& option, std::string* error_msg); void ParseDumpCfgPasses(const StringPiece& option, UsageFn Usage); @@ -392,6 +396,10 @@ class CompilerOptions final { // won't be atomic for performance reasons, so we accept races, just like in interpreter. bool count_hotness_in_compiled_code_; + // Whether we eagerly resolve all of the const strings that are loaded from startup methods in the + // profile. + bool resolve_startup_const_strings_; + RegisterAllocator::Strategy register_allocation_strategy_; // If not null, specifies optimization passes which will be run instead of defaults. diff --git a/compiler/driver/compiler_options_map-inl.h b/compiler/driver/compiler_options_map-inl.h index d4a582fb35..5a844959c4 100644 --- a/compiler/driver/compiler_options_map-inl.h +++ b/compiler/driver/compiler_options_map-inl.h @@ -80,6 +80,7 @@ inline bool ReadCompilerOptions(Base& map, CompilerOptions* options, std::string if (map.Exists(Base::CountHotnessInCompiledCode)) { options->count_hotness_in_compiled_code_ = true; } + map.AssignIfExists(Base::ResolveStartupConstStrings, &options->resolve_startup_const_strings_); if (map.Exists(Base::DumpTimings)) { options->dump_timings_ = true; @@ -184,6 +185,11 @@ inline void AddCompilerOptionsArgumentParserOptions(Builder& b) { .template WithType<std::string>() .IntoKey(Map::RegisterAllocationStrategy) + .Define("--resolve-startup-const-strings=_") + .template WithType<bool>() + .WithValueMap({{"false", false}, {"true", true}}) + .IntoKey(Map::ResolveStartupConstStrings) + .Define("--verbose-methods=_") .template WithType<ParseStringList<','>>() .IntoKey(Map::VerboseMethods); diff --git a/compiler/driver/compiler_options_map.def b/compiler/driver/compiler_options_map.def index 238cd465df..1ec34ec73a 100644 --- a/compiler/driver/compiler_options_map.def +++ b/compiler/driver/compiler_options_map.def @@ -52,13 +52,14 @@ COMPILER_OPTIONS_KEY (Unit, Baseline) COMPILER_OPTIONS_KEY (double, TopKProfileThreshold) COMPILER_OPTIONS_KEY (bool, AbortOnHardVerifierFailure) COMPILER_OPTIONS_KEY (bool, AbortOnSoftVerifierFailure) +COMPILER_OPTIONS_KEY (bool, ResolveStartupConstStrings, kIsDebugBuild) COMPILER_OPTIONS_KEY (std::string, DumpInitFailures) COMPILER_OPTIONS_KEY (std::string, DumpCFG) COMPILER_OPTIONS_KEY (Unit, DumpCFGAppend) // TODO: Add type parser. COMPILER_OPTIONS_KEY (std::string, RegisterAllocationStrategy) COMPILER_OPTIONS_KEY (ParseStringList<','>, VerboseMethods) -COMPILER_OPTIONS_KEY (bool, DeduplicateCode, true) +COMPILER_OPTIONS_KEY (bool, DeduplicateCode, true) COMPILER_OPTIONS_KEY (Unit, CountHotnessInCompiledCode) COMPILER_OPTIONS_KEY (Unit, DumpTimings) COMPILER_OPTIONS_KEY (Unit, DumpPassTimings) diff --git a/compiler/driver/simple_compiler_options_map.h b/compiler/driver/simple_compiler_options_map.h index 3860da9f66..e7a51a4995 100644 --- a/compiler/driver/simple_compiler_options_map.h +++ b/compiler/driver/simple_compiler_options_map.h @@ -50,7 +50,7 @@ using Parser = CmdlineParser<SimpleParseArgumentMap, SimpleParseArgumentMapKey>; static inline Parser CreateSimpleParser(bool ignore_unrecognized) { std::unique_ptr<Parser::Builder> parser_builder = - std::unique_ptr<Parser::Builder>(new Parser::Builder()); + std::make_unique<Parser::Builder>(); AddCompilerOptionsArgumentParserOptions<SimpleParseArgumentMap>(*parser_builder); diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 3f4fb156b4..a9acf90762 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -1205,6 +1205,7 @@ void CodeGeneratorARM64::SetupBlockedRegisters() const { // mr : Runtime reserved. // ip1 : VIXL core temp. // ip0 : VIXL core temp. + // x18 : Platform register. // // Blocked fp registers: // d31 : VIXL fp temp. @@ -1213,6 +1214,7 @@ void CodeGeneratorARM64::SetupBlockedRegisters() const { while (!reserved_core_registers.IsEmpty()) { blocked_core_registers_[reserved_core_registers.PopLowestIndex().GetCode()] = true; } + blocked_core_registers_[X18] = true; CPURegList reserved_fp_registers = vixl_reserved_fp_registers; while (!reserved_fp_registers.IsEmpty()) { diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index 7f94a298eb..dd781c288f 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -36,6 +36,8 @@ #include "jit/jit_code_cache.h" #include "mirror/class_loader.h" #include "mirror/dex_cache.h" +#include "mirror/object_array-alloc-inl.h" +#include "mirror/object_array-inl.h" #include "nodes.h" #include "optimizing_compiler.h" #include "reference_type_propagation.h" diff --git a/compiler/optimizing/intrinsic_objects.cc b/compiler/optimizing/intrinsic_objects.cc index 3c20ad698b..0374b4e332 100644 --- a/compiler/optimizing/intrinsic_objects.cc +++ b/compiler/optimizing/intrinsic_objects.cc @@ -21,6 +21,7 @@ #include "class_root.h" #include "handle.h" #include "obj_ptr-inl.h" +#include "mirror/object_array-alloc-inl.h" #include "mirror/object_array-inl.h" namespace art { diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc index 85e4326494..0d279ede19 100644 --- a/compiler/utils/assembler_thumb_test_expected.cc.inc +++ b/compiler/utils/assembler_thumb_test_expected.cc.inc @@ -76,7 +76,7 @@ const char* const VixlJniHelpersResults[] = { " f0: f1bc 0f00 cmp.w ip, #0\n", " f4: bf18 it ne\n", " f6: f20d 4c01 addwne ip, sp, #1025 ; 0x401\n", - " fa: f8d9 c08c ldr.w ip, [r9, #140] ; 0x8c\n", + " fa: f8d9 c094 ldr.w ip, [r9, #148] ; 0x94\n", " fe: f1bc 0f00 cmp.w ip, #0\n", " 102: d171 bne.n 1e8 <VixlJniHelpers+0x1e8>\n", " 104: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", @@ -153,7 +153,7 @@ const char* const VixlJniHelpersResults[] = { " 21c: f8d9 8034 ldr.w r8, [r9, #52] ; 0x34\n", " 220: 4770 bx lr\n", " 222: 4660 mov r0, ip\n", - " 224: f8d9 c2d4 ldr.w ip, [r9, #724] ; 0x2d4\n", + " 224: f8d9 c2dc ldr.w ip, [r9, #732] ; 0x2dc\n", " 228: 47e0 blx ip\n", nullptr }; diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc index 86f9010ea3..2d1e451232 100644 --- a/compiler/utils/x86/assembler_x86.cc +++ b/compiler/utils/x86/assembler_x86.cc @@ -59,6 +59,98 @@ std::ostream& operator<<(std::ostream& os, const Address& addr) { } } +uint8_t X86Assembler::EmitVexByteZero(bool is_two_byte) { + uint8_t vex_zero = 0xC0; + if (!is_two_byte) { + vex_zero |= 0xC4; + } else { + vex_zero |= 0xC5; + } + return vex_zero; +} + +uint8_t X86Assembler::EmitVexByte1(bool r, bool x, bool b, int mmmmm ) { + // VEX Byte 1 + uint8_t vex_prefix = 0; + if (!r) { + vex_prefix |= 0x80; // VEX.R + } + if (!x) { + vex_prefix |= 0x40; // VEX.X + } + if (!b) { + vex_prefix |= 0x20; // VEX.B + } + + // VEX.mmmmm + switch (mmmmm) { + case 1: + // implied 0F leading opcode byte + vex_prefix |= 0x01; + break; + case 2: + // implied leading 0F 38 opcode byte + vex_prefix |= 0x02; + break; + case 3: + // implied leading OF 3A opcode byte + vex_prefix |= 0x03; + break; + default: + LOG(FATAL) << "unknown opcode bytes"; + } + return vex_prefix; +} + +uint8_t X86Assembler::EmitVexByte2(bool w, int l, X86ManagedRegister operand, int pp) { + uint8_t vex_prefix = 0; + // VEX Byte 2 + if (w) { + vex_prefix |= 0x80; + } + // VEX.vvvv + if (operand.IsXmmRegister()) { + XmmRegister vvvv = operand.AsXmmRegister(); + int inverted_reg = 15-static_cast<int>(vvvv); + uint8_t reg = static_cast<uint8_t>(inverted_reg); + vex_prefix |= ((reg & 0x0F) << 3); + } else if (operand.IsCpuRegister()) { + Register vvvv = operand.AsCpuRegister(); + int inverted_reg = 15 - static_cast<int>(vvvv); + uint8_t reg = static_cast<uint8_t>(inverted_reg); + vex_prefix |= ((reg & 0x0F) << 3); + } + + // VEX.L + if (l == 256) { + vex_prefix |= 0x04; + } + + // VEX.pp + switch (pp) { + case 0: + // SIMD Pefix - None + vex_prefix |= 0x00; + break; + case 1: + // SIMD Prefix - 66 + vex_prefix |= 0x01; + break; + case 2: + // SIMD Prefix - F3 + vex_prefix |= 0x02; + break; + case 3: + // SIMD Prefix - F2 + vex_prefix |= 0x03; + break; + default: + LOG(FATAL) << "unknown SIMD Prefix"; + } + + return vex_prefix; +} + void X86Assembler::call(Register reg) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0xFF); @@ -179,6 +271,60 @@ void X86Assembler::movntl(const Address& dst, Register src) { EmitOperand(src, dst); } +void X86Assembler::blsi(Register dst, Register src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + uint8_t byte_zero = EmitVexByteZero(/*is_two_byte=*/ false); + uint8_t byte_one = EmitVexByte1(/*r=*/ false, + /*x=*/ false, + /*b=*/ false, + /*mmmmm=*/ 2); + uint8_t byte_two = EmitVexByte2(/*w=*/ false, + /*l=*/ 128, + X86ManagedRegister::FromCpuRegister(dst), + /*pp=*/ 0); + EmitUint8(byte_zero); + EmitUint8(byte_one); + EmitUint8(byte_two); + EmitUint8(0xF3); + EmitRegisterOperand(3, src); +} + +void X86Assembler::blsmsk(Register dst, Register src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + uint8_t byte_zero = EmitVexByteZero(/*is_two_byte=*/ false); + uint8_t byte_one = EmitVexByte1(/*r=*/ false, + /*x=*/ false, + /*b=*/ false, + /*mmmmm=*/ 2); + uint8_t byte_two = EmitVexByte2(/*w=*/ false, + /*l=*/ 128, + X86ManagedRegister::FromCpuRegister(dst), + /*pp=*/ 0); + EmitUint8(byte_zero); + EmitUint8(byte_one); + EmitUint8(byte_two); + EmitUint8(0xF3); + EmitRegisterOperand(2, src); +} + +void X86Assembler::blsr(Register dst, Register src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + uint8_t byte_zero = EmitVexByteZero(/*is_two_byte=*/ false); + uint8_t byte_one = EmitVexByte1(/*r=*/ false, + /*x=*/ false, + /*b=*/ false, + /*mmmmm=*/ 2); + uint8_t byte_two = EmitVexByte2(/*w=*/ false, + /*l=*/ 128, + X86ManagedRegister::FromCpuRegister(dst), + /*pp=*/ 0); + EmitUint8(byte_zero); + EmitUint8(byte_one); + EmitUint8(byte_two); + EmitUint8(0xF3); + EmitRegisterOperand(1, src); +} + void X86Assembler::bswapl(Register dst) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0x0F); @@ -1267,6 +1413,25 @@ void X86Assembler::pand(XmmRegister dst, XmmRegister src) { EmitXmmRegisterOperand(dst, src); } +void X86Assembler::andn(Register dst, Register src1, Register src2) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + uint8_t byte_zero = EmitVexByteZero(/*is_two_byte=*/ false); + uint8_t byte_one = EmitVexByte1(/*r=*/ false, + /*x=*/ false, + /*b=*/ false, + /*mmmmm=*/ 2); + uint8_t byte_two = EmitVexByte2(/*w=*/ false, + /*l=*/ 128, + X86ManagedRegister::FromCpuRegister(src1), + /*pp=*/ 0); + EmitUint8(byte_zero); + EmitUint8(byte_one); + EmitUint8(byte_two); + // Opcode field + EmitUint8(0xF2); + EmitRegisterOperand(dst, src2); +} + void X86Assembler::andnpd(XmmRegister dst, XmmRegister src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h index 5ac9236d6b..275e5c1234 100644 --- a/compiler/utils/x86/assembler_x86.h +++ b/compiler/utils/x86/assembler_x86.h @@ -337,6 +337,10 @@ class X86Assembler final : public Assembler { void movntl(const Address& dst, Register src); + void blsi(Register dst, Register src); // no addr variant (for now) + void blsmsk(Register dst, Register src); // no addr variant (for now) + void blsr(Register dst, Register src); // no addr varianr (for now) + void bswapl(Register dst); void bsfl(Register dst, Register src); @@ -500,6 +504,7 @@ class X86Assembler final : public Assembler { void andps(XmmRegister dst, const Address& src); void pand(XmmRegister dst, XmmRegister src); // no addr variant (for now) + void andn(Register dst, Register src1, Register src2); // no addr variant (for now) void andnpd(XmmRegister dst, XmmRegister src); // no addr variant (for now) void andnps(XmmRegister dst, XmmRegister src); void pandn(XmmRegister dst, XmmRegister src); @@ -837,6 +842,11 @@ class X86Assembler final : public Assembler { void EmitGenericShift(int rm, const Operand& operand, const Immediate& imm); void EmitGenericShift(int rm, const Operand& operand, Register shifter); + // Emit a 3 byte VEX Prefix + uint8_t EmitVexByteZero(bool is_two_byte); + uint8_t EmitVexByte1(bool r, bool x, bool b, int mmmmm); + uint8_t EmitVexByte2(bool w , int l , X86ManagedRegister operand, int pp); + ConstantArea constant_area_; DISALLOW_COPY_AND_ASSIGN(X86Assembler); diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc index ad75174d23..1d8bfe7fa7 100644 --- a/compiler/utils/x86/assembler_x86_test.cc +++ b/compiler/utils/x86/assembler_x86_test.cc @@ -349,6 +349,18 @@ TEST_F(AssemblerX86Test, RepMovsw) { DriverStr(expected, "rep_movsw"); } +TEST_F(AssemblerX86Test, Blsmask) { + DriverStr(RepeatRR(&x86::X86Assembler::blsmsk, "blsmsk %{reg2}, %{reg1}"), "blsmsk"); +} + +TEST_F(AssemblerX86Test, Blsi) { + DriverStr(RepeatRR(&x86::X86Assembler::blsi, "blsi %{reg2}, %{reg1}"), "blsi"); +} + +TEST_F(AssemblerX86Test, Blsr) { + DriverStr(RepeatRR(&x86::X86Assembler::blsr, "blsr %{reg2}, %{reg1}"), "blsr"); +} + TEST_F(AssemblerX86Test, Bsfl) { DriverStr(RepeatRR(&x86::X86Assembler::bsfl, "bsfl %{reg2}, %{reg1}"), "bsfl"); } @@ -657,6 +669,10 @@ TEST_F(AssemblerX86Test, PAnd) { DriverStr(RepeatFF(&x86::X86Assembler::pand, "pand %{reg2}, %{reg1}"), "pand"); } +TEST_F(AssemblerX86Test, Andn) { + DriverStr(RepeatRRR(&x86::X86Assembler::andn, "andn %{reg3}, %{reg2}, %{reg1}"), "andn"); +} + TEST_F(AssemblerX86Test, AndnPD) { DriverStr(RepeatFF(&x86::X86Assembler::andnpd, "andnpd %{reg2}, %{reg1}"), "andnpd"); } diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc index bd31561937..ae68fe934e 100644 --- a/compiler/utils/x86_64/assembler_x86_64.cc +++ b/compiler/utils/x86_64/assembler_x86_64.cc @@ -64,6 +64,99 @@ std::ostream& operator<<(std::ostream& os, const Address& addr) { } } +uint8_t X86_64Assembler::EmitVexByteZero(bool is_two_byte) { + uint8_t vex_zero = 0xC0; + if (!is_two_byte) { + vex_zero |= 0xC4; + } else { + vex_zero |= 0xC5; + } + return vex_zero; +} + +uint8_t X86_64Assembler::EmitVexByte1(bool r, bool x, bool b, int mmmmm) { + // VEX Byte 1 + uint8_t vex_prefix = 0; + if (!r) { + vex_prefix |= 0x80; // VEX.R + } + if (!x) { + vex_prefix |= 0x40; // VEX.X + } + if (!b) { + vex_prefix |= 0x20; // VEX.B + } + + // VEX.mmmmm + switch (mmmmm) { + case 1: + // implied 0F leading opcode byte + vex_prefix |= 0x01; + break; + case 2: + // implied leading 0F 38 opcode byte + vex_prefix |= 0x02; + break; + case 3: + // implied leading OF 3A opcode byte + vex_prefix |= 0x03; + break; + default: + LOG(FATAL) << "unknown opcode bytes"; + } + + return vex_prefix; +} + +uint8_t X86_64Assembler::EmitVexByte2(bool w, int l, X86_64ManagedRegister operand, int pp) { + // VEX Byte 2 + uint8_t vex_prefix = 0; + if (w) { + vex_prefix |= 0x80; + } + // VEX.vvvv + if (operand.IsXmmRegister()) { + XmmRegister vvvv = operand.AsXmmRegister(); + int inverted_reg = 15-static_cast<int>(vvvv.AsFloatRegister()); + uint8_t reg = static_cast<uint8_t>(inverted_reg); + vex_prefix |= ((reg & 0x0F) << 3); + } else if (operand.IsCpuRegister()) { + CpuRegister vvvv = operand.AsCpuRegister(); + int inverted_reg = 15 - static_cast<int>(vvvv.AsRegister()); + uint8_t reg = static_cast<uint8_t>(inverted_reg); + vex_prefix |= ((reg & 0x0F) << 3); + } + + // VEX.L + if (l == 256) { + vex_prefix |= 0x04; + } + + // VEX.pp + switch (pp) { + case 0: + // SIMD Pefix - None + vex_prefix |= 0x00; + break; + case 1: + // SIMD Prefix - 66 + vex_prefix |= 0x01; + break; + case 2: + // SIMD Prefix - F3 + vex_prefix |= 0x02; + break; + case 3: + // SIMD Prefix - F2 + vex_prefix |= 0x03; + break; + default: + LOG(FATAL) << "unknown SIMD Prefix"; + } + + return vex_prefix; +} + void X86_64Assembler::call(CpuRegister reg) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitOptionalRex32(reg); @@ -1483,6 +1576,25 @@ void X86_64Assembler::pand(XmmRegister dst, XmmRegister src) { EmitXmmRegisterOperand(dst.LowBits(), src); } +void X86_64Assembler::andn(CpuRegister dst, CpuRegister src1, CpuRegister src2) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + uint8_t byte_zero = EmitVexByteZero(/*is_two_byte=*/ false); + uint8_t byte_one = EmitVexByte1(dst.NeedsRex(), + /*x=*/ false, + src2.NeedsRex(), + /*mmmmm=*/ 2); + uint8_t byte_two = EmitVexByte2(/*w=*/ true, + /*l=*/ 128, + X86_64ManagedRegister::FromCpuRegister(src1.AsRegister()), + /*pp=*/ 0); + EmitUint8(byte_zero); + EmitUint8(byte_one); + EmitUint8(byte_two); + // Opcode field + EmitUint8(0xF2); + EmitRegisterOperand(dst.LowBits(), src2.LowBits()); +} + void X86_64Assembler::andnpd(XmmRegister dst, XmmRegister src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0x66); @@ -3260,6 +3372,60 @@ void X86_64Assembler::setcc(Condition condition, CpuRegister dst) { EmitUint8(0xC0 + dst.LowBits()); } +void X86_64Assembler::blsi(CpuRegister dst, CpuRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + uint8_t byte_zero = EmitVexByteZero(/*is_two_byte=*/ false); + uint8_t byte_one = EmitVexByte1(/*r=*/ false, + /*x=*/ false, + src.NeedsRex(), + /*mmmmm=*/ 2); + uint8_t byte_two = EmitVexByte2(/*w=*/ true, + /*l=*/ 128, + X86_64ManagedRegister::FromCpuRegister(dst.AsRegister()), + /*pp=*/ 0); + EmitUint8(byte_zero); + EmitUint8(byte_one); + EmitUint8(byte_two); + EmitUint8(0xF3); + EmitRegisterOperand(3, src.LowBits()); +} + +void X86_64Assembler::blsmsk(CpuRegister dst, CpuRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + uint8_t byte_zero = EmitVexByteZero(/*is_two_byte=*/ false); + uint8_t byte_one = EmitVexByte1(/*r=*/ false, + /*x=*/ false, + src.NeedsRex(), + /*mmmmm=*/ 2); + uint8_t byte_two = EmitVexByte2(/*w=*/ true, + /*l=*/ 128, + X86_64ManagedRegister::FromCpuRegister(dst.AsRegister()), + /*pp=*/ 0); + EmitUint8(byte_zero); + EmitUint8(byte_one); + EmitUint8(byte_two); + EmitUint8(0xF3); + EmitRegisterOperand(2, src.LowBits()); +} + +void X86_64Assembler::blsr(CpuRegister dst, CpuRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + uint8_t byte_zero = EmitVexByteZero(/*is_two_byte=*/ false); + uint8_t byte_one = EmitVexByte1(/*r=*/ false, + /*x=*/ false, + src.NeedsRex(), + /*mmmmm=*/ 2); + uint8_t byte_two = EmitVexByte2(/*w=*/ true, + /*l=*/ 128, + X86_64ManagedRegister::FromCpuRegister(dst.AsRegister()), + /*pp=*/ 0); + EmitUint8(byte_zero); + EmitUint8(byte_one); + EmitUint8(byte_two); + EmitUint8(0xF3); + EmitRegisterOperand(1, src.LowBits()); +} + void X86_64Assembler::bswapl(CpuRegister dst) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitOptionalRex(false, false, false, false, dst.NeedsRex()); diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h index e696635e62..ff13ea3293 100644 --- a/compiler/utils/x86_64/assembler_x86_64.h +++ b/compiler/utils/x86_64/assembler_x86_64.h @@ -543,6 +543,7 @@ class X86_64Assembler final : public Assembler { void andps(XmmRegister dst, XmmRegister src); // no addr variant (for now) void pand(XmmRegister dst, XmmRegister src); + void andn(CpuRegister dst, CpuRegister src1, CpuRegister src2); void andnpd(XmmRegister dst, XmmRegister src); // no addr variant (for now) void andnps(XmmRegister dst, XmmRegister src); void pandn(XmmRegister dst, XmmRegister src); @@ -796,6 +797,10 @@ class X86_64Assembler final : public Assembler { void bsfq(CpuRegister dst, CpuRegister src); void bsfq(CpuRegister dst, const Address& src); + void blsi(CpuRegister dst, CpuRegister src); // no addr variant (for now) + void blsmsk(CpuRegister dst, CpuRegister src); // no addr variant (for now) + void blsr(CpuRegister dst, CpuRegister src); // no addr variant (for now) + void bsrl(CpuRegister dst, CpuRegister src); void bsrl(CpuRegister dst, const Address& src); void bsrq(CpuRegister dst, CpuRegister src); @@ -951,6 +956,11 @@ class X86_64Assembler final : public Assembler { void EmitOptionalByteRegNormalizingRex32(CpuRegister dst, CpuRegister src); void EmitOptionalByteRegNormalizingRex32(CpuRegister dst, const Operand& operand); + // Emit a 3 byte VEX Prefix + uint8_t EmitVexByteZero(bool is_two_byte); + uint8_t EmitVexByte1(bool r, bool x, bool b, int mmmmm); + uint8_t EmitVexByte2(bool w , int l , X86_64ManagedRegister operand, int pp); + ConstantArea constant_area_; DISALLOW_COPY_AND_ASSIGN(X86_64Assembler); diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc index fe42f9b19b..528e037bdc 100644 --- a/compiler/utils/x86_64/assembler_x86_64_test.cc +++ b/compiler/utils/x86_64/assembler_x86_64_test.cc @@ -1414,7 +1414,9 @@ TEST_F(AssemblerX86_64Test, Andpd) { TEST_F(AssemblerX86_64Test, Pand) { DriverStr(RepeatFF(&x86_64::X86_64Assembler::pand, "pand %{reg2}, %{reg1}"), "pand"); } - +TEST_F(AssemblerX86_64Test, Andn) { + DriverStr(RepeatRRR(&x86_64::X86_64Assembler::andn, "andn %{reg3}, %{reg2}, %{reg1}"), "andn"); +} TEST_F(AssemblerX86_64Test, andnpd) { DriverStr(RepeatFF(&x86_64::X86_64Assembler::andnpd, "andnpd %{reg2}, %{reg1}"), "andnpd"); } @@ -1785,6 +1787,18 @@ TEST_F(AssemblerX86_64Test, RetAndLeave) { DriverFn(&ret_and_leave_fn, "retleave"); } +TEST_F(AssemblerX86_64Test, Blsmask) { + DriverStr(RepeatRR(&x86_64::X86_64Assembler::blsmsk, "blsmsk %{reg2}, %{reg1}"), "blsmsk"); +} + +TEST_F(AssemblerX86_64Test, Blsi) { + DriverStr(RepeatRR(&x86_64::X86_64Assembler::blsi, "blsi %{reg2}, %{reg1}"), "blsi"); +} + +TEST_F(AssemblerX86_64Test, Blsr) { + DriverStr(RepeatRR(&x86_64::X86_64Assembler::blsr, "blsr %{reg2}, %{reg1}"), "blsr"); +} + TEST_F(AssemblerX86_64Test, Bswapl) { DriverStr(Repeatr(&x86_64::X86_64Assembler::bswapl, "bswap %{reg}"), "bswapl"); } |