summaryrefslogtreecommitdiff
path: root/compiler
diff options
context:
space:
mode:
Diffstat (limited to 'compiler')
-rw-r--r--compiler/common_compiler_test.cc2
-rw-r--r--compiler/debug/elf_debug_info_writer.h4
-rw-r--r--compiler/dex/verification_results.cc4
-rw-r--r--compiler/driver/compiler_driver.cc41
-rw-r--r--compiler/driver/compiler_driver.h6
-rw-r--r--compiler/driver/compiler_options.cc1
-rw-r--r--compiler/driver/compiler_options.h8
-rw-r--r--compiler/driver/compiler_options_map-inl.h6
-rw-r--r--compiler/driver/compiler_options_map.def3
-rw-r--r--compiler/driver/simple_compiler_options_map.h2
-rw-r--r--compiler/optimizing/code_generator_arm64.cc2
-rw-r--r--compiler/optimizing/inliner.cc2
-rw-r--r--compiler/optimizing/intrinsic_objects.cc1
-rw-r--r--compiler/utils/assembler_thumb_test_expected.cc.inc4
-rw-r--r--compiler/utils/x86/assembler_x86.cc165
-rw-r--r--compiler/utils/x86/assembler_x86.h10
-rw-r--r--compiler/utils/x86/assembler_x86_test.cc16
-rw-r--r--compiler/utils/x86_64/assembler_x86_64.cc166
-rw-r--r--compiler/utils/x86_64/assembler_x86_64.h10
-rw-r--r--compiler/utils/x86_64/assembler_x86_64_test.cc16
20 files changed, 454 insertions, 15 deletions
diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc
index 586891a3ff..fc8cd528fa 100644
--- a/compiler/common_compiler_test.cc
+++ b/compiler/common_compiler_test.cc
@@ -328,6 +328,8 @@ void CommonCompilerTest::ReserveImageSpace() {
(size_t)120 * 1024 * 1024, // 120MB
PROT_NONE,
false /* no need for 4gb flag with fixed mmap */,
+ /*reuse=*/ false,
+ /*reservation=*/ nullptr,
&error_msg);
CHECK(image_reservation_.IsValid()) << error_msg;
}
diff --git a/compiler/debug/elf_debug_info_writer.h b/compiler/debug/elf_debug_info_writer.h
index fe05992960..bb550b3060 100644
--- a/compiler/debug/elf_debug_info_writer.h
+++ b/compiler/debug/elf_debug_info_writer.h
@@ -372,10 +372,10 @@ class ElfCompilationUnitWriter {
}
// Base class.
- mirror::Class* base_class = type->GetSuperClass();
+ ObjPtr<mirror::Class> base_class = type->GetSuperClass();
if (base_class != nullptr) {
info_.StartTag(DW_TAG_inheritance);
- base_class_references.emplace(info_.size(), base_class);
+ base_class_references.emplace(info_.size(), base_class.Ptr());
info_.WriteRef4(DW_AT_type, 0);
info_.WriteUdata(DW_AT_data_member_location, 0);
info_.WriteSdata(DW_AT_accessibility, DW_ACCESS_public);
diff --git a/compiler/dex/verification_results.cc b/compiler/dex/verification_results.cc
index 1e0b94de81..dd947d90b7 100644
--- a/compiler/dex/verification_results.cc
+++ b/compiler/dex/verification_results.cc
@@ -79,7 +79,7 @@ void VerificationResults::ProcessVerifiedMethod(verifier::MethodVerifier* method
if (inserted) {
// Successfully added, release the unique_ptr since we no longer have ownership.
DCHECK_EQ(GetVerifiedMethod(ref), verified_method.get());
- verified_method.release();
+ verified_method.release(); // NOLINT b/117926937
} else {
// TODO: Investigate why are we doing the work again for this method and try to avoid it.
LOG(WARNING) << "Method processed more than once: " << ref.PrettyMethod();
@@ -117,7 +117,7 @@ void VerificationResults::CreateVerifiedMethodFor(MethodReference ref) {
/*expected*/ nullptr,
verified_method.get()) ==
AtomicMap::InsertResult::kInsertResultSuccess) {
- verified_method.release();
+ verified_method.release(); // NOLINT b/117926937
}
}
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index c5416d5a3d..df6e8a83e1 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -708,25 +708,46 @@ void CompilerDriver::Resolve(jobject class_loader,
}
}
-static void ResolveConstStrings(CompilerDriver* driver,
- const std::vector<const DexFile*>& dex_files,
- TimingLogger* timings) {
+void CompilerDriver::ResolveConstStrings(const std::vector<const DexFile*>& dex_files,
+ bool only_startup_strings,
+ TimingLogger* timings) {
ScopedObjectAccess soa(Thread::Current());
StackHandleScope<1> hs(soa.Self());
ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
MutableHandle<mirror::DexCache> dex_cache(hs.NewHandle<mirror::DexCache>(nullptr));
+ size_t num_instructions = 0u;
for (const DexFile* dex_file : dex_files) {
dex_cache.Assign(class_linker->FindDexCache(soa.Self(), *dex_file));
+ if (only_startup_strings) {
+ // When resolving startup strings, create the preresolved strings array.
+ dex_cache->AddPreResolvedStringsArray();
+ }
TimingLogger::ScopedTiming t("Resolve const-string Strings", timings);
for (ClassAccessor accessor : dex_file->GetClasses()) {
- if (!driver->IsClassToCompile(accessor.GetDescriptor())) {
+ if (!IsClassToCompile(accessor.GetDescriptor())) {
// Compilation is skipped, do not resolve const-string in code of this class.
// FIXME: Make sure that inlining honors this. b/26687569
continue;
}
+
+ const bool is_startup_class =
+ profile_compilation_info_ != nullptr &&
+ profile_compilation_info_->ContainsClass(*dex_file, accessor.GetClassIdx());
+
for (const ClassAccessor::Method& method : accessor.GetMethods()) {
+ const bool is_clinit = (method.GetAccessFlags() & kAccConstructor) != 0 &&
+ (method.GetAccessFlags() & kAccStatic) != 0;
+ const bool is_startup_clinit = is_startup_class && is_clinit;
+
+ if (only_startup_strings &&
+ profile_compilation_info_ != nullptr &&
+ (!profile_compilation_info_->GetMethodHotness(method.GetReference()).IsStartup() &&
+ !is_startup_clinit)) {
+ continue;
+ }
+
// Resolve const-strings in the code. Done to have deterministic allocation behavior. Right
// now this is single-threaded for simplicity.
// TODO: Collect the relevant string indices in parallel, then allocate them sequentially
@@ -740,6 +761,11 @@ static void ResolveConstStrings(CompilerDriver* driver,
: inst->VRegB_31c());
ObjPtr<mirror::String> string = class_linker->ResolveString(string_index, dex_cache);
CHECK(string != nullptr) << "Could not allocate a string when forcing determinism";
+ if (only_startup_strings) {
+ dex_cache->GetPreResolvedStrings()[string_index.index_] =
+ GcRoot<mirror::String>(string);
+ }
+ ++num_instructions;
break;
}
@@ -750,6 +776,7 @@ static void ResolveConstStrings(CompilerDriver* driver,
}
}
}
+ VLOG(compiler) << "Resolved " << num_instructions << " const string instructions";
}
// Initialize type check bit strings for check-cast and instance-of in the code. Done to have
@@ -897,8 +924,10 @@ void CompilerDriver::PreCompile(jobject class_loader,
if (GetCompilerOptions().IsForceDeterminism() && GetCompilerOptions().IsBootImage()) {
// Resolve strings from const-string. Do this now to have a deterministic image.
- ResolveConstStrings(this, dex_files, timings);
+ ResolveConstStrings(dex_files, /*only_startup_strings=*/ false, timings);
VLOG(compiler) << "Resolve const-strings: " << GetMemoryUsageString(false);
+ } else if (GetCompilerOptions().ResolveStartupConstStrings()) {
+ ResolveConstStrings(dex_files, /*only_startup_strings=*/ true, timings);
}
Verify(class_loader, dex_files, timings);
@@ -1146,7 +1175,7 @@ static void MaybeAddToImageClasses(Thread* self,
if (klass->IsArrayClass()) {
MaybeAddToImageClasses(self, klass->GetComponentType(), image_classes);
}
- klass.Assign(klass->GetSuperClass());
+ klass = klass->GetSuperClass();
}
}
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 343f67c6d5..9a83e55c96 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -430,6 +430,12 @@ class CompilerDriver {
typedef AtomicDexRefMap<MethodReference, CompiledMethod*> MethodTable;
private:
+ // Resolve const string literals that are loaded from dex code, for every class that is
+ // selected for compilation. If only_startup_strings is set, a pre-resolved strings array is
+ // created in each dex cache and every resolved string is also stored there; in that mode,
+ // when a profile is available, only methods marked as startup in the profile and static
+ // constructors of classes contained in the profile are processed. Without a profile no
+ // method is filtered out.
+ void ResolveConstStrings(const std::vector<const DexFile*>& dex_files,
+ bool only_startup_strings,
+ /*inout*/ TimingLogger* timings);
+
// All method references that this compiler has compiled.
MethodTable compiled_methods_;
diff --git a/compiler/driver/compiler_options.cc b/compiler/driver/compiler_options.cc
index 3ab9afc5d6..6b0e45629b 100644
--- a/compiler/driver/compiler_options.cc
+++ b/compiler/driver/compiler_options.cc
@@ -69,6 +69,7 @@ CompilerOptions::CompilerOptions()
force_determinism_(false),
deduplicate_code_(true),
count_hotness_in_compiled_code_(false),
+ resolve_startup_const_strings_(false),
register_allocation_strategy_(RegisterAllocator::kRegisterAllocatorDefault),
passes_to_run_(nullptr) {
}
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index e9cbf74428..4a6bbfaae6 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -313,6 +313,10 @@ class CompilerOptions final {
return count_hotness_in_compiled_code_;
}
+ // Returns whether const strings loaded from startup methods in the profile should be
+ // resolved eagerly (the value of resolve_startup_const_strings_).
+ bool ResolveStartupConstStrings() const {
+ return resolve_startup_const_strings_;
+ }
+
private:
bool ParseDumpInitFailures(const std::string& option, std::string* error_msg);
void ParseDumpCfgPasses(const StringPiece& option, UsageFn Usage);
@@ -392,6 +396,10 @@ class CompilerOptions final {
// won't be atomic for performance reasons, so we accept races, just like in interpreter.
bool count_hotness_in_compiled_code_;
+ // Whether we eagerly resolve all of the const strings that are loaded from startup methods in the
+ // profile.
+ bool resolve_startup_const_strings_;
+
RegisterAllocator::Strategy register_allocation_strategy_;
// If not null, specifies optimization passes which will be run instead of defaults.
diff --git a/compiler/driver/compiler_options_map-inl.h b/compiler/driver/compiler_options_map-inl.h
index d4a582fb35..5a844959c4 100644
--- a/compiler/driver/compiler_options_map-inl.h
+++ b/compiler/driver/compiler_options_map-inl.h
@@ -80,6 +80,7 @@ inline bool ReadCompilerOptions(Base& map, CompilerOptions* options, std::string
if (map.Exists(Base::CountHotnessInCompiledCode)) {
options->count_hotness_in_compiled_code_ = true;
}
+ map.AssignIfExists(Base::ResolveStartupConstStrings, &options->resolve_startup_const_strings_);
if (map.Exists(Base::DumpTimings)) {
options->dump_timings_ = true;
@@ -184,6 +185,11 @@ inline void AddCompilerOptionsArgumentParserOptions(Builder& b) {
.template WithType<std::string>()
.IntoKey(Map::RegisterAllocationStrategy)
+ .Define("--resolve-startup-const-strings=_")
+ .template WithType<bool>()
+ .WithValueMap({{"false", false}, {"true", true}})
+ .IntoKey(Map::ResolveStartupConstStrings)
+
.Define("--verbose-methods=_")
.template WithType<ParseStringList<','>>()
.IntoKey(Map::VerboseMethods);
diff --git a/compiler/driver/compiler_options_map.def b/compiler/driver/compiler_options_map.def
index 238cd465df..1ec34ec73a 100644
--- a/compiler/driver/compiler_options_map.def
+++ b/compiler/driver/compiler_options_map.def
@@ -52,13 +52,14 @@ COMPILER_OPTIONS_KEY (Unit, Baseline)
COMPILER_OPTIONS_KEY (double, TopKProfileThreshold)
COMPILER_OPTIONS_KEY (bool, AbortOnHardVerifierFailure)
COMPILER_OPTIONS_KEY (bool, AbortOnSoftVerifierFailure)
+COMPILER_OPTIONS_KEY (bool, ResolveStartupConstStrings, kIsDebugBuild)
COMPILER_OPTIONS_KEY (std::string, DumpInitFailures)
COMPILER_OPTIONS_KEY (std::string, DumpCFG)
COMPILER_OPTIONS_KEY (Unit, DumpCFGAppend)
// TODO: Add type parser.
COMPILER_OPTIONS_KEY (std::string, RegisterAllocationStrategy)
COMPILER_OPTIONS_KEY (ParseStringList<','>, VerboseMethods)
-COMPILER_OPTIONS_KEY (bool, DeduplicateCode, true)
+COMPILER_OPTIONS_KEY (bool, DeduplicateCode, true)
COMPILER_OPTIONS_KEY (Unit, CountHotnessInCompiledCode)
COMPILER_OPTIONS_KEY (Unit, DumpTimings)
COMPILER_OPTIONS_KEY (Unit, DumpPassTimings)
diff --git a/compiler/driver/simple_compiler_options_map.h b/compiler/driver/simple_compiler_options_map.h
index 3860da9f66..e7a51a4995 100644
--- a/compiler/driver/simple_compiler_options_map.h
+++ b/compiler/driver/simple_compiler_options_map.h
@@ -50,7 +50,7 @@ using Parser = CmdlineParser<SimpleParseArgumentMap, SimpleParseArgumentMapKey>;
static inline Parser CreateSimpleParser(bool ignore_unrecognized) {
std::unique_ptr<Parser::Builder> parser_builder =
- std::unique_ptr<Parser::Builder>(new Parser::Builder());
+ std::make_unique<Parser::Builder>();
AddCompilerOptionsArgumentParserOptions<SimpleParseArgumentMap>(*parser_builder);
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 3f4fb156b4..a9acf90762 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -1205,6 +1205,7 @@ void CodeGeneratorARM64::SetupBlockedRegisters() const {
// mr : Runtime reserved.
// ip1 : VIXL core temp.
// ip0 : VIXL core temp.
+ // x18 : Platform register.
//
// Blocked fp registers:
// d31 : VIXL fp temp.
@@ -1213,6 +1214,7 @@ void CodeGeneratorARM64::SetupBlockedRegisters() const {
while (!reserved_core_registers.IsEmpty()) {
blocked_core_registers_[reserved_core_registers.PopLowestIndex().GetCode()] = true;
}
+ blocked_core_registers_[X18] = true;
CPURegList reserved_fp_registers = vixl_reserved_fp_registers;
while (!reserved_fp_registers.IsEmpty()) {
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 7f94a298eb..dd781c288f 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -36,6 +36,8 @@
#include "jit/jit_code_cache.h"
#include "mirror/class_loader.h"
#include "mirror/dex_cache.h"
+#include "mirror/object_array-alloc-inl.h"
+#include "mirror/object_array-inl.h"
#include "nodes.h"
#include "optimizing_compiler.h"
#include "reference_type_propagation.h"
diff --git a/compiler/optimizing/intrinsic_objects.cc b/compiler/optimizing/intrinsic_objects.cc
index 3c20ad698b..0374b4e332 100644
--- a/compiler/optimizing/intrinsic_objects.cc
+++ b/compiler/optimizing/intrinsic_objects.cc
@@ -21,6 +21,7 @@
#include "class_root.h"
#include "handle.h"
#include "obj_ptr-inl.h"
+#include "mirror/object_array-alloc-inl.h"
#include "mirror/object_array-inl.h"
namespace art {
diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc
index 85e4326494..0d279ede19 100644
--- a/compiler/utils/assembler_thumb_test_expected.cc.inc
+++ b/compiler/utils/assembler_thumb_test_expected.cc.inc
@@ -76,7 +76,7 @@ const char* const VixlJniHelpersResults[] = {
" f0: f1bc 0f00 cmp.w ip, #0\n",
" f4: bf18 it ne\n",
" f6: f20d 4c01 addwne ip, sp, #1025 ; 0x401\n",
- " fa: f8d9 c08c ldr.w ip, [r9, #140] ; 0x8c\n",
+ " fa: f8d9 c094 ldr.w ip, [r9, #148] ; 0x94\n",
" fe: f1bc 0f00 cmp.w ip, #0\n",
" 102: d171 bne.n 1e8 <VixlJniHelpers+0x1e8>\n",
" 104: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n",
@@ -153,7 +153,7 @@ const char* const VixlJniHelpersResults[] = {
" 21c: f8d9 8034 ldr.w r8, [r9, #52] ; 0x34\n",
" 220: 4770 bx lr\n",
" 222: 4660 mov r0, ip\n",
- " 224: f8d9 c2d4 ldr.w ip, [r9, #724] ; 0x2d4\n",
+ " 224: f8d9 c2dc ldr.w ip, [r9, #732] ; 0x2dc\n",
" 228: 47e0 blx ip\n",
nullptr
};
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 86f9010ea3..2d1e451232 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -59,6 +59,98 @@ std::ostream& operator<<(std::ostream& os, const Address& addr) {
}
}
+uint8_t X86Assembler::EmitVexByteZero(bool is_two_byte) {
+  // Returns the leading byte of a VEX prefix: 0xC5 for the two-byte form,
+  // 0xC4 for the three-byte form. Nothing is written to the buffer here.
+  return is_two_byte ? static_cast<uint8_t>(0xC5) : static_cast<uint8_t>(0xC4);
+}
+
+uint8_t X86Assembler::EmitVexByte1(bool r, bool x, bool b, int mmmmm) {
+  // Returns the second byte of a three-byte VEX prefix.
+  // VEX.R (0x80), VEX.X (0x40) and VEX.B (0x20) are stored inverted: a bit is
+  // set when the corresponding flag is false.
+  uint8_t byte1 = static_cast<uint8_t>((r ? 0x00 : 0x80) |
+                                       (x ? 0x00 : 0x40) |
+                                       (b ? 0x00 : 0x20));
+
+  // VEX.mmmmm selects the implied leading opcode bytes:
+  //   1 -> 0F, 2 -> 0F 38, 3 -> 0F 3A. Any other value is fatal.
+  if (mmmmm == 1 || mmmmm == 2 || mmmmm == 3) {
+    byte1 |= static_cast<uint8_t>(mmmmm);
+  } else {
+    LOG(FATAL) << "unknown opcode bytes";
+  }
+  return byte1;
+}
+
+uint8_t X86Assembler::EmitVexByte2(bool w, int l, X86ManagedRegister operand, int pp) {
+  // Returns the last byte of a three-byte VEX prefix, laid out as
+  // W (bit 7) | vvvv (bits 6:3) | L (bit 2) | pp (bits 1:0).
+  // Nothing is written to the buffer here; the caller emits the result.
+  uint8_t vex_prefix = 0;
+
+  // VEX.W
+  if (w) {
+    vex_prefix |= 0x80;
+  }
+
+  // VEX.vvvv holds the register number in inverted form (15 - number).
+  // If `operand` is neither an XMM nor a CPU register the field is encoded as
+  // 1111b, the "no register" encoding; register number 0 yields the same bits.
+  int reg_number = 0;
+  if (operand.IsXmmRegister()) {
+    reg_number = static_cast<int>(operand.AsXmmRegister());
+  } else if (operand.IsCpuRegister()) {
+    reg_number = static_cast<int>(operand.AsCpuRegister());
+  }
+  vex_prefix |= static_cast<uint8_t>(((15 - reg_number) & 0x0F) << 3);
+
+  // VEX.L: only a vector length of 256 sets the bit; every other value leaves it clear.
+  if (l == 256) {
+    vex_prefix |= 0x04;
+  }
+
+  // VEX.pp
+  switch (pp) {
+    case 0:
+      // SIMD Prefix - None
+      vex_prefix |= 0x00;
+      break;
+    case 1:
+      // SIMD Prefix - 66
+      vex_prefix |= 0x01;
+      break;
+    case 2:
+      // SIMD Prefix - F3
+      vex_prefix |= 0x02;
+      break;
+    case 3:
+      // SIMD Prefix - F2
+      vex_prefix |= 0x03;
+      break;
+    default:
+      LOG(FATAL) << "unknown SIMD Prefix";
+  }
+
+  return vex_prefix;
+}
+
void X86Assembler::call(Register reg) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0xFF);
@@ -179,6 +271,60 @@ void X86Assembler::movntl(const Address& dst, Register src) {
EmitOperand(src, dst);
}
+void X86Assembler::blsi(Register dst, Register src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  // Three-byte VEX prefix (opcode map 0F 38, no SIMD prefix); dst goes into VEX.vvvv.
+  const X86ManagedRegister vvvv = X86ManagedRegister::FromCpuRegister(dst);
+  EmitUint8(EmitVexByteZero(/*is_two_byte=*/ false));
+  EmitUint8(EmitVexByte1(/*r=*/ false, /*x=*/ false, /*b=*/ false, /*mmmmm=*/ 2));
+  EmitUint8(EmitVexByte2(/*w=*/ false, /*l=*/ 128, vvvv, /*pp=*/ 0));
+  // Opcode byte, then a register operand with 3 in the reg field and src as r/m.
+  EmitUint8(0xF3);
+  EmitRegisterOperand(3, src);
+}
+
+void X86Assembler::blsmsk(Register dst, Register src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  // Three-byte VEX prefix (opcode map 0F 38, no SIMD prefix); dst goes into VEX.vvvv.
+  const X86ManagedRegister vvvv = X86ManagedRegister::FromCpuRegister(dst);
+  EmitUint8(EmitVexByteZero(/*is_two_byte=*/ false));
+  EmitUint8(EmitVexByte1(/*r=*/ false, /*x=*/ false, /*b=*/ false, /*mmmmm=*/ 2));
+  EmitUint8(EmitVexByte2(/*w=*/ false, /*l=*/ 128, vvvv, /*pp=*/ 0));
+  // Opcode byte, then a register operand with 2 in the reg field and src as r/m.
+  EmitUint8(0xF3);
+  EmitRegisterOperand(2, src);
+}
+
+void X86Assembler::blsr(Register dst, Register src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  // Three-byte VEX prefix (opcode map 0F 38, no SIMD prefix); dst goes into VEX.vvvv.
+  const X86ManagedRegister vvvv = X86ManagedRegister::FromCpuRegister(dst);
+  EmitUint8(EmitVexByteZero(/*is_two_byte=*/ false));
+  EmitUint8(EmitVexByte1(/*r=*/ false, /*x=*/ false, /*b=*/ false, /*mmmmm=*/ 2));
+  EmitUint8(EmitVexByte2(/*w=*/ false, /*l=*/ 128, vvvv, /*pp=*/ 0));
+  // Opcode byte, then a register operand with 1 in the reg field and src as r/m.
+  EmitUint8(0xF3);
+  EmitRegisterOperand(1, src);
+}
+
void X86Assembler::bswapl(Register dst) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x0F);
@@ -1267,6 +1413,25 @@ void X86Assembler::pand(XmmRegister dst, XmmRegister src) {
EmitXmmRegisterOperand(dst, src);
}
+void X86Assembler::andn(Register dst, Register src1, Register src2) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  // Three-byte VEX prefix (opcode map 0F 38, no SIMD prefix); src1 goes into VEX.vvvv.
+  const X86ManagedRegister vvvv = X86ManagedRegister::FromCpuRegister(src1);
+  EmitUint8(EmitVexByteZero(/*is_two_byte=*/ false));
+  EmitUint8(EmitVexByte1(/*r=*/ false, /*x=*/ false, /*b=*/ false, /*mmmmm=*/ 2));
+  EmitUint8(EmitVexByte2(/*w=*/ false, /*l=*/ 128, vvvv, /*pp=*/ 0));
+  // Opcode byte, then the register operand built from dst and src2.
+  EmitUint8(0xF2);
+  EmitRegisterOperand(dst, src2);
+}
+
void X86Assembler::andnpd(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index 5ac9236d6b..275e5c1234 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -337,6 +337,10 @@ class X86Assembler final : public Assembler {
void movntl(const Address& dst, Register src);
+ void blsi(Register dst, Register src); // no addr variant (for now)
+ void blsmsk(Register dst, Register src); // no addr variant (for now)
+ void blsr(Register dst, Register src); // no addr variant (for now)
+
void bswapl(Register dst);
void bsfl(Register dst, Register src);
@@ -500,6 +504,7 @@ class X86Assembler final : public Assembler {
void andps(XmmRegister dst, const Address& src);
void pand(XmmRegister dst, XmmRegister src); // no addr variant (for now)
+ void andn(Register dst, Register src1, Register src2); // no addr variant (for now)
void andnpd(XmmRegister dst, XmmRegister src); // no addr variant (for now)
void andnps(XmmRegister dst, XmmRegister src);
void pandn(XmmRegister dst, XmmRegister src);
@@ -837,6 +842,11 @@ class X86Assembler final : public Assembler {
void EmitGenericShift(int rm, const Operand& operand, const Immediate& imm);
void EmitGenericShift(int rm, const Operand& operand, Register shifter);
+ // Helpers that compute the individual bytes of a VEX prefix. Each one only returns the
+ // byte value; the caller is responsible for writing it with EmitUint8().
+ uint8_t EmitVexByteZero(bool is_two_byte);
+ uint8_t EmitVexByte1(bool r, bool x, bool b, int mmmmm);
+ uint8_t EmitVexByte2(bool w , int l , X86ManagedRegister operand, int pp);
+
ConstantArea constant_area_;
DISALLOW_COPY_AND_ASSIGN(X86Assembler);
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index ad75174d23..1d8bfe7fa7 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -349,6 +349,18 @@ TEST_F(AssemblerX86Test, RepMovsw) {
DriverStr(expected, "rep_movsw");
}
+TEST_F(AssemblerX86Test, Blsmask) {
+  // Build the expected assembly text from the register-register template first,
+  // then hand it to the driver under the test name "blsmsk".
+  const std::string expected =
+      RepeatRR(&x86::X86Assembler::blsmsk, "blsmsk %{reg2}, %{reg1}");
+  DriverStr(expected, "blsmsk");
+}
+
+TEST_F(AssemblerX86Test, Blsi) {
+  // Build the expected assembly text from the register-register template first,
+  // then hand it to the driver under the test name "blsi".
+  const std::string expected =
+      RepeatRR(&x86::X86Assembler::blsi, "blsi %{reg2}, %{reg1}");
+  DriverStr(expected, "blsi");
+}
+
+TEST_F(AssemblerX86Test, Blsr) {
+  // Build the expected assembly text from the register-register template first,
+  // then hand it to the driver under the test name "blsr".
+  const std::string expected =
+      RepeatRR(&x86::X86Assembler::blsr, "blsr %{reg2}, %{reg1}");
+  DriverStr(expected, "blsr");
+}
+
TEST_F(AssemblerX86Test, Bsfl) {
DriverStr(RepeatRR(&x86::X86Assembler::bsfl, "bsfl %{reg2}, %{reg1}"), "bsfl");
}
@@ -657,6 +669,10 @@ TEST_F(AssemblerX86Test, PAnd) {
DriverStr(RepeatFF(&x86::X86Assembler::pand, "pand %{reg2}, %{reg1}"), "pand");
}
+TEST_F(AssemblerX86Test, Andn) {
+  // Build the expected assembly text from the three-register template first,
+  // then hand it to the driver under the test name "andn".
+  const std::string expected =
+      RepeatRRR(&x86::X86Assembler::andn, "andn %{reg3}, %{reg2}, %{reg1}");
+  DriverStr(expected, "andn");
+}
+
TEST_F(AssemblerX86Test, AndnPD) {
DriverStr(RepeatFF(&x86::X86Assembler::andnpd, "andnpd %{reg2}, %{reg1}"), "andnpd");
}
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index bd31561937..ae68fe934e 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -64,6 +64,99 @@ std::ostream& operator<<(std::ostream& os, const Address& addr) {
}
}
+uint8_t X86_64Assembler::EmitVexByteZero(bool is_two_byte) {
+  // Returns the leading byte of a VEX prefix: 0xC5 for the two-byte form,
+  // 0xC4 for the three-byte form. Nothing is written to the buffer here.
+  return is_two_byte ? static_cast<uint8_t>(0xC5) : static_cast<uint8_t>(0xC4);
+}
+
+uint8_t X86_64Assembler::EmitVexByte1(bool r, bool x, bool b, int mmmmm) {
+  // Returns the second byte of a three-byte VEX prefix.
+  // VEX.R (0x80), VEX.X (0x40) and VEX.B (0x20) are stored inverted: a bit is
+  // set when the corresponding flag is false.
+  uint8_t byte1 = static_cast<uint8_t>((r ? 0x00 : 0x80) |
+                                       (x ? 0x00 : 0x40) |
+                                       (b ? 0x00 : 0x20));
+
+  // VEX.mmmmm selects the implied leading opcode bytes:
+  //   1 -> 0F, 2 -> 0F 38, 3 -> 0F 3A. Any other value is fatal.
+  if (mmmmm == 1 || mmmmm == 2 || mmmmm == 3) {
+    byte1 |= static_cast<uint8_t>(mmmmm);
+  } else {
+    LOG(FATAL) << "unknown opcode bytes";
+  }
+
+  return byte1;
+}
+
+uint8_t X86_64Assembler::EmitVexByte2(bool w, int l, X86_64ManagedRegister operand, int pp) {
+  // Returns the last byte of a three-byte VEX prefix, laid out as
+  // W (bit 7) | vvvv (bits 6:3) | L (bit 2) | pp (bits 1:0).
+  // Nothing is written to the buffer here; the caller emits the result.
+  uint8_t vex_prefix = 0;
+
+  // VEX.W
+  if (w) {
+    vex_prefix |= 0x80;
+  }
+
+  // VEX.vvvv holds the register number in inverted form (15 - number).
+  // If `operand` is neither an XMM nor a CPU register the field is encoded as
+  // 1111b, the "no register" encoding; register number 0 yields the same bits.
+  int reg_number = 0;
+  if (operand.IsXmmRegister()) {
+    reg_number = static_cast<int>(operand.AsXmmRegister().AsFloatRegister());
+  } else if (operand.IsCpuRegister()) {
+    reg_number = static_cast<int>(operand.AsCpuRegister().AsRegister());
+  }
+  vex_prefix |= static_cast<uint8_t>(((15 - reg_number) & 0x0F) << 3);
+
+  // VEX.L: only a vector length of 256 sets the bit; every other value leaves it clear.
+  if (l == 256) {
+    vex_prefix |= 0x04;
+  }
+
+  // VEX.pp
+  switch (pp) {
+    case 0:
+      // SIMD Prefix - None
+      vex_prefix |= 0x00;
+      break;
+    case 1:
+      // SIMD Prefix - 66
+      vex_prefix |= 0x01;
+      break;
+    case 2:
+      // SIMD Prefix - F3
+      vex_prefix |= 0x02;
+      break;
+    case 3:
+      // SIMD Prefix - F2
+      vex_prefix |= 0x03;
+      break;
+    default:
+      LOG(FATAL) << "unknown SIMD Prefix";
+  }
+
+  return vex_prefix;
+}
+
void X86_64Assembler::call(CpuRegister reg) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitOptionalRex32(reg);
@@ -1483,6 +1576,25 @@ void X86_64Assembler::pand(XmmRegister dst, XmmRegister src) {
EmitXmmRegisterOperand(dst.LowBits(), src);
}
+void X86_64Assembler::andn(CpuRegister dst, CpuRegister src1, CpuRegister src2) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  // Three-byte VEX prefix (opcode map 0F 38, W set, no SIMD prefix). The extension bits of
+  // dst and src2 go into VEX.R and VEX.B; src1 goes into VEX.vvvv.
+  const X86_64ManagedRegister vvvv = X86_64ManagedRegister::FromCpuRegister(src1.AsRegister());
+  EmitUint8(EmitVexByteZero(/*is_two_byte=*/ false));
+  EmitUint8(EmitVexByte1(dst.NeedsRex(), /*x=*/ false, src2.NeedsRex(), /*mmmmm=*/ 2));
+  EmitUint8(EmitVexByte2(/*w=*/ true, /*l=*/ 128, vvvv, /*pp=*/ 0));
+  // Opcode byte, then the register operand built from the low bits of dst and src2.
+  EmitUint8(0xF2);
+  EmitRegisterOperand(dst.LowBits(), src2.LowBits());
+}
+
void X86_64Assembler::andnpd(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x66);
@@ -3260,6 +3372,60 @@ void X86_64Assembler::setcc(Condition condition, CpuRegister dst) {
EmitUint8(0xC0 + dst.LowBits());
}
+void X86_64Assembler::blsi(CpuRegister dst, CpuRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  // Three-byte VEX prefix (opcode map 0F 38, W set, no SIMD prefix). The extension bit of
+  // src goes into VEX.B; dst goes into VEX.vvvv.
+  const X86_64ManagedRegister vvvv = X86_64ManagedRegister::FromCpuRegister(dst.AsRegister());
+  EmitUint8(EmitVexByteZero(/*is_two_byte=*/ false));
+  EmitUint8(EmitVexByte1(/*r=*/ false, /*x=*/ false, src.NeedsRex(), /*mmmmm=*/ 2));
+  EmitUint8(EmitVexByte2(/*w=*/ true, /*l=*/ 128, vvvv, /*pp=*/ 0));
+  // Opcode byte, then a register operand with 3 in the reg field and src as r/m.
+  EmitUint8(0xF3);
+  EmitRegisterOperand(3, src.LowBits());
+}
+
+void X86_64Assembler::blsmsk(CpuRegister dst, CpuRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  // Three-byte VEX prefix (opcode map 0F 38, W set, no SIMD prefix). The extension bit of
+  // src goes into VEX.B; dst goes into VEX.vvvv.
+  const X86_64ManagedRegister vvvv = X86_64ManagedRegister::FromCpuRegister(dst.AsRegister());
+  EmitUint8(EmitVexByteZero(/*is_two_byte=*/ false));
+  EmitUint8(EmitVexByte1(/*r=*/ false, /*x=*/ false, src.NeedsRex(), /*mmmmm=*/ 2));
+  EmitUint8(EmitVexByte2(/*w=*/ true, /*l=*/ 128, vvvv, /*pp=*/ 0));
+  // Opcode byte, then a register operand with 2 in the reg field and src as r/m.
+  EmitUint8(0xF3);
+  EmitRegisterOperand(2, src.LowBits());
+}
+
+void X86_64Assembler::blsr(CpuRegister dst, CpuRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  // Three-byte VEX prefix (opcode map 0F 38, W set, no SIMD prefix). The extension bit of
+  // src goes into VEX.B; dst goes into VEX.vvvv.
+  const X86_64ManagedRegister vvvv = X86_64ManagedRegister::FromCpuRegister(dst.AsRegister());
+  EmitUint8(EmitVexByteZero(/*is_two_byte=*/ false));
+  EmitUint8(EmitVexByte1(/*r=*/ false, /*x=*/ false, src.NeedsRex(), /*mmmmm=*/ 2));
+  EmitUint8(EmitVexByte2(/*w=*/ true, /*l=*/ 128, vvvv, /*pp=*/ 0));
+  // Opcode byte, then a register operand with 1 in the reg field and src as r/m.
+  EmitUint8(0xF3);
+  EmitRegisterOperand(1, src.LowBits());
+}
+
void X86_64Assembler::bswapl(CpuRegister dst) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitOptionalRex(false, false, false, false, dst.NeedsRex());
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index e696635e62..ff13ea3293 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -543,6 +543,7 @@ class X86_64Assembler final : public Assembler {
void andps(XmmRegister dst, XmmRegister src); // no addr variant (for now)
void pand(XmmRegister dst, XmmRegister src);
+ void andn(CpuRegister dst, CpuRegister src1, CpuRegister src2);
void andnpd(XmmRegister dst, XmmRegister src); // no addr variant (for now)
void andnps(XmmRegister dst, XmmRegister src);
void pandn(XmmRegister dst, XmmRegister src);
@@ -796,6 +797,10 @@ class X86_64Assembler final : public Assembler {
void bsfq(CpuRegister dst, CpuRegister src);
void bsfq(CpuRegister dst, const Address& src);
+ void blsi(CpuRegister dst, CpuRegister src); // no addr variant (for now)
+ void blsmsk(CpuRegister dst, CpuRegister src); // no addr variant (for now)
+ void blsr(CpuRegister dst, CpuRegister src); // no addr variant (for now)
+
void bsrl(CpuRegister dst, CpuRegister src);
void bsrl(CpuRegister dst, const Address& src);
void bsrq(CpuRegister dst, CpuRegister src);
@@ -951,6 +956,11 @@ class X86_64Assembler final : public Assembler {
void EmitOptionalByteRegNormalizingRex32(CpuRegister dst, CpuRegister src);
void EmitOptionalByteRegNormalizingRex32(CpuRegister dst, const Operand& operand);
+ // Helpers that compute the individual bytes of a VEX prefix. Each one only returns the
+ // byte value; the caller is responsible for writing it with EmitUint8().
+ uint8_t EmitVexByteZero(bool is_two_byte);
+ uint8_t EmitVexByte1(bool r, bool x, bool b, int mmmmm);
+ uint8_t EmitVexByte2(bool w , int l , X86_64ManagedRegister operand, int pp);
+
ConstantArea constant_area_;
DISALLOW_COPY_AND_ASSIGN(X86_64Assembler);
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index fe42f9b19b..528e037bdc 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -1414,7 +1414,9 @@ TEST_F(AssemblerX86_64Test, Andpd) {
TEST_F(AssemblerX86_64Test, Pand) {
DriverStr(RepeatFF(&x86_64::X86_64Assembler::pand, "pand %{reg2}, %{reg1}"), "pand");
}
-
+TEST_F(AssemblerX86_64Test, Andn) {
+  // Build the expected assembly text from the three-register template first,
+  // then hand it to the driver under the test name "andn".
+  const std::string expected =
+      RepeatRRR(&x86_64::X86_64Assembler::andn, "andn %{reg3}, %{reg2}, %{reg1}");
+  DriverStr(expected, "andn");
+}
TEST_F(AssemblerX86_64Test, andnpd) {
DriverStr(RepeatFF(&x86_64::X86_64Assembler::andnpd, "andnpd %{reg2}, %{reg1}"), "andnpd");
}
@@ -1785,6 +1787,18 @@ TEST_F(AssemblerX86_64Test, RetAndLeave) {
DriverFn(&ret_and_leave_fn, "retleave");
}
+TEST_F(AssemblerX86_64Test, Blsmask) {
+  // Build the expected assembly text from the register-register template first,
+  // then hand it to the driver under the test name "blsmsk".
+  const std::string expected =
+      RepeatRR(&x86_64::X86_64Assembler::blsmsk, "blsmsk %{reg2}, %{reg1}");
+  DriverStr(expected, "blsmsk");
+}
+
+TEST_F(AssemblerX86_64Test, Blsi) {
+  // Build the expected assembly text from the register-register template first,
+  // then hand it to the driver under the test name "blsi".
+  const std::string expected =
+      RepeatRR(&x86_64::X86_64Assembler::blsi, "blsi %{reg2}, %{reg1}");
+  DriverStr(expected, "blsi");
+}
+
+TEST_F(AssemblerX86_64Test, Blsr) {
+  // Build the expected assembly text from the register-register template first,
+  // then hand it to the driver under the test name "blsr".
+  const std::string expected =
+      RepeatRR(&x86_64::X86_64Assembler::blsr, "blsr %{reg2}, %{reg1}");
+  DriverStr(expected, "blsr");
+}
+
TEST_F(AssemblerX86_64Test, Bswapl) {
DriverStr(Repeatr(&x86_64::X86_64Assembler::bswapl, "bswap %{reg}"), "bswapl");
}