summaryrefslogtreecommitdiff
path: root/compiler
diff options
context:
space:
mode:
Diffstat (limited to 'compiler')
-rw-r--r--compiler/Android.bp2
-rw-r--r--compiler/common_compiler_test.cc1
-rw-r--r--compiler/common_compiler_test.h4
-rw-r--r--compiler/compiler.h3
-rw-r--r--compiler/dex/dex_to_dex_compiler.cc7
-rw-r--r--compiler/dex/dex_to_dex_compiler.h7
-rw-r--r--compiler/driver/compiler_driver-inl.h13
-rw-r--r--compiler/driver/compiler_driver.cc129
-rw-r--r--compiler/driver/compiler_driver.h14
-rw-r--r--compiler/driver/compiler_driver_test.cc7
-rw-r--r--compiler/driver/dex_compilation_unit.cc2
-rw-r--r--compiler/driver/dex_compilation_unit.h8
-rw-r--r--compiler/image_writer.cc11
-rw-r--r--compiler/image_writer.h10
-rw-r--r--compiler/oat_writer.cc8
-rw-r--r--compiler/optimizing/builder.h15
-rw-r--r--compiler/optimizing/code_generator_arm.cc253
-rw-r--r--compiler/optimizing/code_generator_arm64.cc21
-rw-r--r--compiler/optimizing/code_generator_arm_vixl.cc244
-rw-r--r--compiler/optimizing/code_generator_x86.cc2
-rw-r--r--compiler/optimizing/code_generator_x86_64.cc2
-rw-r--r--compiler/optimizing/common_arm.h12
-rw-r--r--compiler/optimizing/common_arm64.h52
-rw-r--r--compiler/optimizing/graph_visualizer.cc6
-rw-r--r--compiler/optimizing/inliner.cc45
-rw-r--r--compiler/optimizing/instruction_builder.cc47
-rw-r--r--compiler/optimizing/instruction_builder.h11
-rw-r--r--compiler/optimizing/instruction_simplifier_arm.cc155
-rw-r--r--compiler/optimizing/instruction_simplifier_arm.h34
-rw-r--r--compiler/optimizing/instruction_simplifier_arm64.cc29
-rw-r--r--compiler/optimizing/instruction_simplifier_arm64.h4
-rw-r--r--compiler/optimizing/instruction_simplifier_shared.h27
-rw-r--r--compiler/optimizing/loop_optimization.cc6
-rw-r--r--compiler/optimizing/nodes.cc11
-rw-r--r--compiler/optimizing/nodes.h33
-rw-r--r--compiler/optimizing/nodes_arm64.h99
-rw-r--r--compiler/optimizing/nodes_shared.cc (renamed from compiler/optimizing/nodes_arm64.cc)41
-rw-r--r--compiler/optimizing/nodes_shared.h75
-rw-r--r--compiler/optimizing/optimizing_compiler.cc26
-rw-r--r--compiler/optimizing/reference_type_propagation.cc44
-rw-r--r--compiler/optimizing/reference_type_propagation.h3
-rw-r--r--compiler/optimizing/reference_type_propagation_test.cc1
-rw-r--r--compiler/optimizing/scheduler_arm64.cc4
-rw-r--r--compiler/optimizing/scheduler_arm64.h3
-rw-r--r--compiler/optimizing/ssa_builder.cc6
-rw-r--r--compiler/optimizing/ssa_builder.h3
-rw-r--r--compiler/utils/x86/assembler_x86.cc231
-rw-r--r--compiler/utils/x86/assembler_x86.h35
-rw-r--r--compiler/utils/x86/assembler_x86_test.cc101
-rw-r--r--compiler/utils/x86_64/assembler_x86_64.cc206
-rw-r--r--compiler/utils/x86_64/assembler_x86_64.h29
-rw-r--r--compiler/utils/x86_64/assembler_x86_64_test.cc100
52 files changed, 1785 insertions, 457 deletions
diff --git a/compiler/Android.bp b/compiler/Android.bp
index f6a4db49fb..f5589cd7a3 100644
--- a/compiler/Android.bp
+++ b/compiler/Android.bp
@@ -111,6 +111,7 @@ art_cc_defaults {
"optimizing/instruction_simplifier_shared.cc",
"optimizing/intrinsics_arm.cc",
"optimizing/intrinsics_arm_vixl.cc",
+ "optimizing/nodes_shared.cc",
"utils/arm/assembler_arm.cc",
"utils/arm/assembler_arm_vixl.cc",
"utils/arm/assembler_thumb2.cc",
@@ -127,7 +128,6 @@ art_cc_defaults {
"optimizing/scheduler_arm64.cc",
"optimizing/instruction_simplifier_arm64.cc",
"optimizing/intrinsics_arm64.cc",
- "optimizing/nodes_arm64.cc",
"utils/arm64/assembler_arm64.cc",
"utils/arm64/jni_macro_assembler_arm64.cc",
"utils/arm64/managed_register_arm64.cc",
diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc
index 2f9164c0e0..d89cdbabf8 100644
--- a/compiler/common_compiler_test.cc
+++ b/compiler/common_compiler_test.cc
@@ -175,6 +175,7 @@ void CommonCompilerTest::CreateCompilerDriver(Compiler::Kind kind,
InstructionSet isa,
size_t number_of_threads) {
compiler_options_->boot_image_ = true;
+ compiler_options_->SetCompilerFilter(GetCompilerFilter());
compiler_driver_.reset(new CompilerDriver(compiler_options_.get(),
verification_results_.get(),
kind,
diff --git a/compiler/common_compiler_test.h b/compiler/common_compiler_test.h
index 0d45a50053..98dcf20714 100644
--- a/compiler/common_compiler_test.h
+++ b/compiler/common_compiler_test.h
@@ -77,6 +77,10 @@ class CommonCompilerTest : public CommonRuntimeTest {
virtual ProfileCompilationInfo* GetProfileCompilationInfo();
+ virtual CompilerFilter::Filter GetCompilerFilter() const {
+ return CompilerFilter::kDefaultCompilerFilter;
+ }
+
virtual void TearDown();
void CompileClass(mirror::ClassLoader* class_loader, const char* class_name)
diff --git a/compiler/compiler.h b/compiler/compiler.h
index 2ca0b77a73..908d3669ed 100644
--- a/compiler/compiler.h
+++ b/compiler/compiler.h
@@ -27,6 +27,7 @@ namespace jit {
class JitCodeCache;
}
namespace mirror {
+ class ClassLoader;
class DexCache;
}
@@ -63,7 +64,7 @@ class Compiler {
InvokeType invoke_type,
uint16_t class_def_idx,
uint32_t method_idx,
- jobject class_loader,
+ Handle<mirror::ClassLoader> class_loader,
const DexFile& dex_file,
Handle<mirror::DexCache> dex_cache) const = 0;
diff --git a/compiler/dex/dex_to_dex_compiler.cc b/compiler/dex/dex_to_dex_compiler.cc
index d4f6545c59..76aeaa55d7 100644
--- a/compiler/dex/dex_to_dex_compiler.cc
+++ b/compiler/dex/dex_to_dex_compiler.cc
@@ -284,16 +284,13 @@ void DexCompiler::CompileInvokeVirtual(Instruction* inst, uint32_t dex_pc,
}
uint32_t method_idx = is_range ? inst->VRegB_3rc() : inst->VRegB_35c();
ScopedObjectAccess soa(Thread::Current());
- StackHandleScope<1> hs(soa.Self());
- Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
- soa.Decode<mirror::ClassLoader>(unit_.GetClassLoader())));
ClassLinker* class_linker = unit_.GetClassLinker();
ArtMethod* resolved_method = class_linker->ResolveMethod<ClassLinker::kForceICCECheck>(
GetDexFile(),
method_idx,
unit_.GetDexCache(),
- class_loader,
+ unit_.GetClassLoader(),
/* referrer */ nullptr,
kVirtual);
@@ -330,7 +327,7 @@ CompiledMethod* ArtCompileDEX(
InvokeType invoke_type ATTRIBUTE_UNUSED,
uint16_t class_def_idx,
uint32_t method_idx,
- jobject class_loader,
+ Handle<mirror::ClassLoader> class_loader,
const DexFile& dex_file,
DexToDexCompilationLevel dex_to_dex_compilation_level) {
DCHECK(driver != nullptr);
diff --git a/compiler/dex/dex_to_dex_compiler.h b/compiler/dex/dex_to_dex_compiler.h
index 0a00d45297..00c596d60e 100644
--- a/compiler/dex/dex_to_dex_compiler.h
+++ b/compiler/dex/dex_to_dex_compiler.h
@@ -18,6 +18,7 @@
#define ART_COMPILER_DEX_DEX_TO_DEX_COMPILER_H_
#include "dex_file.h"
+#include "handle.h"
#include "invoke_type.h"
namespace art {
@@ -25,6 +26,10 @@ namespace art {
class CompiledMethod;
class CompilerDriver;
+namespace mirror {
+class ClassLoader;
+} // namespace mirror
+
namespace optimizer {
enum class DexToDexCompilationLevel {
@@ -40,7 +45,7 @@ CompiledMethod* ArtCompileDEX(CompilerDriver* driver,
InvokeType invoke_type,
uint16_t class_def_idx,
uint32_t method_idx,
- jobject class_loader,
+ Handle<mirror::ClassLoader> class_loader,
const DexFile& dex_file,
DexToDexCompilationLevel dex_to_dex_compilation_level);
diff --git a/compiler/driver/compiler_driver-inl.h b/compiler/driver/compiler_driver-inl.h
index f296851ebf..582330611d 100644
--- a/compiler/driver/compiler_driver-inl.h
+++ b/compiler/driver/compiler_driver-inl.h
@@ -31,17 +31,12 @@
namespace art {
-inline mirror::ClassLoader* CompilerDriver::GetClassLoader(const ScopedObjectAccess& soa,
- const DexCompilationUnit* mUnit) {
- return soa.Decode<mirror::ClassLoader>(mUnit->GetClassLoader()).Ptr();
-}
-
inline mirror::Class* CompilerDriver::ResolveClass(
const ScopedObjectAccess& soa, Handle<mirror::DexCache> dex_cache,
Handle<mirror::ClassLoader> class_loader, dex::TypeIndex cls_index,
const DexCompilationUnit* mUnit) {
DCHECK_EQ(dex_cache->GetDexFile(), mUnit->GetDexFile());
- DCHECK_EQ(class_loader.Get(), GetClassLoader(soa, mUnit));
+ DCHECK_EQ(class_loader.Get(), mUnit->GetClassLoader().Get());
mirror::Class* cls = mUnit->GetClassLinker()->ResolveType(
*mUnit->GetDexFile(), cls_index, dex_cache, class_loader);
DCHECK_EQ(cls == nullptr, soa.Self()->IsExceptionPending());
@@ -56,7 +51,7 @@ inline mirror::Class* CompilerDriver::ResolveCompilingMethodsClass(
const ScopedObjectAccess& soa, Handle<mirror::DexCache> dex_cache,
Handle<mirror::ClassLoader> class_loader, const DexCompilationUnit* mUnit) {
DCHECK_EQ(dex_cache->GetDexFile(), mUnit->GetDexFile());
- DCHECK_EQ(class_loader.Get(), GetClassLoader(soa, mUnit));
+ DCHECK_EQ(class_loader.Get(), mUnit->GetClassLoader().Get());
const DexFile::MethodId& referrer_method_id =
mUnit->GetDexFile()->GetMethodId(mUnit->GetDexMethodIndex());
return ResolveClass(soa, dex_cache, class_loader, referrer_method_id.class_idx_, mUnit);
@@ -87,7 +82,7 @@ inline ArtField* CompilerDriver::ResolveField(
const ScopedObjectAccess& soa, Handle<mirror::DexCache> dex_cache,
Handle<mirror::ClassLoader> class_loader, const DexCompilationUnit* mUnit,
uint32_t field_idx, bool is_static) {
- DCHECK_EQ(class_loader.Get(), GetClassLoader(soa, mUnit));
+ DCHECK_EQ(class_loader.Get(), mUnit->GetClassLoader().Get());
return ResolveFieldWithDexFile(soa, dex_cache, class_loader, mUnit->GetDexFile(), field_idx,
is_static);
}
@@ -139,7 +134,7 @@ inline ArtMethod* CompilerDriver::ResolveMethod(
ScopedObjectAccess& soa, Handle<mirror::DexCache> dex_cache,
Handle<mirror::ClassLoader> class_loader, const DexCompilationUnit* mUnit,
uint32_t method_idx, InvokeType invoke_type, bool check_incompatible_class_change) {
- DCHECK_EQ(class_loader.Get(), GetClassLoader(soa, mUnit));
+ DCHECK_EQ(class_loader.Get(), mUnit->GetClassLoader().Get());
ArtMethod* resolved_method =
check_incompatible_class_change
? mUnit->GetClassLinker()->ResolveMethod<ClassLinker::kForceICCECheck>(
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 26c0818b85..52ffa55342 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -580,7 +580,7 @@ static void CompileMethod(Thread* self,
InvokeType invoke_type,
uint16_t class_def_idx,
uint32_t method_idx,
- jobject class_loader,
+ Handle<mirror::ClassLoader> class_loader,
const DexFile& dex_file,
optimizer::DexToDexCompilationLevel dex_to_dex_compilation_level,
bool compilation_enabled,
@@ -621,9 +621,6 @@ static void CompileMethod(Thread* self,
// Look-up the ArtMethod associated with this code_item (if any)
// -- It is later used to lookup any [optimization] annotations for this method.
ScopedObjectAccess soa(self);
- StackHandleScope<1> hs(soa.Self());
- Handle<mirror::ClassLoader> class_loader_handle(hs.NewHandle(
- soa.Decode<mirror::ClassLoader>(class_loader)));
// TODO: Lookup annotation from DexFile directly without resolving method.
ArtMethod* method =
@@ -631,7 +628,7 @@ static void CompileMethod(Thread* self,
dex_file,
method_idx,
dex_cache,
- class_loader_handle,
+ class_loader,
/* referrer */ nullptr,
invoke_type);
@@ -678,9 +675,14 @@ static void CompileMethod(Thread* self,
if (compile) {
// NOTE: if compiler declines to compile this method, it will return null.
- compiled_method = driver->GetCompiler()->Compile(code_item, access_flags, invoke_type,
- class_def_idx, method_idx, class_loader,
- dex_file, dex_cache);
+ compiled_method = driver->GetCompiler()->Compile(code_item,
+ access_flags,
+ invoke_type,
+ class_def_idx,
+ method_idx,
+ class_loader,
+ dex_file,
+ dex_cache);
}
if (compiled_method == nullptr &&
dex_to_dex_compilation_level != optimizer::DexToDexCompilationLevel::kDontDexToDexCompile) {
@@ -727,12 +729,14 @@ void CompilerDriver::CompileOne(Thread* self, ArtMethod* method, TimingLogger* t
uint32_t method_idx = method->GetDexMethodIndex();
uint32_t access_flags = method->GetAccessFlags();
InvokeType invoke_type = method->GetInvokeType();
- StackHandleScope<1> hs(self);
+ StackHandleScope<2> hs(self);
Handle<mirror::DexCache> dex_cache(hs.NewHandle(method->GetDexCache()));
+ Handle<mirror::ClassLoader> class_loader(
+ hs.NewHandle(method->GetDeclaringClass()->GetClassLoader()));
{
ScopedObjectAccessUnchecked soa(self);
ScopedLocalRef<jobject> local_class_loader(
- soa.Env(), soa.AddLocalReference<jobject>(method->GetDeclaringClass()->GetClassLoader()));
+ soa.Env(), soa.AddLocalReference<jobject>(class_loader.Get()));
jclass_loader = soa.Env()->NewGlobalRef(local_class_loader.get());
// Find the dex_file
dex_file = method->GetDexFile();
@@ -766,7 +770,7 @@ void CompilerDriver::CompileOne(Thread* self, ArtMethod* method, TimingLogger* t
invoke_type,
class_def_idx,
method_idx,
- jclass_loader,
+ class_loader,
*dex_file,
dex_to_dex_compilation_level,
true,
@@ -792,7 +796,7 @@ void CompilerDriver::CompileOne(Thread* self, ArtMethod* method, TimingLogger* t
invoke_type,
class_def_idx,
method_idx,
- jclass_loader,
+ class_loader,
*dex_file,
dex_to_dex_compilation_level,
true,
@@ -1050,9 +1054,9 @@ bool CompilerDriver::IsMethodToCompile(const MethodReference& method_ref) const
}
bool CompilerDriver::ShouldCompileBasedOnProfile(const MethodReference& method_ref) const {
- if (profile_compilation_info_ == nullptr) {
- // If we miss profile information it means that we don't do a profile guided compilation.
- // Return true, and let the other filters decide if the method should be compiled.
+ if (!CompilerFilter::DependsOnProfile(compiler_options_->GetCompilerFilter())) {
+ // Use the compiler filter instead of the presence of profile_compilation_info_ since
+ // we may want to have full speed compilation along with profile based layout optimizations.
return true;
}
bool result = profile_compilation_info_->ContainsMethod(method_ref);
@@ -1067,22 +1071,30 @@ bool CompilerDriver::ShouldCompileBasedOnProfile(const MethodReference& method_r
class ResolveCatchBlockExceptionsClassVisitor : public ClassVisitor {
public:
- explicit ResolveCatchBlockExceptionsClassVisitor(
- std::set<std::pair<dex::TypeIndex, const DexFile*>>& exceptions_to_resolve)
- : exceptions_to_resolve_(exceptions_to_resolve) {}
+ ResolveCatchBlockExceptionsClassVisitor() : classes_() {}
virtual bool operator()(ObjPtr<mirror::Class> c) OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) {
+ classes_.push_back(c);
+ return true;
+ }
+
+ void FindExceptionTypesToResolve(
+ std::set<std::pair<dex::TypeIndex, const DexFile*>>* exceptions_to_resolve)
+ REQUIRES_SHARED(Locks::mutator_lock_) {
const auto pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
- for (auto& m : c->GetMethods(pointer_size)) {
- ResolveExceptionsForMethod(&m);
+ for (ObjPtr<mirror::Class> klass : classes_) {
+ for (ArtMethod& method : klass->GetMethods(pointer_size)) {
+ FindExceptionTypesToResolveForMethod(&method, exceptions_to_resolve);
+ }
}
- return true;
}
private:
- void ResolveExceptionsForMethod(ArtMethod* method_handle)
+ void FindExceptionTypesToResolveForMethod(
+ ArtMethod* method,
+ std::set<std::pair<dex::TypeIndex, const DexFile*>>* exceptions_to_resolve)
REQUIRES_SHARED(Locks::mutator_lock_) {
- const DexFile::CodeItem* code_item = method_handle->GetCodeItem();
+ const DexFile::CodeItem* code_item = method->GetCodeItem();
if (code_item == nullptr) {
return; // native or abstract method
}
@@ -1102,9 +1114,9 @@ class ResolveCatchBlockExceptionsClassVisitor : public ClassVisitor {
dex::TypeIndex encoded_catch_handler_handlers_type_idx =
dex::TypeIndex(DecodeUnsignedLeb128(&encoded_catch_handler_list));
// Add to set of types to resolve if not already in the dex cache resolved types
- if (!method_handle->IsResolvedTypeIdx(encoded_catch_handler_handlers_type_idx)) {
- exceptions_to_resolve_.emplace(encoded_catch_handler_handlers_type_idx,
- method_handle->GetDexFile());
+ if (!method->IsResolvedTypeIdx(encoded_catch_handler_handlers_type_idx)) {
+ exceptions_to_resolve->emplace(encoded_catch_handler_handlers_type_idx,
+ method->GetDexFile());
}
// ignore address associated with catch handler
DecodeUnsignedLeb128(&encoded_catch_handler_list);
@@ -1116,7 +1128,7 @@ class ResolveCatchBlockExceptionsClassVisitor : public ClassVisitor {
}
}
- std::set<std::pair<dex::TypeIndex, const DexFile*>>& exceptions_to_resolve_;
+ std::vector<ObjPtr<mirror::Class>> classes_;
};
class RecordImageClassesVisitor : public ClassVisitor {
@@ -1170,8 +1182,14 @@ void CompilerDriver::LoadImageClasses(TimingLogger* timings) {
hs.NewHandle(class_linker->FindSystemClass(self, "Ljava/lang/Throwable;")));
do {
unresolved_exception_types.clear();
- ResolveCatchBlockExceptionsClassVisitor visitor(unresolved_exception_types);
- class_linker->VisitClasses(&visitor);
+ {
+ // Thread suspension is not allowed while ResolveCatchBlockExceptionsClassVisitor
+ // is using a std::vector<ObjPtr<mirror::Class>>.
+ ScopedAssertNoThreadSuspension ants(__FUNCTION__);
+ ResolveCatchBlockExceptionsClassVisitor visitor;
+ class_linker->VisitClasses(&visitor);
+ visitor.FindExceptionTypesToResolve(&unresolved_exception_types);
+ }
for (const auto& exception_type : unresolved_exception_types) {
dex::TypeIndex exception_type_idx = exception_type.first;
const DexFile* dex_file = exception_type.second;
@@ -1422,19 +1440,14 @@ void CompilerDriver::MarkForDexToDexCompilation(Thread* self, const MethodRefere
dex_to_dex_references_.back().GetMethodIndexes().SetBit(method_ref.dex_method_index);
}
-bool CompilerDriver::CanAccessTypeWithoutChecks(uint32_t referrer_idx,
- Handle<mirror::DexCache> dex_cache,
- dex::TypeIndex type_idx) {
- // Get type from dex cache assuming it was populated by the verifier
- mirror::Class* resolved_class = dex_cache->GetResolvedType(type_idx);
+bool CompilerDriver::CanAccessTypeWithoutChecks(ObjPtr<mirror::Class> referrer_class,
+ ObjPtr<mirror::Class> resolved_class) {
if (resolved_class == nullptr) {
stats_->TypeNeedsAccessCheck();
return false; // Unknown class needs access checks.
}
- const DexFile::MethodId& method_id = dex_cache->GetDexFile()->GetMethodId(referrer_idx);
bool is_accessible = resolved_class->IsPublic(); // Public classes are always accessible.
if (!is_accessible) {
- mirror::Class* referrer_class = dex_cache->GetResolvedType(method_id.class_idx_);
if (referrer_class == nullptr) {
stats_->TypeNeedsAccessCheck();
return false; // Incomplete referrer knowledge needs access check.
@@ -1451,12 +1464,9 @@ bool CompilerDriver::CanAccessTypeWithoutChecks(uint32_t referrer_idx,
return is_accessible;
}
-bool CompilerDriver::CanAccessInstantiableTypeWithoutChecks(uint32_t referrer_idx,
- Handle<mirror::DexCache> dex_cache,
- dex::TypeIndex type_idx,
+bool CompilerDriver::CanAccessInstantiableTypeWithoutChecks(ObjPtr<mirror::Class> referrer_class,
+ ObjPtr<mirror::Class> resolved_class,
bool* finalizable) {
- // Get type from dex cache assuming it was populated by the verifier.
- mirror::Class* resolved_class = dex_cache->GetResolvedType(type_idx);
if (resolved_class == nullptr) {
stats_->TypeNeedsAccessCheck();
// Be conservative.
@@ -1464,10 +1474,8 @@ bool CompilerDriver::CanAccessInstantiableTypeWithoutChecks(uint32_t referrer_id
return false; // Unknown class needs access checks.
}
*finalizable = resolved_class->IsFinalizable();
- const DexFile::MethodId& method_id = dex_cache->GetDexFile()->GetMethodId(referrer_idx);
bool is_accessible = resolved_class->IsPublic(); // Public classes are always accessible.
if (!is_accessible) {
- mirror::Class* referrer_class = dex_cache->GetResolvedType(method_id.class_idx_);
if (referrer_class == nullptr) {
stats_->TypeNeedsAccessCheck();
return false; // Incomplete referrer knowledge needs access check.
@@ -1511,9 +1519,7 @@ ArtField* CompilerDriver::ComputeInstanceFieldInfo(uint32_t field_idx,
mirror::Class* referrer_class;
Handle<mirror::DexCache> dex_cache(mUnit->GetDexCache());
{
- StackHandleScope<1> hs(soa.Self());
- Handle<mirror::ClassLoader> class_loader_handle(
- hs.NewHandle(soa.Decode<mirror::ClassLoader>(mUnit->GetClassLoader())));
+ Handle<mirror::ClassLoader> class_loader_handle = mUnit->GetClassLoader();
resolved_field = ResolveField(soa, dex_cache, class_loader_handle, mUnit, field_idx, false);
referrer_class = resolved_field != nullptr
? ResolveCompilingMethodsClass(soa, dex_cache, class_loader_handle, mUnit) : nullptr;
@@ -2585,10 +2591,18 @@ class CompileClassVisitor : public CompilationVisitor {
continue;
}
previous_direct_method_idx = method_idx;
- CompileMethod(soa.Self(), driver, it.GetMethodCodeItem(), it.GetMethodAccessFlags(),
- it.GetMethodInvokeType(class_def), class_def_index,
- method_idx, jclass_loader, dex_file, dex_to_dex_compilation_level,
- compilation_enabled, dex_cache);
+ CompileMethod(soa.Self(),
+ driver,
+ it.GetMethodCodeItem(),
+ it.GetMethodAccessFlags(),
+ it.GetMethodInvokeType(class_def),
+ class_def_index,
+ method_idx,
+ class_loader,
+ dex_file,
+ dex_to_dex_compilation_level,
+ compilation_enabled,
+ dex_cache);
it.Next();
}
// Compile virtual methods
@@ -2602,10 +2616,17 @@ class CompileClassVisitor : public CompilationVisitor {
continue;
}
previous_virtual_method_idx = method_idx;
- CompileMethod(soa.Self(), driver, it.GetMethodCodeItem(), it.GetMethodAccessFlags(),
- it.GetMethodInvokeType(class_def), class_def_index,
- method_idx, jclass_loader, dex_file, dex_to_dex_compilation_level,
- compilation_enabled, dex_cache);
+ CompileMethod(soa.Self(),
+ driver, it.GetMethodCodeItem(),
+ it.GetMethodAccessFlags(),
+ it.GetMethodInvokeType(class_def),
+ class_def_index,
+ method_idx,
+ class_loader,
+ dex_file,
+ dex_to_dex_compilation_level,
+ compilation_enabled,
+ dex_cache);
it.Next();
}
DCHECK(!it.HasNext());
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 5b4c751c4a..1e5c43d833 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -187,16 +187,14 @@ class CompilerDriver {
REQUIRES(!requires_constructor_barrier_lock_);
// Are runtime access checks necessary in the compiled code?
- bool CanAccessTypeWithoutChecks(uint32_t referrer_idx,
- Handle<mirror::DexCache> dex_cache,
- dex::TypeIndex type_idx)
+ bool CanAccessTypeWithoutChecks(ObjPtr<mirror::Class> referrer_class,
+ ObjPtr<mirror::Class> resolved_class)
REQUIRES_SHARED(Locks::mutator_lock_);
// Are runtime access and instantiable checks necessary in the code?
// out_is_finalizable is set to whether the type is finalizable.
- bool CanAccessInstantiableTypeWithoutChecks(uint32_t referrer_idx,
- Handle<mirror::DexCache> dex_cache,
- dex::TypeIndex type_idx,
+ bool CanAccessInstantiableTypeWithoutChecks(ObjPtr<mirror::Class> referrer_class,
+ ObjPtr<mirror::Class> resolved_class,
bool* out_is_finalizable)
REQUIRES_SHARED(Locks::mutator_lock_);
@@ -370,10 +368,6 @@ class CompilerDriver {
uint32_t field_idx)
REQUIRES_SHARED(Locks::mutator_lock_);
- mirror::ClassLoader* GetClassLoader(const ScopedObjectAccess& soa,
- const DexCompilationUnit* mUnit)
- REQUIRES_SHARED(Locks::mutator_lock_);
-
private:
void PreCompile(jobject class_loader,
const std::vector<const DexFile*>& dex_files,
diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc
index 1e4ca16844..97954f3c29 100644
--- a/compiler/driver/compiler_driver_test.cc
+++ b/compiler/driver/compiler_driver_test.cc
@@ -101,6 +101,7 @@ class CompilerDriverTest : public CommonCompilerTest {
};
// Disabled due to 10 second runtime on host
+// TODO: Update the test for hash-based dex cache arrays. Bug: 30627598
TEST_F(CompilerDriverTest, DISABLED_LARGE_CompileDexLibCore) {
CompileAll(nullptr);
@@ -246,6 +247,11 @@ class CompilerDriverProfileTest : public CompilerDriverTest {
return &profile_info_;
}
+ CompilerFilter::Filter GetCompilerFilter() const OVERRIDE {
+ // Use a profile based filter.
+ return CompilerFilter::kSpeedProfile;
+ }
+
std::unordered_set<std::string> GetExpectedMethodsForClass(const std::string& clazz) {
if (clazz == "Main") {
return std::unordered_set<std::string>({
@@ -304,7 +310,6 @@ TEST_F(CompilerDriverProfileTest, ProfileGuidedCompilation) {
// Need to enable dex-file writability. Methods rejected to be compiled will run through the
// dex-to-dex compiler.
- ProfileCompilationInfo info;
for (const DexFile* dex_file : GetDexFiles(class_loader)) {
ASSERT_TRUE(dex_file->EnableWrite());
}
diff --git a/compiler/driver/dex_compilation_unit.cc b/compiler/driver/dex_compilation_unit.cc
index 47b19297e5..7e8e812c4a 100644
--- a/compiler/driver/dex_compilation_unit.cc
+++ b/compiler/driver/dex_compilation_unit.cc
@@ -21,7 +21,7 @@
namespace art {
-DexCompilationUnit::DexCompilationUnit(jobject class_loader,
+DexCompilationUnit::DexCompilationUnit(Handle<mirror::ClassLoader> class_loader,
ClassLinker* class_linker,
const DexFile& dex_file,
const DexFile::CodeItem* code_item,
diff --git a/compiler/driver/dex_compilation_unit.h b/compiler/driver/dex_compilation_unit.h
index 854927d747..24a9a5b653 100644
--- a/compiler/driver/dex_compilation_unit.h
+++ b/compiler/driver/dex_compilation_unit.h
@@ -34,7 +34,7 @@ class VerifiedMethod;
class DexCompilationUnit : public DeletableArenaObject<kArenaAllocMisc> {
public:
- DexCompilationUnit(jobject class_loader,
+ DexCompilationUnit(Handle<mirror::ClassLoader> class_loader,
ClassLinker* class_linker,
const DexFile& dex_file,
const DexFile::CodeItem* code_item,
@@ -44,7 +44,7 @@ class DexCompilationUnit : public DeletableArenaObject<kArenaAllocMisc> {
const VerifiedMethod* verified_method,
Handle<mirror::DexCache> dex_cache);
- jobject GetClassLoader() const {
+ Handle<mirror::ClassLoader> GetClassLoader() const {
return class_loader_;
}
@@ -113,7 +113,7 @@ class DexCompilationUnit : public DeletableArenaObject<kArenaAllocMisc> {
}
private:
- const jobject class_loader_;
+ const Handle<mirror::ClassLoader> class_loader_;
ClassLinker* const class_linker_;
@@ -125,7 +125,7 @@ class DexCompilationUnit : public DeletableArenaObject<kArenaAllocMisc> {
const uint32_t access_flags_;
const VerifiedMethod* verified_method_;
- Handle<mirror::DexCache> dex_cache_;
+ const Handle<mirror::DexCache> dex_cache_;
std::string symbol_;
};
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index d2dd30d8e6..117d1131b5 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -940,9 +940,11 @@ void ImageWriter::PruneNonImageClasses() {
}
ObjPtr<mirror::DexCache> dex_cache = self->DecodeJObject(data.weak_root)->AsDexCache();
for (size_t i = 0; i < dex_cache->NumResolvedTypes(); i++) {
- Class* klass = dex_cache->GetResolvedType(dex::TypeIndex(i));
+ mirror::TypeDexCachePair pair =
+ dex_cache->GetResolvedTypes()[i].load(std::memory_order_relaxed);
+ mirror::Class* klass = pair.object.Read();
if (klass != nullptr && !KeepClass(klass)) {
- dex_cache->SetResolvedType(dex::TypeIndex(i), nullptr);
+ dex_cache->ClearResolvedType(dex::TypeIndex(pair.index));
}
}
ArtMethod** resolved_methods = dex_cache->GetResolvedMethods();
@@ -1922,8 +1924,7 @@ void ImageWriter::CopyAndFixupNativeData(size_t oat_index) {
// above comment for intern tables.
ClassTable temp_class_table;
temp_class_table.ReadFromMemory(class_table_memory_ptr);
- CHECK_EQ(class_loaders_.size(), compile_app_image_ ? 1u : 0u);
- mirror::ClassLoader* class_loader = compile_app_image_ ? *class_loaders_.begin() : nullptr;
+ ObjPtr<mirror::ClassLoader> class_loader = GetClassLoader();
CHECK_EQ(temp_class_table.NumZygoteClasses(class_loader),
table->NumNonZygoteClasses(class_loader) + table->NumZygoteClasses(class_loader));
UnbufferedRootVisitor visitor(&root_visitor, RootInfo(kRootUnknown));
@@ -2213,7 +2214,7 @@ void ImageWriter::FixupDexCache(mirror::DexCache* orig_dex_cache,
orig_dex_cache->FixupStrings(NativeCopyLocation(orig_strings, orig_dex_cache),
ImageAddressVisitor(this));
}
- GcRoot<mirror::Class>* orig_types = orig_dex_cache->GetResolvedTypes();
+ mirror::TypeDexCacheType* orig_types = orig_dex_cache->GetResolvedTypes();
if (orig_types != nullptr) {
copy_dex_cache->SetFieldPtrWithSize<false>(mirror::DexCache::ResolvedTypesOffset(),
NativeLocationInImage(orig_types),
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index cc7df1ce21..bdc7146632 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -51,8 +51,13 @@ class ImageSpace;
} // namespace space
} // namespace gc
+namespace mirror {
+class ClassLoader;
+} // namespace mirror
+
class ClassLoaderVisitor;
class ClassTable;
+class ImtConflictTable;
static constexpr int kInvalidFd = -1;
@@ -79,6 +84,11 @@ class ImageWriter FINAL {
return true;
}
+ ObjPtr<mirror::ClassLoader> GetClassLoader() {
+ CHECK_EQ(class_loaders_.size(), compile_app_image_ ? 1u : 0u);
+ return compile_app_image_ ? *class_loaders_.begin() : nullptr;
+ }
+
template <typename T>
T* GetImageAddress(T* object) const REQUIRES_SHARED(Locks::mutator_lock_) {
if (object == nullptr || IsInBootImage(object)) {
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index 7c0cdbf270..0ea11255a8 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -1060,6 +1060,7 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor {
WriteCodeMethodVisitor(OatWriter* writer, OutputStream* out, const size_t file_offset,
size_t relative_offset) SHARED_LOCK_FUNCTION(Locks::mutator_lock_)
: OatDexMethodVisitor(writer, relative_offset),
+ class_loader_(writer->HasImage() ? writer->image_writer_->GetClassLoader() : nullptr),
out_(out),
file_offset_(file_offset),
soa_(Thread::Current()),
@@ -1245,6 +1246,7 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor {
}
private:
+ ObjPtr<mirror::ClassLoader> class_loader_;
OutputStream* const out_;
const size_t file_offset_;
const ScopedObjectAccess soa_;
@@ -1303,10 +1305,12 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor {
}
mirror::Class* GetTargetType(const LinkerPatch& patch) REQUIRES_SHARED(Locks::mutator_lock_) {
+ DCHECK(writer_->HasImage());
ObjPtr<mirror::DexCache> dex_cache = GetDexCache(patch.TargetTypeDexFile());
- mirror::Class* type = dex_cache->GetResolvedType(patch.TargetTypeIndex());
+ ObjPtr<mirror::Class> type =
+ ClassLinker::LookupResolvedType(patch.TargetTypeIndex(), dex_cache, class_loader_);
CHECK(type != nullptr);
- return type;
+ return type.Ptr();
}
mirror::String* GetTargetString(const LinkerPatch& patch) REQUIRES_SHARED(Locks::mutator_lock_) {
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index e4ad4222fb..3a4c9dbd16 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -54,7 +54,10 @@ class HGraphBuilder : public ValueObject {
compiler_driver_(driver),
compilation_stats_(compiler_stats),
block_builder_(graph, dex_file, code_item),
- ssa_builder_(graph, dex_compilation_unit->GetDexCache(), handles),
+ ssa_builder_(graph,
+ dex_compilation_unit->GetClassLoader(),
+ dex_compilation_unit->GetDexCache(),
+ handles),
instruction_builder_(graph,
&block_builder_,
&ssa_builder_,
@@ -80,10 +83,12 @@ class HGraphBuilder : public ValueObject {
code_item_(code_item),
dex_compilation_unit_(nullptr),
compiler_driver_(nullptr),
- null_dex_cache_(),
compilation_stats_(nullptr),
block_builder_(graph, nullptr, code_item),
- ssa_builder_(graph, null_dex_cache_, handles),
+ ssa_builder_(graph,
+ handles->NewHandle<mirror::ClassLoader>(nullptr),
+ handles->NewHandle<mirror::DexCache>(nullptr),
+ handles),
instruction_builder_(graph,
&block_builder_,
&ssa_builder_,
@@ -96,7 +101,7 @@ class HGraphBuilder : public ValueObject {
/* code_generator */ nullptr,
/* interpreter_metadata */ nullptr,
/* compiler_stats */ nullptr,
- null_dex_cache_,
+ handles->NewHandle<mirror::DexCache>(nullptr),
handles) {}
GraphAnalysisResult BuildGraph();
@@ -117,8 +122,6 @@ class HGraphBuilder : public ValueObject {
CompilerDriver* const compiler_driver_;
- ScopedNullHandle<mirror::DexCache> null_dex_cache_;
-
OptimizingCompilerStats* compilation_stats_;
HBasicBlockBuilder block_builder_;
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 759a951d6b..7b84ef83cd 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -19,6 +19,7 @@
#include "arch/arm/instruction_set_features_arm.h"
#include "art_method.h"
#include "code_generator_utils.h"
+#include "common_arm.h"
#include "compiled_method.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "gc/accounting/card_table.h"
@@ -1132,10 +1133,6 @@ class ReadBarrierForRootSlowPathARM : public SlowPathCodeARM {
DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathARM);
};
-#undef __
-// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
-#define __ down_cast<ArmAssembler*>(GetAssembler())-> // NOLINT
-
inline Condition ARMCondition(IfCondition cond) {
switch (cond) {
case kCondEQ: return EQ;
@@ -1191,6 +1188,197 @@ inline Condition ARMFPCondition(IfCondition cond, bool gt_bias) {
}
}
+inline Shift ShiftFromOpKind(HDataProcWithShifterOp::OpKind op_kind) {
+ switch (op_kind) {
+ case HDataProcWithShifterOp::kASR: return ASR;
+ case HDataProcWithShifterOp::kLSL: return LSL;
+ case HDataProcWithShifterOp::kLSR: return LSR;
+ default:
+ LOG(FATAL) << "Unexpected op kind " << op_kind;
+ UNREACHABLE();
+ }
+}
+
+static void GenerateDataProcInstruction(HInstruction::InstructionKind kind,
+ Register out,
+ Register first,
+ const ShifterOperand& second,
+ CodeGeneratorARM* codegen) {
+ if (second.IsImmediate() && second.GetImmediate() == 0) {
+ const ShifterOperand in = kind == HInstruction::kAnd
+ ? ShifterOperand(0)
+ : ShifterOperand(first);
+
+ __ mov(out, in);
+ } else {
+ switch (kind) {
+ case HInstruction::kAdd:
+ __ add(out, first, second);
+ break;
+ case HInstruction::kAnd:
+ __ and_(out, first, second);
+ break;
+ case HInstruction::kOr:
+ __ orr(out, first, second);
+ break;
+ case HInstruction::kSub:
+ __ sub(out, first, second);
+ break;
+ case HInstruction::kXor:
+ __ eor(out, first, second);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected instruction kind: " << kind;
+ UNREACHABLE();
+ }
+ }
+}
+
+static void GenerateDataProc(HInstruction::InstructionKind kind,
+ const Location& out,
+ const Location& first,
+ const ShifterOperand& second_lo,
+ const ShifterOperand& second_hi,
+ CodeGeneratorARM* codegen) {
+ const Register first_hi = first.AsRegisterPairHigh<Register>();
+ const Register first_lo = first.AsRegisterPairLow<Register>();
+ const Register out_hi = out.AsRegisterPairHigh<Register>();
+ const Register out_lo = out.AsRegisterPairLow<Register>();
+
+ if (kind == HInstruction::kAdd) {
+ __ adds(out_lo, first_lo, second_lo);
+ __ adc(out_hi, first_hi, second_hi);
+ } else if (kind == HInstruction::kSub) {
+ __ subs(out_lo, first_lo, second_lo);
+ __ sbc(out_hi, first_hi, second_hi);
+ } else {
+ GenerateDataProcInstruction(kind, out_lo, first_lo, second_lo, codegen);
+ GenerateDataProcInstruction(kind, out_hi, first_hi, second_hi, codegen);
+ }
+}
+
+static ShifterOperand GetShifterOperand(Register rm, Shift shift, uint32_t shift_imm) {
+ return shift_imm == 0 ? ShifterOperand(rm) : ShifterOperand(rm, shift, shift_imm);
+}
+
+static void GenerateLongDataProc(HDataProcWithShifterOp* instruction, CodeGeneratorARM* codegen) {
+ DCHECK_EQ(instruction->GetType(), Primitive::kPrimLong);
+ DCHECK(HDataProcWithShifterOp::IsShiftOp(instruction->GetOpKind()));
+
+ const LocationSummary* const locations = instruction->GetLocations();
+ const uint32_t shift_value = instruction->GetShiftAmount();
+ const HInstruction::InstructionKind kind = instruction->GetInstrKind();
+ const Location first = locations->InAt(0);
+ const Location second = locations->InAt(1);
+ const Location out = locations->Out();
+ const Register first_hi = first.AsRegisterPairHigh<Register>();
+ const Register first_lo = first.AsRegisterPairLow<Register>();
+ const Register out_hi = out.AsRegisterPairHigh<Register>();
+ const Register out_lo = out.AsRegisterPairLow<Register>();
+ const Register second_hi = second.AsRegisterPairHigh<Register>();
+ const Register second_lo = second.AsRegisterPairLow<Register>();
+ const Shift shift = ShiftFromOpKind(instruction->GetOpKind());
+
+ if (shift_value >= 32) {
+ if (shift == LSL) {
+ GenerateDataProcInstruction(kind,
+ out_hi,
+ first_hi,
+ ShifterOperand(second_lo, LSL, shift_value - 32),
+ codegen);
+ GenerateDataProcInstruction(kind,
+ out_lo,
+ first_lo,
+ ShifterOperand(0),
+ codegen);
+ } else if (shift == ASR) {
+ GenerateDataProc(kind,
+ out,
+ first,
+ GetShifterOperand(second_hi, ASR, shift_value - 32),
+ ShifterOperand(second_hi, ASR, 31),
+ codegen);
+ } else {
+ DCHECK_EQ(shift, LSR);
+ GenerateDataProc(kind,
+ out,
+ first,
+ GetShifterOperand(second_hi, LSR, shift_value - 32),
+ ShifterOperand(0),
+ codegen);
+ }
+ } else {
+ DCHECK_GT(shift_value, 1U);
+ DCHECK_LT(shift_value, 32U);
+
+ if (shift == LSL) {
+ // We are not doing this for HInstruction::kAdd because the output will require
+ // Location::kOutputOverlap; not applicable to other cases.
+ if (kind == HInstruction::kOr || kind == HInstruction::kXor) {
+ GenerateDataProcInstruction(kind,
+ out_hi,
+ first_hi,
+ ShifterOperand(second_hi, LSL, shift_value),
+ codegen);
+ GenerateDataProcInstruction(kind,
+ out_hi,
+ out_hi,
+ ShifterOperand(second_lo, LSR, 32 - shift_value),
+ codegen);
+ GenerateDataProcInstruction(kind,
+ out_lo,
+ first_lo,
+ ShifterOperand(second_lo, LSL, shift_value),
+ codegen);
+ } else {
+ __ Lsl(IP, second_hi, shift_value);
+ __ orr(IP, IP, ShifterOperand(second_lo, LSR, 32 - shift_value));
+ GenerateDataProc(kind,
+ out,
+ first,
+ ShifterOperand(second_lo, LSL, shift_value),
+ ShifterOperand(IP),
+ codegen);
+ }
+ } else {
+ DCHECK(shift == ASR || shift == LSR);
+
+ // We are not doing this for HInstruction::kAdd because the output will require
+ // Location::kOutputOverlap; not applicable to other cases.
+ if (kind == HInstruction::kOr || kind == HInstruction::kXor) {
+ GenerateDataProcInstruction(kind,
+ out_lo,
+ first_lo,
+ ShifterOperand(second_lo, LSR, shift_value),
+ codegen);
+ GenerateDataProcInstruction(kind,
+ out_lo,
+ out_lo,
+ ShifterOperand(second_hi, LSL, 32 - shift_value),
+ codegen);
+ GenerateDataProcInstruction(kind,
+ out_hi,
+ first_hi,
+ ShifterOperand(second_hi, shift, shift_value),
+ codegen);
+ } else {
+ __ Lsr(IP, second_lo, shift_value);
+ __ orr(IP, IP, ShifterOperand(second_hi, LSL, 32 - shift_value));
+ GenerateDataProc(kind,
+ out,
+ first,
+ ShifterOperand(IP),
+ ShifterOperand(second_hi, shift, shift_value),
+ codegen);
+ }
+ }
+ }
+}
+
+#undef __
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<ArmAssembler*>(GetAssembler())-> // NOLINT
+
void CodeGeneratorARM::DumpCoreRegister(std::ostream& stream, int reg) const {
stream << Register(reg);
}
@@ -6709,6 +6897,63 @@ void InstructionCodeGeneratorARM::VisitBitwiseNegatedRight(HBitwiseNegatedRight*
}
}
+void LocationsBuilderARM::VisitDataProcWithShifterOp(
+ HDataProcWithShifterOp* instruction) {
+ DCHECK(instruction->GetType() == Primitive::kPrimInt ||
+ instruction->GetType() == Primitive::kPrimLong);
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+ const bool overlap = instruction->GetType() == Primitive::kPrimLong &&
+ HDataProcWithShifterOp::IsExtensionOp(instruction->GetOpKind());
+
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(),
+ overlap ? Location::kOutputOverlap : Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorARM::VisitDataProcWithShifterOp(
+ HDataProcWithShifterOp* instruction) {
+ const LocationSummary* const locations = instruction->GetLocations();
+ const HInstruction::InstructionKind kind = instruction->GetInstrKind();
+ const HDataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind();
+ const Location left = locations->InAt(0);
+ const Location right = locations->InAt(1);
+ const Location out = locations->Out();
+
+ if (instruction->GetType() == Primitive::kPrimInt) {
+ DCHECK(!HDataProcWithShifterOp::IsExtensionOp(op_kind));
+
+ const Register second = instruction->InputAt(1)->GetType() == Primitive::kPrimLong
+ ? right.AsRegisterPairLow<Register>()
+ : right.AsRegister<Register>();
+
+ GenerateDataProcInstruction(kind,
+ out.AsRegister<Register>(),
+ left.AsRegister<Register>(),
+ ShifterOperand(second,
+ ShiftFromOpKind(op_kind),
+ instruction->GetShiftAmount()),
+ codegen_);
+ } else {
+ DCHECK_EQ(instruction->GetType(), Primitive::kPrimLong);
+
+ if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) {
+ const Register second = right.AsRegister<Register>();
+
+ DCHECK_NE(out.AsRegisterPairLow<Register>(), second);
+ GenerateDataProc(kind,
+ out,
+ left,
+ ShifterOperand(second),
+ ShifterOperand(second, ASR, 31),
+ codegen_);
+ } else {
+ GenerateLongDataProc(instruction, codegen_);
+ }
+ }
+}
+
void InstructionCodeGeneratorARM::GenerateAndConst(Register out, Register first, uint32_t value) {
// Optimize special cases for individual halfs of `and-long` (`and` is simplified earlier).
if (value == 0xffffffffu) {
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index e6032d2381..edccbd4904 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -2277,8 +2277,8 @@ void InstructionCodeGeneratorARM64::VisitBitwiseNegatedRight(HBitwiseNegatedRigh
}
}
-void LocationsBuilderARM64::VisitArm64DataProcWithShifterOp(
- HArm64DataProcWithShifterOp* instruction) {
+void LocationsBuilderARM64::VisitDataProcWithShifterOp(
+ HDataProcWithShifterOp* instruction) {
DCHECK(instruction->GetType() == Primitive::kPrimInt ||
instruction->GetType() == Primitive::kPrimLong);
LocationSummary* locations =
@@ -2292,8 +2292,8 @@ void LocationsBuilderARM64::VisitArm64DataProcWithShifterOp(
locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}
-void InstructionCodeGeneratorARM64::VisitArm64DataProcWithShifterOp(
- HArm64DataProcWithShifterOp* instruction) {
+void InstructionCodeGeneratorARM64::VisitDataProcWithShifterOp(
+ HDataProcWithShifterOp* instruction) {
Primitive::Type type = instruction->GetType();
HInstruction::InstructionKind kind = instruction->GetInstrKind();
DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
@@ -2302,21 +2302,20 @@ void InstructionCodeGeneratorARM64::VisitArm64DataProcWithShifterOp(
if (kind != HInstruction::kNeg) {
left = InputRegisterAt(instruction, 0);
}
- // If this `HArm64DataProcWithShifterOp` was created by merging a type conversion as the
+ // If this `HDataProcWithShifterOp` was created by merging a type conversion as the
// shifter operand operation, the IR generating `right_reg` (input to the type
// conversion) can have a different type from the current instruction's type,
// so we manually indicate the type.
Register right_reg = RegisterFrom(instruction->GetLocations()->InAt(1), type);
- int64_t shift_amount = instruction->GetShiftAmount() &
- (type == Primitive::kPrimInt ? kMaxIntShiftDistance : kMaxLongShiftDistance);
-
Operand right_operand(0);
- HArm64DataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind();
- if (HArm64DataProcWithShifterOp::IsExtensionOp(op_kind)) {
+ HDataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind();
+ if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) {
right_operand = Operand(right_reg, helpers::ExtendFromOpKind(op_kind));
} else {
- right_operand = Operand(right_reg, helpers::ShiftFromOpKind(op_kind), shift_amount);
+ right_operand = Operand(right_reg,
+ helpers::ShiftFromOpKind(op_kind),
+ instruction->GetShiftAmount());
}
// Logical binary operations do not support extension operations in the
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 5c4ca5bc17..6bfbe4a9c9 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -1216,6 +1216,17 @@ inline vixl32::Condition ARMFPCondition(IfCondition cond, bool gt_bias) {
}
}
+inline ShiftType ShiftFromOpKind(HDataProcWithShifterOp::OpKind op_kind) {
+ switch (op_kind) {
+ case HDataProcWithShifterOp::kASR: return ShiftType::ASR;
+ case HDataProcWithShifterOp::kLSL: return ShiftType::LSL;
+ case HDataProcWithShifterOp::kLSR: return ShiftType::LSR;
+ default:
+ LOG(FATAL) << "Unexpected op kind " << op_kind;
+ UNREACHABLE();
+ }
+}
+
void CodeGeneratorARMVIXL::DumpCoreRegister(std::ostream& stream, int reg) const {
stream << vixl32::Register(reg);
}
@@ -1260,6 +1271,185 @@ size_t CodeGeneratorARMVIXL::RestoreFloatingPointRegister(size_t stack_index ATT
return 0;
}
+static void GenerateDataProcInstruction(HInstruction::InstructionKind kind,
+ vixl32::Register out,
+ vixl32::Register first,
+ const Operand& second,
+ CodeGeneratorARMVIXL* codegen) {
+ if (second.IsImmediate() && second.GetImmediate() == 0) {
+ const Operand in = kind == HInstruction::kAnd
+ ? Operand(0)
+ : Operand(first);
+
+ __ Mov(out, in);
+ } else {
+ switch (kind) {
+ case HInstruction::kAdd:
+ __ Add(out, first, second);
+ break;
+ case HInstruction::kAnd:
+ __ And(out, first, second);
+ break;
+ case HInstruction::kOr:
+ __ Orr(out, first, second);
+ break;
+ case HInstruction::kSub:
+ __ Sub(out, first, second);
+ break;
+ case HInstruction::kXor:
+ __ Eor(out, first, second);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected instruction kind: " << kind;
+ UNREACHABLE();
+ }
+ }
+}
+
+static void GenerateDataProc(HInstruction::InstructionKind kind,
+ const Location& out,
+ const Location& first,
+ const Operand& second_lo,
+ const Operand& second_hi,
+ CodeGeneratorARMVIXL* codegen) {
+ const vixl32::Register first_hi = HighRegisterFrom(first);
+ const vixl32::Register first_lo = LowRegisterFrom(first);
+ const vixl32::Register out_hi = HighRegisterFrom(out);
+ const vixl32::Register out_lo = LowRegisterFrom(out);
+
+ if (kind == HInstruction::kAdd) {
+ __ Adds(out_lo, first_lo, second_lo);
+ __ Adc(out_hi, first_hi, second_hi);
+ } else if (kind == HInstruction::kSub) {
+ __ Subs(out_lo, first_lo, second_lo);
+ __ Sbc(out_hi, first_hi, second_hi);
+ } else {
+ GenerateDataProcInstruction(kind, out_lo, first_lo, second_lo, codegen);
+ GenerateDataProcInstruction(kind, out_hi, first_hi, second_hi, codegen);
+ }
+}
+
+static Operand GetShifterOperand(vixl32::Register rm, ShiftType shift, uint32_t shift_imm) {
+ return shift_imm == 0 ? Operand(rm) : Operand(rm, shift, shift_imm);
+}
+
+static void GenerateLongDataProc(HDataProcWithShifterOp* instruction,
+ CodeGeneratorARMVIXL* codegen) {
+ DCHECK_EQ(instruction->GetType(), Primitive::kPrimLong);
+ DCHECK(HDataProcWithShifterOp::IsShiftOp(instruction->GetOpKind()));
+
+ const LocationSummary* const locations = instruction->GetLocations();
+ const uint32_t shift_value = instruction->GetShiftAmount();
+ const HInstruction::InstructionKind kind = instruction->GetInstrKind();
+ const Location first = locations->InAt(0);
+ const Location second = locations->InAt(1);
+ const Location out = locations->Out();
+ const vixl32::Register first_hi = HighRegisterFrom(first);
+ const vixl32::Register first_lo = LowRegisterFrom(first);
+ const vixl32::Register out_hi = HighRegisterFrom(out);
+ const vixl32::Register out_lo = LowRegisterFrom(out);
+ const vixl32::Register second_hi = HighRegisterFrom(second);
+ const vixl32::Register second_lo = LowRegisterFrom(second);
+ const ShiftType shift = ShiftFromOpKind(instruction->GetOpKind());
+
+ if (shift_value >= 32) {
+ if (shift == ShiftType::LSL) {
+ GenerateDataProcInstruction(kind,
+ out_hi,
+ first_hi,
+ Operand(second_lo, ShiftType::LSL, shift_value - 32),
+ codegen);
+ GenerateDataProcInstruction(kind, out_lo, first_lo, 0, codegen);
+ } else if (shift == ShiftType::ASR) {
+ GenerateDataProc(kind,
+ out,
+ first,
+ GetShifterOperand(second_hi, ShiftType::ASR, shift_value - 32),
+ Operand(second_hi, ShiftType::ASR, 31),
+ codegen);
+ } else {
+ DCHECK_EQ(shift, ShiftType::LSR);
+ GenerateDataProc(kind,
+ out,
+ first,
+ GetShifterOperand(second_hi, ShiftType::LSR, shift_value - 32),
+ 0,
+ codegen);
+ }
+ } else {
+ DCHECK_GT(shift_value, 1U);
+ DCHECK_LT(shift_value, 32U);
+
+ UseScratchRegisterScope temps(codegen->GetVIXLAssembler());
+
+ if (shift == ShiftType::LSL) {
+ // We are not doing this for HInstruction::kAdd because the output will require
+ // Location::kOutputOverlap; not applicable to other cases.
+ if (kind == HInstruction::kOr || kind == HInstruction::kXor) {
+ GenerateDataProcInstruction(kind,
+ out_hi,
+ first_hi,
+ Operand(second_hi, ShiftType::LSL, shift_value),
+ codegen);
+ GenerateDataProcInstruction(kind,
+ out_hi,
+ out_hi,
+ Operand(second_lo, ShiftType::LSR, 32 - shift_value),
+ codegen);
+ GenerateDataProcInstruction(kind,
+ out_lo,
+ first_lo,
+ Operand(second_lo, ShiftType::LSL, shift_value),
+ codegen);
+ } else {
+ const vixl32::Register temp = temps.Acquire();
+
+ __ Lsl(temp, second_hi, shift_value);
+ __ Orr(temp, temp, Operand(second_lo, ShiftType::LSR, 32 - shift_value));
+ GenerateDataProc(kind,
+ out,
+ first,
+ Operand(second_lo, ShiftType::LSL, shift_value),
+ temp,
+ codegen);
+ }
+ } else {
+ DCHECK(shift == ShiftType::ASR || shift == ShiftType::LSR);
+
+ // We are not doing this for HInstruction::kAdd because the output will require
+ // Location::kOutputOverlap; not applicable to other cases.
+ if (kind == HInstruction::kOr || kind == HInstruction::kXor) {
+ GenerateDataProcInstruction(kind,
+ out_lo,
+ first_lo,
+ Operand(second_lo, ShiftType::LSR, shift_value),
+ codegen);
+ GenerateDataProcInstruction(kind,
+ out_lo,
+ out_lo,
+ Operand(second_hi, ShiftType::LSL, 32 - shift_value),
+ codegen);
+ GenerateDataProcInstruction(kind,
+ out_hi,
+ first_hi,
+ Operand(second_hi, shift, shift_value),
+ codegen);
+ } else {
+ const vixl32::Register temp = temps.Acquire();
+
+ __ Lsr(temp, second_lo, shift_value);
+ __ Orr(temp, temp, Operand(second_hi, ShiftType::LSL, 32 - shift_value));
+ GenerateDataProc(kind,
+ out,
+ first,
+ temp,
+ Operand(second_hi, shift, shift_value),
+ codegen);
+ }
+ }
+ }
+}
+
#undef __
CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph,
@@ -6781,6 +6971,60 @@ void InstructionCodeGeneratorARMVIXL::VisitBitwiseNegatedRight(HBitwiseNegatedRi
}
}
+void LocationsBuilderARMVIXL::VisitDataProcWithShifterOp(
+ HDataProcWithShifterOp* instruction) {
+ DCHECK(instruction->GetType() == Primitive::kPrimInt ||
+ instruction->GetType() == Primitive::kPrimLong);
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+ const bool overlap = instruction->GetType() == Primitive::kPrimLong &&
+ HDataProcWithShifterOp::IsExtensionOp(instruction->GetOpKind());
+
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(),
+ overlap ? Location::kOutputOverlap : Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitDataProcWithShifterOp(
+ HDataProcWithShifterOp* instruction) {
+ const LocationSummary* const locations = instruction->GetLocations();
+ const HInstruction::InstructionKind kind = instruction->GetInstrKind();
+ const HDataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind();
+
+ if (instruction->GetType() == Primitive::kPrimInt) {
+ DCHECK(!HDataProcWithShifterOp::IsExtensionOp(op_kind));
+
+ const vixl32::Register second = instruction->InputAt(1)->GetType() == Primitive::kPrimLong
+ ? LowRegisterFrom(locations->InAt(1))
+ : InputRegisterAt(instruction, 1);
+
+ GenerateDataProcInstruction(kind,
+ OutputRegister(instruction),
+ InputRegisterAt(instruction, 0),
+ Operand(second,
+ ShiftFromOpKind(op_kind),
+ instruction->GetShiftAmount()),
+ codegen_);
+ } else {
+ DCHECK_EQ(instruction->GetType(), Primitive::kPrimLong);
+
+ if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) {
+ const vixl32::Register second = InputRegisterAt(instruction, 1);
+
+ DCHECK(!LowRegisterFrom(locations->Out()).Is(second));
+ GenerateDataProc(kind,
+ locations->Out(),
+ locations->InAt(0),
+ second,
+ Operand(second, ShiftType::ASR, 31),
+ codegen_);
+ } else {
+ GenerateLongDataProc(instruction, codegen_);
+ }
+ }
+}
+
// TODO(VIXL): Remove optimizations in the helper when they are implemented in vixl.
void InstructionCodeGeneratorARMVIXL::GenerateAndConst(vixl32::Register out,
vixl32::Register first,
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 09612c8dbf..b779aed763 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -5262,7 +5262,7 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
// Branch cases into compressed and uncompressed for each index's type.
uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
NearLabel done, not_compressed;
- __ testl(Address(obj, count_offset), Immediate(1));
+ __ testb(Address(obj, count_offset), Immediate(1));
codegen_->MaybeRecordImplicitNullCheck(instruction);
static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
"Expecting 0=compressed, 1=uncompressed");
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 0879992e32..179bf6d3d1 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -4720,7 +4720,7 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
// Branch cases into compressed and uncompressed for each index's type.
uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
NearLabel done, not_compressed;
- __ testl(Address(obj, count_offset), Immediate(1));
+ __ testb(Address(obj, count_offset), Immediate(1));
codegen_->MaybeRecordImplicitNullCheck(instruction);
static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
"Expecting 0=compressed, 1=uncompressed");
diff --git a/compiler/optimizing/common_arm.h b/compiler/optimizing/common_arm.h
index ecb86875d6..e184745520 100644
--- a/compiler/optimizing/common_arm.h
+++ b/compiler/optimizing/common_arm.h
@@ -17,6 +17,7 @@
#ifndef ART_COMPILER_OPTIMIZING_COMMON_ARM_H_
#define ART_COMPILER_OPTIMIZING_COMMON_ARM_H_
+#include "instruction_simplifier_shared.h"
#include "debug/dwarf/register.h"
#include "locations.h"
#include "nodes.h"
@@ -29,6 +30,9 @@
#pragma GCC diagnostic pop
namespace art {
+
+using helpers::HasShifterOperand;
+
namespace arm {
namespace helpers {
@@ -218,6 +222,14 @@ inline Location LocationFrom(const vixl::aarch32::SRegister& low,
return Location::FpuRegisterPairLocation(low.GetCode(), high.GetCode());
}
+inline bool ShifterOperandSupportsExtension(HInstruction* instruction) {
+ DCHECK(HasShifterOperand(instruction, kArm));
+ // TODO: HAdd applied to the other integral types could make use of
+ // the SXTAB, SXTAH, UXTAB and UXTAH instructions.
+ return instruction->GetType() == Primitive::kPrimLong &&
+ (instruction->IsAdd() || instruction->IsSub());
+}
+
} // namespace helpers
} // namespace arm
} // namespace art
diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h
index 93ea090583..d3f431e327 100644
--- a/compiler/optimizing/common_arm64.h
+++ b/compiler/optimizing/common_arm64.h
@@ -18,6 +18,7 @@
#define ART_COMPILER_OPTIMIZING_COMMON_ARM64_H_
#include "code_generator.h"
+#include "instruction_simplifier_shared.h"
#include "locations.h"
#include "nodes.h"
#include "utils/arm64/assembler_arm64.h"
@@ -31,6 +32,10 @@
#pragma GCC diagnostic pop
namespace art {
+
+using helpers::CanFitInShifterOperand;
+using helpers::HasShifterOperand;
+
namespace arm64 {
namespace helpers {
@@ -290,11 +295,11 @@ inline bool ArtVixlRegCodeCoherentForRegSet(uint32_t art_core_registers,
return true;
}
-inline vixl::aarch64::Shift ShiftFromOpKind(HArm64DataProcWithShifterOp::OpKind op_kind) {
+inline vixl::aarch64::Shift ShiftFromOpKind(HDataProcWithShifterOp::OpKind op_kind) {
switch (op_kind) {
- case HArm64DataProcWithShifterOp::kASR: return vixl::aarch64::ASR;
- case HArm64DataProcWithShifterOp::kLSL: return vixl::aarch64::LSL;
- case HArm64DataProcWithShifterOp::kLSR: return vixl::aarch64::LSR;
+ case HDataProcWithShifterOp::kASR: return vixl::aarch64::ASR;
+ case HDataProcWithShifterOp::kLSL: return vixl::aarch64::LSL;
+ case HDataProcWithShifterOp::kLSR: return vixl::aarch64::LSR;
default:
LOG(FATAL) << "Unexpected op kind " << op_kind;
UNREACHABLE();
@@ -302,14 +307,14 @@ inline vixl::aarch64::Shift ShiftFromOpKind(HArm64DataProcWithShifterOp::OpKind
}
}
-inline vixl::aarch64::Extend ExtendFromOpKind(HArm64DataProcWithShifterOp::OpKind op_kind) {
+inline vixl::aarch64::Extend ExtendFromOpKind(HDataProcWithShifterOp::OpKind op_kind) {
switch (op_kind) {
- case HArm64DataProcWithShifterOp::kUXTB: return vixl::aarch64::UXTB;
- case HArm64DataProcWithShifterOp::kUXTH: return vixl::aarch64::UXTH;
- case HArm64DataProcWithShifterOp::kUXTW: return vixl::aarch64::UXTW;
- case HArm64DataProcWithShifterOp::kSXTB: return vixl::aarch64::SXTB;
- case HArm64DataProcWithShifterOp::kSXTH: return vixl::aarch64::SXTH;
- case HArm64DataProcWithShifterOp::kSXTW: return vixl::aarch64::SXTW;
+ case HDataProcWithShifterOp::kUXTB: return vixl::aarch64::UXTB;
+ case HDataProcWithShifterOp::kUXTH: return vixl::aarch64::UXTH;
+ case HDataProcWithShifterOp::kUXTW: return vixl::aarch64::UXTW;
+ case HDataProcWithShifterOp::kSXTB: return vixl::aarch64::SXTB;
+ case HDataProcWithShifterOp::kSXTH: return vixl::aarch64::SXTH;
+ case HDataProcWithShifterOp::kSXTW: return vixl::aarch64::SXTW;
default:
LOG(FATAL) << "Unexpected op kind " << op_kind;
UNREACHABLE();
@@ -317,31 +322,8 @@ inline vixl::aarch64::Extend ExtendFromOpKind(HArm64DataProcWithShifterOp::OpKin
}
}
-inline bool CanFitInShifterOperand(HInstruction* instruction) {
- if (instruction->IsTypeConversion()) {
- HTypeConversion* conversion = instruction->AsTypeConversion();
- Primitive::Type result_type = conversion->GetResultType();
- Primitive::Type input_type = conversion->GetInputType();
- // We don't expect to see the same type as input and result.
- return Primitive::IsIntegralType(result_type) && Primitive::IsIntegralType(input_type) &&
- (result_type != input_type);
- } else {
- return (instruction->IsShl() && instruction->AsShl()->InputAt(1)->IsIntConstant()) ||
- (instruction->IsShr() && instruction->AsShr()->InputAt(1)->IsIntConstant()) ||
- (instruction->IsUShr() && instruction->AsUShr()->InputAt(1)->IsIntConstant());
- }
-}
-
-inline bool HasShifterOperand(HInstruction* instr) {
- // `neg` instructions are an alias of `sub` using the zero register as the
- // first register input.
- bool res = instr->IsAdd() || instr->IsAnd() || instr->IsNeg() ||
- instr->IsOr() || instr->IsSub() || instr->IsXor();
- return res;
-}
-
inline bool ShifterOperandSupportsExtension(HInstruction* instruction) {
- DCHECK(HasShifterOperand(instruction));
+ DCHECK(HasShifterOperand(instruction, kArm64));
// Although the `neg` instruction is an alias of the `sub` instruction, `HNeg`
// does *not* support extension. This is because the `extended register` form
// of the `sub` instruction interprets the left register with code 31 as the
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index f6fba883bd..2bf5c53e17 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -511,12 +511,10 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {
void VisitBitwiseNegatedRight(HBitwiseNegatedRight* instruction) OVERRIDE {
StartAttributeStream("kind") << instruction->GetOpKind();
}
-#endif
-#ifdef ART_ENABLE_CODEGEN_arm64
- void VisitArm64DataProcWithShifterOp(HArm64DataProcWithShifterOp* instruction) OVERRIDE {
+ void VisitDataProcWithShifterOp(HDataProcWithShifterOp* instruction) OVERRIDE {
StartAttributeStream("kind") << instruction->GetInstrKind() << "+" << instruction->GetOpKind();
- if (HArm64DataProcWithShifterOp::IsShiftOp(instruction->GetOpKind())) {
+ if (HDataProcWithShifterOp::IsShiftOp(instruction->GetOpKind())) {
StartAttributeStream("shift") << instruction->GetShiftAmount();
}
}
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index e012a4287f..8c73f1d036 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -192,9 +192,9 @@ static uint32_t FindMethodIndexIn(ArtMethod* method,
}
static dex::TypeIndex FindClassIndexIn(mirror::Class* cls,
- const DexFile& dex_file,
- Handle<mirror::DexCache> dex_cache)
+ const DexCompilationUnit& compilation_unit)
REQUIRES_SHARED(Locks::mutator_lock_) {
+ const DexFile& dex_file = *compilation_unit.GetDexFile();
dex::TypeIndex index;
if (cls->GetDexCache() == nullptr) {
DCHECK(cls->IsArrayClass()) << cls->PrettyClass();
@@ -203,22 +203,19 @@ static dex::TypeIndex FindClassIndexIn(mirror::Class* cls,
DCHECK(cls->IsProxyClass()) << cls->PrettyClass();
// TODO: deal with proxy classes.
} else if (IsSameDexFile(cls->GetDexFile(), dex_file)) {
- DCHECK_EQ(cls->GetDexCache(), dex_cache.Get());
+ DCHECK_EQ(cls->GetDexCache(), compilation_unit.GetDexCache().Get());
index = cls->GetDexTypeIndex();
- // Update the dex cache to ensure the class is in. The generated code will
- // consider it is. We make it safe by updating the dex cache, as other
- // dex files might also load the class, and there is no guarantee the dex
- // cache of the dex file of the class will be updated.
- if (dex_cache->GetResolvedType(index) == nullptr) {
- dex_cache->SetResolvedType(index, cls);
- }
} else {
index = cls->FindTypeIndexInOtherDexFile(dex_file);
- // We cannot guarantee the entry in the dex cache will resolve to the same class,
+ // We cannot guarantee the entry will resolve to the same class,
// as there may be different class loaders. So only return the index if it's
- // the right class in the dex cache already.
- if (index.IsValid() && dex_cache->GetResolvedType(index) != cls) {
- index = dex::TypeIndex::Invalid();
+ // the right class already resolved with the class loader.
+ if (index.IsValid()) {
+ ObjPtr<mirror::Class> resolved = ClassLinker::LookupResolvedType(
+ index, compilation_unit.GetDexCache().Get(), compilation_unit.GetClassLoader().Get());
+ if (resolved != cls) {
+ index = dex::TypeIndex::Invalid();
+ }
}
}
@@ -445,9 +442,8 @@ bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction,
DCHECK(invoke_instruction->IsInvokeVirtual() || invoke_instruction->IsInvokeInterface())
<< invoke_instruction->DebugName();
- const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile();
dex::TypeIndex class_index = FindClassIndexIn(
- GetMonomorphicType(classes), caller_dex_file, caller_compilation_unit_.GetDexCache());
+ GetMonomorphicType(classes), caller_compilation_unit_);
if (!class_index.IsValid()) {
VLOG(compiler) << "Call to " << ArtMethod::PrettyMethod(resolved_method)
<< " from inline cache is not inlined because its class is not"
@@ -490,6 +486,7 @@ bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction,
// Run type propagation to get the guard typed, and eventually propagate the
// type of the receiver.
ReferenceTypePropagation rtp_fixup(graph_,
+ outer_compilation_unit_.GetClassLoader(),
outer_compilation_unit_.GetDexCache(),
handles_,
/* is_first_run */ false);
@@ -583,7 +580,6 @@ bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction,
ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();
PointerSize pointer_size = class_linker->GetImagePointerSize();
- const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile();
bool all_targets_inlined = true;
bool one_target_inlined = false;
@@ -605,8 +601,7 @@ bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction,
HInstruction* cursor = invoke_instruction->GetPrevious();
HBasicBlock* bb_cursor = invoke_instruction->GetBlock();
- dex::TypeIndex class_index = FindClassIndexIn(
- handle.Get(), caller_dex_file, caller_compilation_unit_.GetDexCache());
+ dex::TypeIndex class_index = FindClassIndexIn(handle.Get(), caller_compilation_unit_);
HInstruction* return_replacement = nullptr;
if (!class_index.IsValid() ||
!TryBuildAndInline(invoke_instruction,
@@ -662,6 +657,7 @@ bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction,
// Run type propagation to get the guards typed.
ReferenceTypePropagation rtp_fixup(graph_,
+ outer_compilation_unit_.GetClassLoader(),
outer_compilation_unit_.GetDexCache(),
handles_,
/* is_first_run */ false);
@@ -855,6 +851,7 @@ bool HInliner::TryInlinePolymorphicCallToSameTarget(
// Run type propagation to get the guard typed.
ReferenceTypePropagation rtp_fixup(graph_,
+ outer_compilation_unit_.GetClassLoader(),
outer_compilation_unit_.GetDexCache(),
handles_,
/* is_first_run */ false);
@@ -923,6 +920,7 @@ bool HInliner::TryInlineAndReplace(HInvoke* invoke_instruction,
// Actual return value has a more specific type than the method's declared
// return type. Run RTP again on the outer graph to propagate it.
ReferenceTypePropagation(graph_,
+ outer_compilation_unit_.GetClassLoader(),
outer_compilation_unit_.GetDexCache(),
handles_,
/* is_first_run */ false).Run();
@@ -1175,7 +1173,11 @@ HInstanceFieldGet* HInliner::CreateInstanceFieldGet(Handle<mirror::DexCache> dex
/* dex_pc */ 0);
if (iget->GetType() == Primitive::kPrimNot) {
// Use the same dex_cache that we used for field lookup as the hint_dex_cache.
- ReferenceTypePropagation rtp(graph_, dex_cache, handles_, /* is_first_run */ false);
+ ReferenceTypePropagation rtp(graph_,
+ outer_compilation_unit_.GetClassLoader(),
+ dex_cache,
+ handles_,
+ /* is_first_run */ false);
rtp.Visit(iget);
}
return iget;
@@ -1221,7 +1223,7 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
resolved_method->GetDeclaringClass()->GetClassLoader()));
DexCompilationUnit dex_compilation_unit(
- class_loader.ToJObject(),
+ class_loader,
class_linker,
callee_dex_file,
code_item,
@@ -1338,6 +1340,7 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
// are more specific than the declared ones, run RTP again on the inner graph.
if (run_rtp || ArgumentTypesMoreSpecific(invoke_instruction, resolved_method)) {
ReferenceTypePropagation(callee_graph,
+ outer_compilation_unit_.GetClassLoader(),
dex_compilation_unit.GetDexCache(),
handles_,
/* is_first_run */ false).Run();
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
index 3374e42955..c60f6e5393 100644
--- a/compiler/optimizing/instruction_builder.cc
+++ b/compiler/optimizing/instruction_builder.cc
@@ -669,11 +669,10 @@ static InvokeType GetInvokeTypeFromOpCode(Instruction::Code opcode) {
ArtMethod* HInstructionBuilder::ResolveMethod(uint16_t method_idx, InvokeType invoke_type) {
ScopedObjectAccess soa(Thread::Current());
- StackHandleScope<3> hs(soa.Self());
+ StackHandleScope<2> hs(soa.Self());
ClassLinker* class_linker = dex_compilation_unit_->GetClassLinker();
- Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
- soa.Decode<mirror::ClassLoader>(dex_compilation_unit_->GetClassLoader())));
+ Handle<mirror::ClassLoader> class_loader = dex_compilation_unit_->GetClassLoader();
Handle<mirror::Class> compiling_class(hs.NewHandle(GetCompilingClass()));
  // We fetch the referenced class eagerly (that is, the class pointed to by the MethodId
  // at method_idx), as `CanAccessResolvedMethod` expects it to be in the dex cache.
@@ -1260,9 +1259,7 @@ bool HInstructionBuilder::BuildInstanceFieldAccess(const Instruction& instructio
static mirror::Class* GetClassFrom(CompilerDriver* driver,
const DexCompilationUnit& compilation_unit) {
ScopedObjectAccess soa(Thread::Current());
- StackHandleScope<1> hs(soa.Self());
- Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
- soa.Decode<mirror::ClassLoader>(compilation_unit.GetClassLoader())));
+ Handle<mirror::ClassLoader> class_loader = compilation_unit.GetClassLoader();
Handle<mirror::DexCache> dex_cache = compilation_unit.GetDexCache();
return driver->ResolveCompilingMethodsClass(soa, dex_cache, class_loader, &compilation_unit);
@@ -1278,10 +1275,9 @@ mirror::Class* HInstructionBuilder::GetCompilingClass() const {
bool HInstructionBuilder::IsOutermostCompilingClass(dex::TypeIndex type_index) const {
ScopedObjectAccess soa(Thread::Current());
- StackHandleScope<3> hs(soa.Self());
+ StackHandleScope<2> hs(soa.Self());
Handle<mirror::DexCache> dex_cache = dex_compilation_unit_->GetDexCache();
- Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
- soa.Decode<mirror::ClassLoader>(dex_compilation_unit_->GetClassLoader())));
+ Handle<mirror::ClassLoader> class_loader = dex_compilation_unit_->GetClassLoader();
Handle<mirror::Class> cls(hs.NewHandle(compiler_driver_->ResolveClass(
soa, dex_cache, class_loader, type_index, dex_compilation_unit_)));
Handle<mirror::Class> outer_class(hs.NewHandle(GetOutermostCompilingClass()));
@@ -1317,8 +1313,7 @@ ArtField* HInstructionBuilder::ResolveField(uint16_t field_idx, bool is_static,
StackHandleScope<2> hs(soa.Self());
ClassLinker* class_linker = dex_compilation_unit_->GetClassLinker();
- Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
- soa.Decode<mirror::ClassLoader>(dex_compilation_unit_->GetClassLoader())));
+ Handle<mirror::ClassLoader> class_loader = dex_compilation_unit_->GetClassLoader();
Handle<mirror::Class> compiling_class(hs.NewHandle(GetCompilingClass()));
ArtField* resolved_field = class_linker->ResolveField(*dex_compilation_unit_->GetDexFile(),
@@ -1635,10 +1630,8 @@ static TypeCheckKind ComputeTypeCheckKind(Handle<mirror::Class> cls)
HLoadClass* HInstructionBuilder::BuildLoadClass(dex::TypeIndex type_index, uint32_t dex_pc) {
ScopedObjectAccess soa(Thread::Current());
- StackHandleScope<2> hs(soa.Self());
const DexFile& dex_file = *dex_compilation_unit_->GetDexFile();
- Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
- soa.Decode<mirror::ClassLoader>(dex_compilation_unit_->GetClassLoader())));
+ Handle<mirror::ClassLoader> class_loader = dex_compilation_unit_->GetClassLoader();
Handle<mirror::Class> klass = handles_->NewHandle(compiler_driver_->ResolveClass(
soa, dex_compilation_unit_->GetDexCache(), class_loader, type_index, dex_compilation_unit_));
@@ -1722,17 +1715,9 @@ void HInstructionBuilder::BuildTypeCheck(const Instruction& instruction,
}
}
-bool HInstructionBuilder::NeedsAccessCheck(dex::TypeIndex type_index,
- Handle<mirror::DexCache> dex_cache,
- bool* finalizable) const {
- return !compiler_driver_->CanAccessInstantiableTypeWithoutChecks(
- dex_compilation_unit_->GetDexMethodIndex(), dex_cache, type_index, finalizable);
-}
-
bool HInstructionBuilder::NeedsAccessCheck(dex::TypeIndex type_index, bool* finalizable) const {
- ScopedObjectAccess soa(Thread::Current());
- Handle<mirror::DexCache> dex_cache = dex_compilation_unit_->GetDexCache();
- return NeedsAccessCheck(type_index, dex_cache, finalizable);
+ return !compiler_driver_->CanAccessInstantiableTypeWithoutChecks(
+ LookupReferrerClass(), LookupResolvedType(type_index, *dex_compilation_unit_), finalizable);
}
bool HInstructionBuilder::CanDecodeQuickenedInfo() const {
@@ -2772,4 +2757,18 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction,
return true;
} // NOLINT(readability/fn_size)
+ObjPtr<mirror::Class> HInstructionBuilder::LookupResolvedType(
+ dex::TypeIndex type_index,
+ const DexCompilationUnit& compilation_unit) const {
+ return ClassLinker::LookupResolvedType(
+ type_index, compilation_unit.GetDexCache().Get(), compilation_unit.GetClassLoader().Get());
+}
+
+ObjPtr<mirror::Class> HInstructionBuilder::LookupReferrerClass() const {
+ // TODO: Cache the result in a Handle<mirror::Class>.
+ const DexFile::MethodId& method_id =
+ dex_compilation_unit_->GetDexFile()->GetMethodId(dex_compilation_unit_->GetDexMethodIndex());
+ return LookupResolvedType(method_id.class_idx_, *dex_compilation_unit_);
+}
+
} // namespace art
diff --git a/compiler/optimizing/instruction_builder.h b/compiler/optimizing/instruction_builder.h
index 3bb680ce44..e735a0c46d 100644
--- a/compiler/optimizing/instruction_builder.h
+++ b/compiler/optimizing/instruction_builder.h
@@ -106,11 +106,8 @@ class HInstructionBuilder : public ValueObject {
// Returns whether the current method needs access check for the type.
// Output parameter finalizable is set to whether the type is finalizable.
- bool NeedsAccessCheck(dex::TypeIndex type_index,
- Handle<mirror::DexCache> dex_cache,
- /*out*/bool* finalizable) const
+ bool NeedsAccessCheck(dex::TypeIndex type_index, /*out*/bool* finalizable) const
REQUIRES_SHARED(Locks::mutator_lock_);
- bool NeedsAccessCheck(dex::TypeIndex type_index, /*out*/bool* finalizable) const;
template<typename T>
void Unop_12x(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc);
@@ -300,6 +297,12 @@ class HInstructionBuilder : public ValueObject {
// be found.
ArtField* ResolveField(uint16_t field_idx, bool is_static, bool is_put);
+ ObjPtr<mirror::Class> LookupResolvedType(dex::TypeIndex type_index,
+ const DexCompilationUnit& compilation_unit) const
+ REQUIRES_SHARED(Locks::mutator_lock_);
+
+ ObjPtr<mirror::Class> LookupReferrerClass() const REQUIRES_SHARED(Locks::mutator_lock_);
+
ArenaAllocator* const arena_;
HGraph* const graph_;
VariableSizedHandleScope* handles_;
diff --git a/compiler/optimizing/instruction_simplifier_arm.cc b/compiler/optimizing/instruction_simplifier_arm.cc
index 56e4c7a9c2..5f5e29b024 100644
--- a/compiler/optimizing/instruction_simplifier_arm.cc
+++ b/compiler/optimizing/instruction_simplifier_arm.cc
@@ -15,23 +15,124 @@
*/
#include "code_generator.h"
+#include "common_arm.h"
#include "instruction_simplifier_arm.h"
#include "instruction_simplifier_shared.h"
#include "mirror/array-inl.h"
+#include "nodes.h"
namespace art {
+
+using helpers::CanFitInShifterOperand;
+using helpers::HasShifterOperand;
+
namespace arm {
-void InstructionSimplifierArmVisitor::VisitMul(HMul* instruction) {
- if (TryCombineMultiplyAccumulate(instruction, kArm)) {
+using helpers::ShifterOperandSupportsExtension;
+
+bool InstructionSimplifierArmVisitor::TryMergeIntoShifterOperand(HInstruction* use,
+ HInstruction* bitfield_op,
+ bool do_merge) {
+ DCHECK(HasShifterOperand(use, kArm));
+ DCHECK(use->IsBinaryOperation());
+ DCHECK(CanFitInShifterOperand(bitfield_op));
+ DCHECK(!bitfield_op->HasEnvironmentUses());
+
+ Primitive::Type type = use->GetType();
+ if (type != Primitive::kPrimInt && type != Primitive::kPrimLong) {
+ return false;
+ }
+
+ HInstruction* left = use->InputAt(0);
+ HInstruction* right = use->InputAt(1);
+ DCHECK(left == bitfield_op || right == bitfield_op);
+
+ if (left == right) {
+ // TODO: Handle special transformations in this situation?
+ // For example should we transform `(x << 1) + (x << 1)` into `(x << 2)`?
+ // Or should this be part of a separate transformation logic?
+ return false;
+ }
+
+ bool is_commutative = use->AsBinaryOperation()->IsCommutative();
+ HInstruction* other_input;
+ if (bitfield_op == right) {
+ other_input = left;
+ } else {
+ if (is_commutative) {
+ other_input = right;
+ } else {
+ return false;
+ }
+ }
+
+ HDataProcWithShifterOp::OpKind op_kind;
+ int shift_amount = 0;
+
+ HDataProcWithShifterOp::GetOpInfoFromInstruction(bitfield_op, &op_kind, &shift_amount);
+ shift_amount &= use->GetType() == Primitive::kPrimInt
+ ? kMaxIntShiftDistance
+ : kMaxLongShiftDistance;
+
+ if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) {
+ if (!ShifterOperandSupportsExtension(use)) {
+ return false;
+ }
+ // Shift by 1 is a special case that results in the same number and type of instructions
+ // as this simplification, but potentially shorter code.
+ } else if (type == Primitive::kPrimLong && shift_amount == 1) {
+ return false;
+ }
+
+ if (do_merge) {
+ HDataProcWithShifterOp* alu_with_op =
+ new (GetGraph()->GetArena()) HDataProcWithShifterOp(use,
+ other_input,
+ bitfield_op->InputAt(0),
+ op_kind,
+ shift_amount,
+ use->GetDexPc());
+ use->GetBlock()->ReplaceAndRemoveInstructionWith(use, alu_with_op);
+ if (bitfield_op->GetUses().empty()) {
+ bitfield_op->GetBlock()->RemoveInstruction(bitfield_op);
+ }
RecordSimplification();
}
+
+ return true;
}
-void InstructionSimplifierArmVisitor::VisitOr(HOr* instruction) {
- if (TryMergeNegatedInput(instruction)) {
- RecordSimplification();
+// Merge a bitfield move instruction into its uses if it can be merged in all of them.
+bool InstructionSimplifierArmVisitor::TryMergeIntoUsersShifterOperand(HInstruction* bitfield_op) {
+ DCHECK(CanFitInShifterOperand(bitfield_op));
+
+ if (bitfield_op->HasEnvironmentUses()) {
+ return false;
+ }
+
+ const HUseList<HInstruction*>& uses = bitfield_op->GetUses();
+
+ // Check whether we can merge the instruction in all its users' shifter operand.
+ for (const HUseListNode<HInstruction*>& use : uses) {
+ HInstruction* user = use.GetUser();
+ if (!HasShifterOperand(user, kArm)) {
+ return false;
+ }
+ if (!CanMergeIntoShifterOperand(user, bitfield_op)) {
+ return false;
+ }
}
+
+ // Merge the instruction into its uses.
+ for (auto it = uses.begin(), end = uses.end(); it != end; /* ++it below */) {
+ HInstruction* user = it->GetUser();
+ // Increment `it` now because `*it` will disappear thanks to MergeIntoShifterOperand().
+ ++it;
+ bool merged = MergeIntoShifterOperand(user, bitfield_op);
+ DCHECK(merged);
+ }
+
+ return true;
}
void InstructionSimplifierArmVisitor::VisitAnd(HAnd* instruction) {
@@ -89,5 +190,49 @@ void InstructionSimplifierArmVisitor::VisitArraySet(HArraySet* instruction) {
}
}
+void InstructionSimplifierArmVisitor::VisitMul(HMul* instruction) {
+ if (TryCombineMultiplyAccumulate(instruction, kArm)) {
+ RecordSimplification();
+ }
+}
+
+void InstructionSimplifierArmVisitor::VisitOr(HOr* instruction) {
+ if (TryMergeNegatedInput(instruction)) {
+ RecordSimplification();
+ }
+}
+
+void InstructionSimplifierArmVisitor::VisitShl(HShl* instruction) {
+ if (instruction->InputAt(1)->IsConstant()) {
+ TryMergeIntoUsersShifterOperand(instruction);
+ }
+}
+
+void InstructionSimplifierArmVisitor::VisitShr(HShr* instruction) {
+ if (instruction->InputAt(1)->IsConstant()) {
+ TryMergeIntoUsersShifterOperand(instruction);
+ }
+}
+
+void InstructionSimplifierArmVisitor::VisitTypeConversion(HTypeConversion* instruction) {
+ Primitive::Type result_type = instruction->GetResultType();
+ Primitive::Type input_type = instruction->GetInputType();
+
+ if (input_type == result_type) {
+ // We let the arch-independent code handle this.
+ return;
+ }
+
+ if (Primitive::IsIntegralType(result_type) && Primitive::IsIntegralType(input_type)) {
+ TryMergeIntoUsersShifterOperand(instruction);
+ }
+}
+
+void InstructionSimplifierArmVisitor::VisitUShr(HUShr* instruction) {
+ if (instruction->InputAt(1)->IsConstant()) {
+ TryMergeIntoUsersShifterOperand(instruction);
+ }
+}
+
} // namespace arm
} // namespace art
diff --git a/compiler/optimizing/instruction_simplifier_arm.h b/compiler/optimizing/instruction_simplifier_arm.h
index 9b54511340..e2ed257777 100644
--- a/compiler/optimizing/instruction_simplifier_arm.h
+++ b/compiler/optimizing/instruction_simplifier_arm.h
@@ -35,11 +35,41 @@ class InstructionSimplifierArmVisitor : public HGraphVisitor {
}
}
- void VisitMul(HMul* instruction) OVERRIDE;
- void VisitOr(HOr* instruction) OVERRIDE;
+ bool TryMergeIntoUsersShifterOperand(HInstruction* instruction);
+ bool TryMergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op, bool do_merge);
+ bool CanMergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op) {
+ return TryMergeIntoShifterOperand(use, bitfield_op, /* do_merge */ false);
+ }
+ bool MergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op) {
+ DCHECK(CanMergeIntoShifterOperand(use, bitfield_op));
+ return TryMergeIntoShifterOperand(use, bitfield_op, /* do_merge */ true);
+ }
+
+ /**
+ * This simplifier uses a special-purpose BB visitor.
+ * (1) No need to visit Phi nodes.
+ * (2) Since statements can be removed in a "forward" fashion,
+ * the visitor should test if each statement is still there.
+ */
+ void VisitBasicBlock(HBasicBlock* block) OVERRIDE {
+ // TODO: fragile iteration, provide more robust iterators?
+ for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+ HInstruction* instruction = it.Current();
+ if (instruction->IsInBlock()) {
+ instruction->Accept(this);
+ }
+ }
+ }
+
void VisitAnd(HAnd* instruction) OVERRIDE;
void VisitArrayGet(HArrayGet* instruction) OVERRIDE;
void VisitArraySet(HArraySet* instruction) OVERRIDE;
+ void VisitMul(HMul* instruction) OVERRIDE;
+ void VisitOr(HOr* instruction) OVERRIDE;
+ void VisitShl(HShl* instruction) OVERRIDE;
+ void VisitShr(HShr* instruction) OVERRIDE;
+ void VisitTypeConversion(HTypeConversion* instruction) OVERRIDE;
+ void VisitUShr(HUShr* instruction) OVERRIDE;
OptimizingCompilerStats* stats_;
};
diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc
index 6d107d571f..73b7b2bd95 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.cc
+++ b/compiler/optimizing/instruction_simplifier_arm64.cc
@@ -22,16 +22,18 @@
#include "mirror/string.h"
namespace art {
-namespace arm64 {
using helpers::CanFitInShifterOperand;
using helpers::HasShifterOperand;
+
+namespace arm64 {
+
using helpers::ShifterOperandSupportsExtension;
bool InstructionSimplifierArm64Visitor::TryMergeIntoShifterOperand(HInstruction* use,
HInstruction* bitfield_op,
bool do_merge) {
- DCHECK(HasShifterOperand(use));
+ DCHECK(HasShifterOperand(use, kArm64));
DCHECK(use->IsBinaryOperation() || use->IsNeg());
DCHECK(CanFitInShifterOperand(bitfield_op));
DCHECK(!bitfield_op->HasEnvironmentUses());
@@ -72,23 +74,22 @@ bool InstructionSimplifierArm64Visitor::TryMergeIntoShifterOperand(HInstruction*
}
}
- HArm64DataProcWithShifterOp::OpKind op_kind;
+ HDataProcWithShifterOp::OpKind op_kind;
int shift_amount = 0;
- HArm64DataProcWithShifterOp::GetOpInfoFromInstruction(bitfield_op, &op_kind, &shift_amount);
+ HDataProcWithShifterOp::GetOpInfoFromInstruction(bitfield_op, &op_kind, &shift_amount);
- if (HArm64DataProcWithShifterOp::IsExtensionOp(op_kind) &&
- !ShifterOperandSupportsExtension(use)) {
+ if (HDataProcWithShifterOp::IsExtensionOp(op_kind) && !ShifterOperandSupportsExtension(use)) {
return false;
}
if (do_merge) {
- HArm64DataProcWithShifterOp* alu_with_op =
- new (GetGraph()->GetArena()) HArm64DataProcWithShifterOp(use,
- other_input,
- bitfield_op->InputAt(0),
- op_kind,
- shift_amount,
- use->GetDexPc());
+ HDataProcWithShifterOp* alu_with_op =
+ new (GetGraph()->GetArena()) HDataProcWithShifterOp(use,
+ other_input,
+ bitfield_op->InputAt(0),
+ op_kind,
+ shift_amount,
+ use->GetDexPc());
use->GetBlock()->ReplaceAndRemoveInstructionWith(use, alu_with_op);
if (bitfield_op->GetUses().empty()) {
bitfield_op->GetBlock()->RemoveInstruction(bitfield_op);
@@ -112,7 +113,7 @@ bool InstructionSimplifierArm64Visitor::TryMergeIntoUsersShifterOperand(HInstruc
// Check whether we can merge the instruction in all its users' shifter operand.
for (const HUseListNode<HInstruction*>& use : uses) {
HInstruction* user = use.GetUser();
- if (!HasShifterOperand(user)) {
+ if (!HasShifterOperand(user, kArm64)) {
return false;
}
if (!CanMergeIntoShifterOperand(user, bitfield_op)) {
diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h
index d4cb1f14b7..65654f50f4 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.h
+++ b/compiler/optimizing/instruction_simplifier_arm64.h
@@ -40,11 +40,11 @@ class InstructionSimplifierArm64Visitor : public HGraphVisitor {
HInstruction* bitfield_op,
bool do_merge);
bool CanMergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op) {
- return TryMergeIntoShifterOperand(use, bitfield_op, false);
+ return TryMergeIntoShifterOperand(use, bitfield_op, /* do_merge */ false);
}
bool MergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op) {
DCHECK(CanMergeIntoShifterOperand(use, bitfield_op));
- return TryMergeIntoShifterOperand(use, bitfield_op, true);
+ return TryMergeIntoShifterOperand(use, bitfield_op, /* do_merge */ true);
}
/**
diff --git a/compiler/optimizing/instruction_simplifier_shared.h b/compiler/optimizing/instruction_simplifier_shared.h
index 56804f5e90..83e3ffca57 100644
--- a/compiler/optimizing/instruction_simplifier_shared.h
+++ b/compiler/optimizing/instruction_simplifier_shared.h
@@ -21,6 +21,33 @@
namespace art {
+namespace helpers {
+
+inline bool CanFitInShifterOperand(HInstruction* instruction) {
+ if (instruction->IsTypeConversion()) {
+ HTypeConversion* conversion = instruction->AsTypeConversion();
+ Primitive::Type result_type = conversion->GetResultType();
+ Primitive::Type input_type = conversion->GetInputType();
+ // We don't expect to see the same type as input and result.
+ return Primitive::IsIntegralType(result_type) && Primitive::IsIntegralType(input_type) &&
+ (result_type != input_type);
+ } else {
+ return (instruction->IsShl() && instruction->AsShl()->InputAt(1)->IsIntConstant()) ||
+ (instruction->IsShr() && instruction->AsShr()->InputAt(1)->IsIntConstant()) ||
+ (instruction->IsUShr() && instruction->AsUShr()->InputAt(1)->IsIntConstant());
+ }
+}
+
+inline bool HasShifterOperand(HInstruction* instr, InstructionSet isa) {
+ // On ARM64 `neg` instructions are an alias of `sub` using the zero register
+ // as the first register input.
+ bool res = instr->IsAdd() || instr->IsAnd() || (isa == kArm64 && instr->IsNeg()) ||
+ instr->IsOr() || instr->IsSub() || instr->IsXor();
+ return res;
+}
+
+} // namespace helpers
+
bool TryCombineMultiplyAccumulate(HMul* mul, InstructionSet isa);
// For bitwise operations (And/Or/Xor) with a negated input, try to use
// a negated bitwise instruction.
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 95838380cc..26c9ab83c2 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -71,7 +71,7 @@ HLoopOptimization::HLoopOptimization(HGraph* graph,
void HLoopOptimization::Run() {
// Well-behaved loops only.
// TODO: make this less of a sledgehammer.
- if (graph_->HasTryCatch() || graph_->HasIrreducibleLoops()) {
+ if (!graph_->HasLoops() || graph_->HasTryCatch() || graph_->HasIrreducibleLoops()) {
return;
}
@@ -84,6 +84,10 @@ void HLoopOptimization::Run() {
// Perform loop optimizations.
LocalRun();
+ if (top_loop_ == nullptr) {
+ graph_->SetHasLoops(false);
+ }
+
// Detach.
loop_allocator_ = nullptr;
last_loop_ = top_loop_ = nullptr;
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 71a26ebe79..62c89100eb 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -688,6 +688,7 @@ void HLoopInformation::Populate() {
contains_irreducible_loop_ = true;
graph->SetHasIrreducibleLoops(true);
}
+ graph->SetHasLoops(true);
}
HBasicBlock* HLoopInformation::GetPreHeader() const {
@@ -2032,9 +2033,19 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {
}
}
outer_graph->UpdateMaximumNumberOfOutVRegs(GetMaximumNumberOfOutVRegs());
+
if (HasBoundsChecks()) {
outer_graph->SetHasBoundsChecks(true);
}
+ if (HasLoops()) {
+ outer_graph->SetHasLoops(true);
+ }
+ if (HasIrreducibleLoops()) {
+ outer_graph->SetHasIrreducibleLoops(true);
+ }
+ if (HasTryCatch()) {
+ outer_graph->SetHasTryCatch(true);
+ }
HInstruction* return_value = nullptr;
if (GetBlocks().size() == 3) {
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 96f9abafbf..8a9e61875a 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -323,6 +323,7 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
temporaries_vreg_slots_(0),
has_bounds_checks_(false),
has_try_catch_(false),
+ has_loops_(false),
has_irreducible_loops_(false),
debuggable_(debuggable),
current_instruction_id_(start_instruction_id),
@@ -559,6 +560,9 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
bool HasTryCatch() const { return has_try_catch_; }
void SetHasTryCatch(bool value) { has_try_catch_ = value; }
+ bool HasLoops() const { return has_loops_; }
+ void SetHasLoops(bool value) { has_loops_ = value; }
+
bool HasIrreducibleLoops() const { return has_irreducible_loops_; }
void SetHasIrreducibleLoops(bool value) { has_irreducible_loops_ = value; }
@@ -637,14 +641,26 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
// Number of vreg size slots that the temporaries use (used in baseline compiler).
size_t temporaries_vreg_slots_;
- // Has bounds checks. We can totally skip BCE if it's false.
+ // Flag whether there are bounds checks in the graph. We can skip
+ // BCE if it's false. It's only best effort to keep it up to date in
+ // the presence of code elimination so there might be false positives.
bool has_bounds_checks_;
- // Flag whether there are any try/catch blocks in the graph. We will skip
- // try/catch-related passes if false.
+ // Flag whether there are try/catch blocks in the graph. We will skip
+ // try/catch-related passes if it's false. It's only best effort to keep
+ // it up to date in the presence of code elimination so there might be
+ // false positives.
bool has_try_catch_;
- // Flag whether there are any irreducible loops in the graph.
+ // Flag whether there are any loops in the graph. We can skip loop
+ // optimization if it's false. It's only best effort to keep it up
+ // to date in the presence of code elimination so there might be false
+ // positives.
+ bool has_loops_;
+
+ // Flag whether there are any irreducible loops in the graph. It's only
+ // best effort to keep it up to date in the presence of code elimination
+ // so there might be false positives.
bool has_irreducible_loops_;
// Indicates whether the graph should be compiled in a way that
@@ -1346,6 +1362,7 @@ class HLoopInformationOutwardIterator : public ValueObject {
#else
#define FOR_EACH_CONCRETE_INSTRUCTION_SHARED(M) \
M(BitwiseNegatedRight, Instruction) \
+ M(DataProcWithShifterOp, Instruction) \
M(MultiplyAccumulate, Instruction) \
M(IntermediateAddress, Instruction)
#endif
@@ -1357,12 +1374,7 @@ class HLoopInformationOutwardIterator : public ValueObject {
M(ArmDexCacheArraysBase, Instruction)
#endif
-#ifndef ART_ENABLE_CODEGEN_arm64
#define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M)
-#else
-#define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M) \
- M(Arm64DataProcWithShifterOp, Instruction)
-#endif
#ifndef ART_ENABLE_CODEGEN_mips
#define FOR_EACH_CONCRETE_INSTRUCTION_MIPS(M)
@@ -6603,9 +6615,6 @@ class HParallelMove FINAL : public HTemplateInstruction<0> {
#ifdef ART_ENABLE_CODEGEN_arm
#include "nodes_arm.h"
#endif
-#ifdef ART_ENABLE_CODEGEN_arm64
-#include "nodes_arm64.h"
-#endif
#ifdef ART_ENABLE_CODEGEN_mips
#include "nodes_mips.h"
#endif
diff --git a/compiler/optimizing/nodes_arm64.h b/compiler/optimizing/nodes_arm64.h
deleted file mode 100644
index 3f88717c2a..0000000000
--- a/compiler/optimizing/nodes_arm64.h
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_OPTIMIZING_NODES_ARM64_H_
-#define ART_COMPILER_OPTIMIZING_NODES_ARM64_H_
-
-#include "nodes.h"
-
-namespace art {
-
-class HArm64DataProcWithShifterOp FINAL : public HExpression<2> {
- public:
- enum OpKind {
- kLSL, // Logical shift left.
- kLSR, // Logical shift right.
- kASR, // Arithmetic shift right.
- kUXTB, // Unsigned extend byte.
- kUXTH, // Unsigned extend half-word.
- kUXTW, // Unsigned extend word.
- kSXTB, // Signed extend byte.
- kSXTH, // Signed extend half-word.
- kSXTW, // Signed extend word.
-
- // Aliases.
- kFirstShiftOp = kLSL,
- kLastShiftOp = kASR,
- kFirstExtensionOp = kUXTB,
- kLastExtensionOp = kSXTW
- };
- HArm64DataProcWithShifterOp(HInstruction* instr,
- HInstruction* left,
- HInstruction* right,
- OpKind op,
- // The shift argument is unused if the operation
- // is an extension.
- int shift = 0,
- uint32_t dex_pc = kNoDexPc)
- : HExpression(instr->GetType(), SideEffects::None(), dex_pc),
- instr_kind_(instr->GetKind()), op_kind_(op), shift_amount_(shift) {
- DCHECK(!instr->HasSideEffects());
- SetRawInputAt(0, left);
- SetRawInputAt(1, right);
- }
-
- bool CanBeMoved() const OVERRIDE { return true; }
- bool InstructionDataEquals(const HInstruction* other_instr) const OVERRIDE {
- const HArm64DataProcWithShifterOp* other = other_instr->AsArm64DataProcWithShifterOp();
- return instr_kind_ == other->instr_kind_ &&
- op_kind_ == other->op_kind_ &&
- shift_amount_ == other->shift_amount_;
- }
-
- static bool IsShiftOp(OpKind op_kind) {
- return kFirstShiftOp <= op_kind && op_kind <= kLastShiftOp;
- }
-
- static bool IsExtensionOp(OpKind op_kind) {
- return kFirstExtensionOp <= op_kind && op_kind <= kLastExtensionOp;
- }
-
- // Find the operation kind and shift amount from a bitfield move instruction.
- static void GetOpInfoFromInstruction(HInstruction* bitfield_op,
- /*out*/OpKind* op_kind,
- /*out*/int* shift_amount);
-
- InstructionKind GetInstrKind() const { return instr_kind_; }
- OpKind GetOpKind() const { return op_kind_; }
- int GetShiftAmount() const { return shift_amount_; }
-
- DECLARE_INSTRUCTION(Arm64DataProcWithShifterOp);
-
- private:
- InstructionKind instr_kind_;
- OpKind op_kind_;
- int shift_amount_;
-
- friend std::ostream& operator<<(std::ostream& os, OpKind op);
-
- DISALLOW_COPY_AND_ASSIGN(HArm64DataProcWithShifterOp);
-};
-
-std::ostream& operator<<(std::ostream& os, const HArm64DataProcWithShifterOp::OpKind op);
-
-} // namespace art
-
-#endif // ART_COMPILER_OPTIMIZING_NODES_ARM64_H_
diff --git a/compiler/optimizing/nodes_arm64.cc b/compiler/optimizing/nodes_shared.cc
index ac2f093847..f145bf9130 100644
--- a/compiler/optimizing/nodes_arm64.cc
+++ b/compiler/optimizing/nodes_shared.cc
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2015 The Android Open Source Project
+ * Copyright (C) 2017 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -15,15 +15,15 @@
*/
#include "common_arm64.h"
-#include "nodes.h"
+#include "nodes_shared.h"
namespace art {
-using arm64::helpers::CanFitInShifterOperand;
+using helpers::CanFitInShifterOperand;
-void HArm64DataProcWithShifterOp::GetOpInfoFromInstruction(HInstruction* instruction,
- /*out*/OpKind* op_kind,
- /*out*/int* shift_amount) {
+void HDataProcWithShifterOp::GetOpInfoFromInstruction(HInstruction* instruction,
+ /*out*/OpKind* op_kind,
+ /*out*/int* shift_amount) {
DCHECK(CanFitInShifterOperand(instruction));
if (instruction->IsShl()) {
*op_kind = kLSL;
@@ -41,12 +41,11 @@ void HArm64DataProcWithShifterOp::GetOpInfoFromInstruction(HInstruction* instruc
int result_size = Primitive::ComponentSize(result_type);
int input_size = Primitive::ComponentSize(input_type);
int min_size = std::min(result_size, input_size);
- // This follows the logic in
- // `InstructionCodeGeneratorARM64::VisitTypeConversion()`.
if (result_type == Primitive::kPrimInt && input_type == Primitive::kPrimLong) {
- // There is actually nothing to do. The register will be used as a W
- // register, discarding the top bits. This is represented by the default
- // encoding 'LSL 0'.
+ // There is actually nothing to do. On ARM the high register from the
+ // pair will be ignored. On ARM64 the register will be used as a W
+ // register, discarding the top bits. This is represented by the
+ // default encoding 'LSL 0'.
*op_kind = kLSL;
*shift_amount = 0;
} else if (result_type == Primitive::kPrimChar ||
@@ -64,17 +63,17 @@ void HArm64DataProcWithShifterOp::GetOpInfoFromInstruction(HInstruction* instruc
}
}
-std::ostream& operator<<(std::ostream& os, const HArm64DataProcWithShifterOp::OpKind op) {
+std::ostream& operator<<(std::ostream& os, const HDataProcWithShifterOp::OpKind op) {
switch (op) {
- case HArm64DataProcWithShifterOp::kLSL: return os << "LSL";
- case HArm64DataProcWithShifterOp::kLSR: return os << "LSR";
- case HArm64DataProcWithShifterOp::kASR: return os << "ASR";
- case HArm64DataProcWithShifterOp::kUXTB: return os << "UXTB";
- case HArm64DataProcWithShifterOp::kUXTH: return os << "UXTH";
- case HArm64DataProcWithShifterOp::kUXTW: return os << "UXTW";
- case HArm64DataProcWithShifterOp::kSXTB: return os << "SXTB";
- case HArm64DataProcWithShifterOp::kSXTH: return os << "SXTH";
- case HArm64DataProcWithShifterOp::kSXTW: return os << "SXTW";
+ case HDataProcWithShifterOp::kLSL: return os << "LSL";
+ case HDataProcWithShifterOp::kLSR: return os << "LSR";
+ case HDataProcWithShifterOp::kASR: return os << "ASR";
+ case HDataProcWithShifterOp::kUXTB: return os << "UXTB";
+ case HDataProcWithShifterOp::kUXTH: return os << "UXTH";
+ case HDataProcWithShifterOp::kUXTW: return os << "UXTW";
+ case HDataProcWithShifterOp::kSXTB: return os << "SXTB";
+ case HDataProcWithShifterOp::kSXTH: return os << "SXTH";
+ case HDataProcWithShifterOp::kSXTW: return os << "SXTW";
default:
LOG(FATAL) << "Invalid OpKind " << static_cast<int>(op);
UNREACHABLE();
diff --git a/compiler/optimizing/nodes_shared.h b/compiler/optimizing/nodes_shared.h
index 814202e97b..c6bfbcc7fb 100644
--- a/compiler/optimizing/nodes_shared.h
+++ b/compiler/optimizing/nodes_shared.h
@@ -150,6 +150,81 @@ class HIntermediateAddress FINAL : public HExpression<2> {
DISALLOW_COPY_AND_ASSIGN(HIntermediateAddress);
};
+class HDataProcWithShifterOp FINAL : public HExpression<2> {
+ public:
+ enum OpKind {
+ kLSL, // Logical shift left.
+ kLSR, // Logical shift right.
+ kASR, // Arithmetic shift right.
+ kUXTB, // Unsigned extend byte.
+ kUXTH, // Unsigned extend half-word.
+ kUXTW, // Unsigned extend word.
+ kSXTB, // Signed extend byte.
+ kSXTH, // Signed extend half-word.
+ kSXTW, // Signed extend word.
+
+ // Aliases.
+ kFirstShiftOp = kLSL,
+ kLastShiftOp = kASR,
+ kFirstExtensionOp = kUXTB,
+ kLastExtensionOp = kSXTW
+ };
+ HDataProcWithShifterOp(HInstruction* instr,
+ HInstruction* left,
+ HInstruction* right,
+ OpKind op,
+ // The shift argument is unused if the operation
+ // is an extension.
+ int shift = 0,
+ uint32_t dex_pc = kNoDexPc)
+ : HExpression(instr->GetType(), SideEffects::None(), dex_pc),
+ instr_kind_(instr->GetKind()), op_kind_(op),
+ shift_amount_(shift & (instr->GetType() == Primitive::kPrimInt
+ ? kMaxIntShiftDistance
+ : kMaxLongShiftDistance)) {
+ DCHECK(!instr->HasSideEffects());
+ SetRawInputAt(0, left);
+ SetRawInputAt(1, right);
+ }
+
+ bool CanBeMoved() const OVERRIDE { return true; }
+ bool InstructionDataEquals(const HInstruction* other_instr) const OVERRIDE {
+ const HDataProcWithShifterOp* other = other_instr->AsDataProcWithShifterOp();
+ return instr_kind_ == other->instr_kind_ &&
+ op_kind_ == other->op_kind_ &&
+ shift_amount_ == other->shift_amount_;
+ }
+
+ static bool IsShiftOp(OpKind op_kind) {
+ return kFirstShiftOp <= op_kind && op_kind <= kLastShiftOp;
+ }
+
+ static bool IsExtensionOp(OpKind op_kind) {
+ return kFirstExtensionOp <= op_kind && op_kind <= kLastExtensionOp;
+ }
+
+ // Find the operation kind and shift amount from a bitfield move instruction.
+ static void GetOpInfoFromInstruction(HInstruction* bitfield_op,
+ /*out*/OpKind* op_kind,
+ /*out*/int* shift_amount);
+
+ InstructionKind GetInstrKind() const { return instr_kind_; }
+ OpKind GetOpKind() const { return op_kind_; }
+ int GetShiftAmount() const { return shift_amount_; }
+
+ DECLARE_INSTRUCTION(DataProcWithShifterOp);
+
+ private:
+ InstructionKind instr_kind_;
+ OpKind op_kind_;
+ int shift_amount_;
+
+ friend std::ostream& operator<<(std::ostream& os, OpKind op);
+
+ DISALLOW_COPY_AND_ASSIGN(HDataProcWithShifterOp);
+};
+
+std::ostream& operator<<(std::ostream& os, const HDataProcWithShifterOp::OpKind op);
} // namespace art
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 8638e346fb..f72bd6a5a3 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -306,7 +306,7 @@ class OptimizingCompiler FINAL : public Compiler {
InvokeType invoke_type,
uint16_t class_def_idx,
uint32_t method_idx,
- jobject class_loader,
+ Handle<mirror::ClassLoader> class_loader,
const DexFile& dex_file,
Handle<mirror::DexCache> dex_cache) const OVERRIDE;
@@ -375,7 +375,7 @@ class OptimizingCompiler FINAL : public Compiler {
InvokeType invoke_type,
uint16_t class_def_idx,
uint32_t method_idx,
- jobject class_loader,
+ Handle<mirror::ClassLoader> class_loader,
const DexFile& dex_file,
Handle<mirror::DexCache> dex_cache,
ArtMethod* method,
@@ -875,7 +875,7 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena,
InvokeType invoke_type,
uint16_t class_def_idx,
uint32_t method_idx,
- jobject class_loader,
+ Handle<mirror::ClassLoader> class_loader,
const DexFile& dex_file,
Handle<mirror::DexCache> dex_cache,
ArtMethod* method,
@@ -946,11 +946,8 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena,
const uint8_t* interpreter_metadata = nullptr;
if (method == nullptr) {
ScopedObjectAccess soa(Thread::Current());
- StackHandleScope<1> hs(soa.Self());
- Handle<mirror::ClassLoader> loader(hs.NewHandle(
- soa.Decode<mirror::ClassLoader>(class_loader)));
method = compiler_driver->ResolveMethod(
- soa, dex_cache, loader, &dex_compilation_unit, method_idx, invoke_type);
+ soa, dex_cache, class_loader, &dex_compilation_unit, method_idx, invoke_type);
}
// For AOT compilation, we may not get a method, for example if its class is erroneous.
// JIT should always have a method.
@@ -959,16 +956,6 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena,
graph->SetArtMethod(method);
ScopedObjectAccess soa(Thread::Current());
interpreter_metadata = method->GetQuickenedInfo(class_linker->GetImagePointerSize());
- dex::TypeIndex type_index = method->GetDeclaringClass()->GetDexTypeIndex();
-
- // Update the dex cache if the type is not in it yet. Note that under AOT,
- // the verifier must have set it, but under JIT, there's no guarantee, as we
- // don't necessarily run the verifier.
- // The compiler and the compiler driver assume the compiling class is
- // in the dex cache.
- if (dex_cache->GetResolvedType(type_index) == nullptr) {
- dex_cache->SetResolvedType(type_index, method->GetDeclaringClass());
- }
}
std::unique_ptr<CodeGenerator> codegen(
@@ -1049,7 +1036,7 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
InvokeType invoke_type,
uint16_t class_def_idx,
uint32_t method_idx,
- jobject jclass_loader,
+ Handle<mirror::ClassLoader> jclass_loader,
const DexFile& dex_file,
Handle<mirror::DexCache> dex_cache) const {
CompilerDriver* compiler_driver = GetCompilerDriver();
@@ -1163,7 +1150,6 @@ bool OptimizingCompiler::JitCompile(Thread* self,
Handle<mirror::DexCache> dex_cache(hs.NewHandle(method->GetDexCache()));
DCHECK(method->IsCompilable());
- jobject jclass_loader = class_loader.ToJObject();
const DexFile* dex_file = method->GetDexFile();
const uint16_t class_def_idx = method->GetClassDefIndex();
const DexFile::CodeItem* code_item = dex_file->GetCodeItem(method->GetCodeItemOffset());
@@ -1187,7 +1173,7 @@ bool OptimizingCompiler::JitCompile(Thread* self,
invoke_type,
class_def_idx,
method_idx,
- jclass_loader,
+ class_loader,
*dex_file,
dex_cache,
method,
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index c55fccc7d3..6e332ca59b 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -65,11 +65,13 @@ ReferenceTypeInfo::TypeHandle ReferenceTypePropagation::HandleCache::GetThrowabl
class ReferenceTypePropagation::RTPVisitor : public HGraphDelegateVisitor {
public:
RTPVisitor(HGraph* graph,
+ Handle<mirror::ClassLoader> class_loader,
Handle<mirror::DexCache> hint_dex_cache,
HandleCache* handle_cache,
ArenaVector<HInstruction*>* worklist,
bool is_first_run)
: HGraphDelegateVisitor(graph),
+ class_loader_(class_loader),
hint_dex_cache_(hint_dex_cache),
handle_cache_(handle_cache),
worklist_(worklist),
@@ -101,6 +103,7 @@ class ReferenceTypePropagation::RTPVisitor : public HGraphDelegateVisitor {
bool is_exact);
private:
+ Handle<mirror::ClassLoader> class_loader_;
Handle<mirror::DexCache> hint_dex_cache_;
HandleCache* handle_cache_;
ArenaVector<HInstruction*>* worklist_;
@@ -108,11 +111,13 @@ class ReferenceTypePropagation::RTPVisitor : public HGraphDelegateVisitor {
};
ReferenceTypePropagation::ReferenceTypePropagation(HGraph* graph,
+ Handle<mirror::ClassLoader> class_loader,
Handle<mirror::DexCache> hint_dex_cache,
VariableSizedHandleScope* handles,
bool is_first_run,
const char* name)
: HOptimization(graph, name),
+ class_loader_(class_loader),
hint_dex_cache_(hint_dex_cache),
handle_cache_(handles),
worklist_(graph->GetArena()->Adapter(kArenaAllocReferenceTypePropagation)),
@@ -147,7 +152,12 @@ void ReferenceTypePropagation::ValidateTypes() {
}
void ReferenceTypePropagation::Visit(HInstruction* instruction) {
- RTPVisitor visitor(graph_, hint_dex_cache_, &handle_cache_, &worklist_, is_first_run_);
+ RTPVisitor visitor(graph_,
+ class_loader_,
+ hint_dex_cache_,
+ &handle_cache_,
+ &worklist_,
+ is_first_run_);
instruction->Accept(&visitor);
}
@@ -321,7 +331,12 @@ void ReferenceTypePropagation::Run() {
}
void ReferenceTypePropagation::VisitBasicBlock(HBasicBlock* block) {
- RTPVisitor visitor(graph_, hint_dex_cache_, &handle_cache_, &worklist_, is_first_run_);
+ RTPVisitor visitor(graph_,
+ class_loader_,
+ hint_dex_cache_,
+ &handle_cache_,
+ &worklist_,
+ is_first_run_);
// Handle Phis first as there might be instructions in the same block who depend on them.
for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
VisitPhi(it.Current()->AsPhi());
@@ -542,8 +557,9 @@ void ReferenceTypePropagation::RTPVisitor::UpdateReferenceTypeInfo(HInstruction*
ScopedObjectAccess soa(Thread::Current());
ObjPtr<mirror::DexCache> dex_cache = FindDexCacheWithHint(soa.Self(), dex_file, hint_dex_cache_);
- // Get type from dex cache assuming it was populated by the verifier.
- SetClassAsTypeInfo(instr, dex_cache->GetResolvedType(type_idx), is_exact);
+ ObjPtr<mirror::Class> klass =
+ ClassLinker::LookupResolvedType(type_idx, dex_cache, class_loader_.Get());
+ SetClassAsTypeInfo(instr, klass, is_exact);
}
void ReferenceTypePropagation::RTPVisitor::VisitNewInstance(HNewInstance* instr) {
@@ -556,25 +572,13 @@ void ReferenceTypePropagation::RTPVisitor::VisitNewArray(HNewArray* instr) {
SetClassAsTypeInfo(instr, instr->GetLoadClass()->GetClass().Get(), /* is_exact */ true);
}
-static mirror::Class* GetClassFromDexCache(Thread* self,
- const DexFile& dex_file,
- dex::TypeIndex type_idx,
- Handle<mirror::DexCache> hint_dex_cache)
- REQUIRES_SHARED(Locks::mutator_lock_) {
- ObjPtr<mirror::DexCache> dex_cache = FindDexCacheWithHint(self, dex_file, hint_dex_cache);
- // Get type from dex cache assuming it was populated by the verifier.
- return dex_cache->GetResolvedType(type_idx);
-}
-
void ReferenceTypePropagation::RTPVisitor::VisitParameterValue(HParameterValue* instr) {
// We check if the existing type is valid: the inliner may have set it.
if (instr->GetType() == Primitive::kPrimNot && !instr->GetReferenceTypeInfo().IsValid()) {
- ScopedObjectAccess soa(Thread::Current());
- mirror::Class* resolved_class = GetClassFromDexCache(soa.Self(),
- instr->GetDexFile(),
- instr->GetTypeIndex(),
- hint_dex_cache_);
- SetClassAsTypeInfo(instr, resolved_class, /* is_exact */ false);
+ UpdateReferenceTypeInfo(instr,
+ instr->GetTypeIndex(),
+ instr->GetDexFile(),
+ /* is_exact */ false);
}
}
diff --git a/compiler/optimizing/reference_type_propagation.h b/compiler/optimizing/reference_type_propagation.h
index 4663471729..215e96786b 100644
--- a/compiler/optimizing/reference_type_propagation.h
+++ b/compiler/optimizing/reference_type_propagation.h
@@ -33,6 +33,7 @@ namespace art {
class ReferenceTypePropagation : public HOptimization {
public:
ReferenceTypePropagation(HGraph* graph,
+ Handle<mirror::ClassLoader> class_loader,
Handle<mirror::DexCache> hint_dex_cache,
VariableSizedHandleScope* handles,
bool is_first_run,
@@ -105,6 +106,8 @@ class ReferenceTypePropagation : public HOptimization {
void ValidateTypes();
+ Handle<mirror::ClassLoader> class_loader_;
+
// Note: hint_dex_cache_ is usually, but not necessarily, the dex cache associated with
// graph_->GetDexFile(). Since we may look up also in other dex files, it's used only
// as a hint, to reduce the number of calls to the costly ClassLinker::FindDexCache().
diff --git a/compiler/optimizing/reference_type_propagation_test.cc b/compiler/optimizing/reference_type_propagation_test.cc
index b061c871b0..84a4bab1a9 100644
--- a/compiler/optimizing/reference_type_propagation_test.cc
+++ b/compiler/optimizing/reference_type_propagation_test.cc
@@ -38,6 +38,7 @@ class ReferenceTypePropagationTest : public CommonCompilerTest {
void SetupPropagation(VariableSizedHandleScope* handles) {
graph_->InitializeInexactObjectRTI(handles);
propagation_ = new (&allocator_) ReferenceTypePropagation(graph_,
+ Handle<mirror::ClassLoader>(),
Handle<mirror::DexCache>(),
handles,
true,
diff --git a/compiler/optimizing/scheduler_arm64.cc b/compiler/optimizing/scheduler_arm64.cc
index e3701fbcb1..558dcc4cbc 100644
--- a/compiler/optimizing/scheduler_arm64.cc
+++ b/compiler/optimizing/scheduler_arm64.cc
@@ -31,8 +31,8 @@ void SchedulingLatencyVisitorARM64::VisitBitwiseNegatedRight(
last_visited_latency_ = kArm64IntegerOpLatency;
}
-void SchedulingLatencyVisitorARM64::VisitArm64DataProcWithShifterOp(
- HArm64DataProcWithShifterOp* ATTRIBUTE_UNUSED) {
+void SchedulingLatencyVisitorARM64::VisitDataProcWithShifterOp(
+ HDataProcWithShifterOp* ATTRIBUTE_UNUSED) {
last_visited_latency_ = kArm64DataProcWithShifterOpLatency;
}
diff --git a/compiler/optimizing/scheduler_arm64.h b/compiler/optimizing/scheduler_arm64.h
index 702027c535..7a33720655 100644
--- a/compiler/optimizing/scheduler_arm64.h
+++ b/compiler/optimizing/scheduler_arm64.h
@@ -74,7 +74,8 @@ class SchedulingLatencyVisitorARM64 : public SchedulingLatencyVisitor {
#define FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(M) \
M(BitwiseNegatedRight, unused) \
M(MultiplyAccumulate, unused) \
- M(IntermediateAddress, unused)
+ M(IntermediateAddress, unused) \
+ M(DataProcWithShifterOp, unused)
#define DECLARE_VISIT_INSTRUCTION(type, unused) \
void Visit##type(H##type* instruction) OVERRIDE;
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index 487e4dd498..50ab11bc23 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -499,7 +499,11 @@ GraphAnalysisResult SsaBuilder::BuildSsa() {
// 4) Compute type of reference type instructions. The pass assumes that
// NullConstant has been fixed up.
- ReferenceTypePropagation(graph_, dex_cache_, handles_, /* is_first_run */ true).Run();
+ ReferenceTypePropagation(graph_,
+ class_loader_,
+ dex_cache_,
+ handles_,
+ /* is_first_run */ true).Run();
// 5) HInstructionBuilder duplicated ArrayGet instructions with ambiguous type
// (int/float or long/double) and marked ArraySets with ambiguous input type.
diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h
index 45dac54115..978f113ec4 100644
--- a/compiler/optimizing/ssa_builder.h
+++ b/compiler/optimizing/ssa_builder.h
@@ -48,9 +48,11 @@ namespace art {
class SsaBuilder : public ValueObject {
public:
SsaBuilder(HGraph* graph,
+ Handle<mirror::ClassLoader> class_loader,
Handle<mirror::DexCache> dex_cache,
VariableSizedHandleScope* handles)
: graph_(graph),
+ class_loader_(class_loader),
dex_cache_(dex_cache),
handles_(handles),
agets_fixed_(false),
@@ -115,6 +117,7 @@ class SsaBuilder : public ValueObject {
void RemoveRedundantUninitializedStrings();
HGraph* graph_;
+ Handle<mirror::ClassLoader> class_loader_;
Handle<mirror::DexCache> dex_cache_;
VariableSizedHandleScope* const handles_;
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 5a466e1d5d..6eab302dab 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -642,39 +642,6 @@ void X86Assembler::movhpd(const Address& dst, XmmRegister src) {
}
-void X86Assembler::psrldq(XmmRegister reg, const Immediate& shift_count) {
- DCHECK(shift_count.is_uint8());
-
- AssemblerBuffer::EnsureCapacity ensured(&buffer_);
- EmitUint8(0x66);
- EmitUint8(0x0F);
- EmitUint8(0x73);
- EmitXmmRegisterOperand(3, reg);
- EmitUint8(shift_count.value());
-}
-
-
-void X86Assembler::psrlq(XmmRegister reg, const Immediate& shift_count) {
- DCHECK(shift_count.is_uint8());
-
- AssemblerBuffer::EnsureCapacity ensured(&buffer_);
- EmitUint8(0x66);
- EmitUint8(0x0F);
- EmitUint8(0x73);
- EmitXmmRegisterOperand(2, reg);
- EmitUint8(shift_count.value());
-}
-
-
-void X86Assembler::punpckldq(XmmRegister dst, XmmRegister src) {
- AssemblerBuffer::EnsureCapacity ensured(&buffer_);
- EmitUint8(0x66);
- EmitUint8(0x0F);
- EmitUint8(0x62);
- EmitXmmRegisterOperand(dst, src);
-}
-
-
void X86Assembler::addsd(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0xF2);
@@ -828,6 +795,51 @@ void X86Assembler::movdqu(const Address& dst, XmmRegister src) {
}
+void X86Assembler::paddb(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0xFC);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::psubb(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0xF8);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::paddw(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0xFD);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::psubw(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0xF9);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::pmullw(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0xD5);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+
void X86Assembler::paddd(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x66);
@@ -856,6 +868,24 @@ void X86Assembler::pmulld(XmmRegister dst, XmmRegister src) {
}
+void X86Assembler::paddq(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0xD4);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::psubq(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0xFB);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+
void X86Assembler::cvtsi2ss(XmmRegister dst, Register src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0xF3);
@@ -1186,6 +1216,141 @@ void X86Assembler::pshufd(XmmRegister dst, XmmRegister src, const Immediate& imm
}
+void X86Assembler::punpcklbw(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x60);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::punpcklwd(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x61);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::punpckldq(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x62);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::punpcklqdq(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x6C);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::psllw(XmmRegister reg, const Immediate& shift_count) {
+ DCHECK(shift_count.is_uint8());
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x71);
+ EmitXmmRegisterOperand(6, reg);
+ EmitUint8(shift_count.value());
+}
+
+
+void X86Assembler::pslld(XmmRegister reg, const Immediate& shift_count) {
+ DCHECK(shift_count.is_uint8());
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x72);
+ EmitXmmRegisterOperand(6, reg);
+ EmitUint8(shift_count.value());
+}
+
+
+void X86Assembler::psllq(XmmRegister reg, const Immediate& shift_count) {
+ DCHECK(shift_count.is_uint8());
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x73);
+ EmitXmmRegisterOperand(6, reg);
+ EmitUint8(shift_count.value());
+}
+
+
+void X86Assembler::psraw(XmmRegister reg, const Immediate& shift_count) {
+ DCHECK(shift_count.is_uint8());
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x71);
+ EmitXmmRegisterOperand(4, reg);
+ EmitUint8(shift_count.value());
+}
+
+
+void X86Assembler::psrad(XmmRegister reg, const Immediate& shift_count) {
+ DCHECK(shift_count.is_uint8());
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x72);
+ EmitXmmRegisterOperand(4, reg);
+ EmitUint8(shift_count.value());
+}
+
+
+void X86Assembler::psrlw(XmmRegister reg, const Immediate& shift_count) {
+ DCHECK(shift_count.is_uint8());
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x71);
+ EmitXmmRegisterOperand(2, reg);
+ EmitUint8(shift_count.value());
+}
+
+
+void X86Assembler::psrld(XmmRegister reg, const Immediate& shift_count) {
+ DCHECK(shift_count.is_uint8());
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x72);
+ EmitXmmRegisterOperand(2, reg);
+ EmitUint8(shift_count.value());
+}
+
+
+void X86Assembler::psrlq(XmmRegister reg, const Immediate& shift_count) {
+ DCHECK(shift_count.is_uint8());
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x73);
+ EmitXmmRegisterOperand(2, reg);
+ EmitUint8(shift_count.value());
+}
+
+
+void X86Assembler::psrldq(XmmRegister reg, const Immediate& shift_count) {
+ DCHECK(shift_count.is_uint8());
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x73);
+ EmitXmmRegisterOperand(3, reg);
+ EmitUint8(shift_count.value());
+}
+
+
void X86Assembler::fldl(const Address& src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0xDD);
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index 4343e2e734..2999599fc5 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -408,14 +408,9 @@ class X86Assembler FINAL : public Assembler {
void movsd(const Address& dst, XmmRegister src);
void movsd(XmmRegister dst, XmmRegister src);
- void psrlq(XmmRegister reg, const Immediate& shift_count);
- void punpckldq(XmmRegister dst, XmmRegister src);
-
void movhpd(XmmRegister dst, const Address& src);
void movhpd(const Address& dst, XmmRegister src);
- void psrldq(XmmRegister reg, const Immediate& shift_count);
-
void addsd(XmmRegister dst, XmmRegister src);
void addsd(XmmRegister dst, const Address& src);
void subsd(XmmRegister dst, XmmRegister src);
@@ -436,10 +431,20 @@ class X86Assembler FINAL : public Assembler {
void movdqa(const Address& dst, XmmRegister src); // store aligned
void movdqu(const Address& dst, XmmRegister src); // store unaligned
- void paddd(XmmRegister dst, XmmRegister src); // no addr variant (for now)
+ void paddb(XmmRegister dst, XmmRegister src); // no addr variant (for now)
+ void psubb(XmmRegister dst, XmmRegister src);
+
+ void paddw(XmmRegister dst, XmmRegister src);
+ void psubw(XmmRegister dst, XmmRegister src);
+ void pmullw(XmmRegister dst, XmmRegister src);
+
+ void paddd(XmmRegister dst, XmmRegister src);
void psubd(XmmRegister dst, XmmRegister src);
void pmulld(XmmRegister dst, XmmRegister src);
+ void paddq(XmmRegister dst, XmmRegister src);
+ void psubq(XmmRegister dst, XmmRegister src);
+
void cvtsi2ss(XmmRegister dst, Register src);
void cvtsi2sd(XmmRegister dst, Register src);
@@ -489,6 +494,24 @@ class X86Assembler FINAL : public Assembler {
void shufps(XmmRegister dst, XmmRegister src, const Immediate& imm);
void pshufd(XmmRegister dst, XmmRegister src, const Immediate& imm);
+ void punpcklbw(XmmRegister dst, XmmRegister src);
+ void punpcklwd(XmmRegister dst, XmmRegister src);
+ void punpckldq(XmmRegister dst, XmmRegister src);
+ void punpcklqdq(XmmRegister dst, XmmRegister src);
+
+ void psllw(XmmRegister reg, const Immediate& shift_count);
+ void pslld(XmmRegister reg, const Immediate& shift_count);
+ void psllq(XmmRegister reg, const Immediate& shift_count);
+
+ void psraw(XmmRegister reg, const Immediate& shift_count);
+ void psrad(XmmRegister reg, const Immediate& shift_count);
+ // no psraq
+
+ void psrlw(XmmRegister reg, const Immediate& shift_count);
+ void psrld(XmmRegister reg, const Immediate& shift_count);
+ void psrlq(XmmRegister reg, const Immediate& shift_count);
+ void psrldq(XmmRegister reg, const Immediate& shift_count);
+
void flds(const Address& src);
void fstps(const Address& dst);
void fsts(const Address& dst);
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index c6ab893aea..a74bea207e 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -122,18 +122,6 @@ TEST_F(AssemblerX86Test, Movntl) {
DriverStr(expected, "movntl");
}
-TEST_F(AssemblerX86Test, psrlq) {
- GetAssembler()->psrlq(x86::XMM0, CreateImmediate(32));
- const char* expected = "psrlq $0x20, %xmm0\n";
- DriverStr(expected, "psrlq");
-}
-
-TEST_F(AssemblerX86Test, punpckldq) {
- GetAssembler()->punpckldq(x86::XMM0, x86::XMM1);
- const char* expected = "punpckldq %xmm1, %xmm0\n";
- DriverStr(expected, "punpckldq");
-}
-
TEST_F(AssemblerX86Test, LoadLongConstant) {
GetAssembler()->LoadLongConstant(x86::XMM0, 51);
const char* expected =
@@ -521,6 +509,26 @@ TEST_F(AssemblerX86Test, DivPD) {
DriverStr(RepeatFF(&x86::X86Assembler::divpd, "divpd %{reg2}, %{reg1}"), "divpd");
}
+TEST_F(AssemblerX86Test, PAddB) {
+ DriverStr(RepeatFF(&x86::X86Assembler::paddb, "paddb %{reg2}, %{reg1}"), "paddb");
+}
+
+TEST_F(AssemblerX86Test, PSubB) {
+ DriverStr(RepeatFF(&x86::X86Assembler::psubb, "psubb %{reg2}, %{reg1}"), "psubb");
+}
+
+TEST_F(AssemblerX86Test, PAddW) {
+ DriverStr(RepeatFF(&x86::X86Assembler::paddw, "paddw %{reg2}, %{reg1}"), "paddw");
+}
+
+TEST_F(AssemblerX86Test, PSubW) {
+ DriverStr(RepeatFF(&x86::X86Assembler::psubw, "psubw %{reg2}, %{reg1}"), "psubw");
+}
+
+TEST_F(AssemblerX86Test, PMullW) {
+ DriverStr(RepeatFF(&x86::X86Assembler::pmullw, "pmullw %{reg2}, %{reg1}"), "pmullw");
+}
+
TEST_F(AssemblerX86Test, PAddD) {
DriverStr(RepeatFF(&x86::X86Assembler::paddd, "paddd %{reg2}, %{reg1}"), "paddd");
}
@@ -533,6 +541,14 @@ TEST_F(AssemblerX86Test, PMullD) {
DriverStr(RepeatFF(&x86::X86Assembler::pmulld, "pmulld %{reg2}, %{reg1}"), "pmulld");
}
+TEST_F(AssemblerX86Test, PAddQ) {
+ DriverStr(RepeatFF(&x86::X86Assembler::paddq, "paddq %{reg2}, %{reg1}"), "paddq");
+}
+
+TEST_F(AssemblerX86Test, PSubQ) {
+ DriverStr(RepeatFF(&x86::X86Assembler::psubq, "psubq %{reg2}, %{reg1}"), "psubq");
+}
+
TEST_F(AssemblerX86Test, XorPD) {
DriverStr(RepeatFF(&x86::X86Assembler::xorpd, "xorpd %{reg2}, %{reg1}"), "xorpd");
}
@@ -581,6 +597,67 @@ TEST_F(AssemblerX86Test, PShufD) {
DriverStr(RepeatFFI(&x86::X86Assembler::pshufd, 1, "pshufd ${imm}, %{reg2}, %{reg1}"), "pshufd");
}
+TEST_F(AssemblerX86Test, Punpcklbw) {
+ DriverStr(RepeatFF(&x86::X86Assembler::punpcklbw, "punpcklbw %{reg2}, %{reg1}"), "punpcklbw");
+}
+
+TEST_F(AssemblerX86Test, Punpcklwd) {
+ DriverStr(RepeatFF(&x86::X86Assembler::punpcklwd, "punpcklwd %{reg2}, %{reg1}"), "punpcklwd");
+}
+
+TEST_F(AssemblerX86Test, Punpckldq) {
+ DriverStr(RepeatFF(&x86::X86Assembler::punpckldq, "punpckldq %{reg2}, %{reg1}"), "punpckldq");
+}
+
+TEST_F(AssemblerX86Test, Punpcklqdq) {
+ DriverStr(RepeatFF(&x86::X86Assembler::punpcklqdq, "punpcklqdq %{reg2}, %{reg1}"), "punpcklqdq");
+}
+
+TEST_F(AssemblerX86Test, psllw) {
+ GetAssembler()->psllw(x86::XMM0, CreateImmediate(16));
+ DriverStr("psllw $0x10, %xmm0\n", "psllwi");
+}
+
+TEST_F(AssemblerX86Test, pslld) {
+ GetAssembler()->pslld(x86::XMM0, CreateImmediate(16));
+ DriverStr("pslld $0x10, %xmm0\n", "pslldi");
+}
+
+TEST_F(AssemblerX86Test, psllq) {
+ GetAssembler()->psllq(x86::XMM0, CreateImmediate(16));
+ DriverStr("psllq $0x10, %xmm0\n", "psllqi");
+}
+
+TEST_F(AssemblerX86Test, psraw) {
+ GetAssembler()->psraw(x86::XMM0, CreateImmediate(16));
+ DriverStr("psraw $0x10, %xmm0\n", "psrawi");
+}
+
+TEST_F(AssemblerX86Test, psrad) {
+ GetAssembler()->psrad(x86::XMM0, CreateImmediate(16));
+ DriverStr("psrad $0x10, %xmm0\n", "psradi");
+}
+
+TEST_F(AssemblerX86Test, psrlw) {
+ GetAssembler()->psrlw(x86::XMM0, CreateImmediate(16));
+ DriverStr("psrlw $0x10, %xmm0\n", "psrlwi");
+}
+
+TEST_F(AssemblerX86Test, psrld) {
+ GetAssembler()->psrld(x86::XMM0, CreateImmediate(16));
+ DriverStr("psrld $0x10, %xmm0\n", "psrldi");
+}
+
+TEST_F(AssemblerX86Test, psrlq) {
+ GetAssembler()->psrlq(x86::XMM0, CreateImmediate(16));
+ DriverStr("psrlq $0x10, %xmm0\n", "psrlqi");
+}
+
+TEST_F(AssemblerX86Test, psrldq) {
+ GetAssembler()->psrldq(x86::XMM0, CreateImmediate(16));
+ DriverStr("psrldq $0x10, %xmm0\n", "psrldqi");
+}
+
/////////////////
// Near labels //
/////////////////
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index b41be80ae4..458204aca9 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -882,6 +882,56 @@ void X86_64Assembler::movdqu(const Address& dst, XmmRegister src) {
}
+void X86_64Assembler::paddb(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0xFC);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+
+void X86_64Assembler::psubb(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0xF8);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+
+void X86_64Assembler::paddw(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0xFD);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+
+void X86_64Assembler::psubw(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0xF9);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+
+void X86_64Assembler::pmullw(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0xD5);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+
void X86_64Assembler::paddd(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x66);
@@ -913,6 +963,26 @@ void X86_64Assembler::pmulld(XmmRegister dst, XmmRegister src) {
}
+void X86_64Assembler::paddq(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0xD4);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+
+void X86_64Assembler::psubq(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0xFB);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+
void X86_64Assembler::cvtsi2ss(XmmRegister dst, CpuRegister src) {
cvtsi2ss(dst, src, false);
}
@@ -1354,6 +1424,142 @@ void X86_64Assembler::pshufd(XmmRegister dst, XmmRegister src, const Immediate&
}
+void X86_64Assembler::punpcklbw(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0x60);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+
+void X86_64Assembler::punpcklwd(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0x61);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+
+void X86_64Assembler::punpckldq(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0x62);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+
+void X86_64Assembler::punpcklqdq(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0x6C);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+
+void X86_64Assembler::psllw(XmmRegister reg, const Immediate& shift_count) {
+ DCHECK(shift_count.is_uint8());
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex(false, false, false, false, reg.NeedsRex());
+ EmitUint8(0x0F);
+ EmitUint8(0x71);
+ EmitXmmRegisterOperand(6, reg);
+ EmitUint8(shift_count.value());
+}
+
+
+void X86_64Assembler::pslld(XmmRegister reg, const Immediate& shift_count) {
+ DCHECK(shift_count.is_uint8());
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex(false, false, false, false, reg.NeedsRex());
+ EmitUint8(0x0F);
+ EmitUint8(0x72);
+ EmitXmmRegisterOperand(6, reg);
+ EmitUint8(shift_count.value());
+}
+
+
+void X86_64Assembler::psllq(XmmRegister reg, const Immediate& shift_count) {
+ DCHECK(shift_count.is_uint8());
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex(false, false, false, false, reg.NeedsRex());
+ EmitUint8(0x0F);
+ EmitUint8(0x73);
+ EmitXmmRegisterOperand(6, reg);
+ EmitUint8(shift_count.value());
+}
+
+
+void X86_64Assembler::psraw(XmmRegister reg, const Immediate& shift_count) {
+ DCHECK(shift_count.is_uint8());
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex(false, false, false, false, reg.NeedsRex());
+ EmitUint8(0x0F);
+ EmitUint8(0x71);
+ EmitXmmRegisterOperand(4, reg);
+ EmitUint8(shift_count.value());
+}
+
+
+void X86_64Assembler::psrad(XmmRegister reg, const Immediate& shift_count) {
+ DCHECK(shift_count.is_uint8());
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex(false, false, false, false, reg.NeedsRex());
+ EmitUint8(0x0F);
+ EmitUint8(0x72);
+ EmitXmmRegisterOperand(4, reg);
+ EmitUint8(shift_count.value());
+}
+
+
+void X86_64Assembler::psrlw(XmmRegister reg, const Immediate& shift_count) {
+ DCHECK(shift_count.is_uint8());
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex(false, false, false, false, reg.NeedsRex());
+ EmitUint8(0x0F);
+ EmitUint8(0x71);
+ EmitXmmRegisterOperand(2, reg);
+ EmitUint8(shift_count.value());
+}
+
+
+void X86_64Assembler::psrld(XmmRegister reg, const Immediate& shift_count) {
+ DCHECK(shift_count.is_uint8());
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex(false, false, false, false, reg.NeedsRex());
+ EmitUint8(0x0F);
+ EmitUint8(0x72);
+ EmitXmmRegisterOperand(2, reg);
+ EmitUint8(shift_count.value());
+}
+
+
+void X86_64Assembler::psrlq(XmmRegister reg, const Immediate& shift_count) {
+ DCHECK(shift_count.is_uint8());
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex(false, false, false, false, reg.NeedsRex());
+ EmitUint8(0x0F);
+ EmitUint8(0x73);
+ EmitXmmRegisterOperand(2, reg);
+ EmitUint8(shift_count.value());
+}
+
+
void X86_64Assembler::fldl(const Address& src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0xDD);
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 43ea12a4cb..0dc11d840b 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -452,10 +452,20 @@ class X86_64Assembler FINAL : public Assembler {
void movdqa(const Address& dst, XmmRegister src); // store aligned
void movdqu(const Address& dst, XmmRegister src); // store unaligned
- void paddd(XmmRegister dst, XmmRegister src); // no addr variant (for now)
+ void paddb(XmmRegister dst, XmmRegister src); // no addr variant (for now)
+ void psubb(XmmRegister dst, XmmRegister src);
+
+ void paddw(XmmRegister dst, XmmRegister src);
+ void psubw(XmmRegister dst, XmmRegister src);
+ void pmullw(XmmRegister dst, XmmRegister src);
+
+ void paddd(XmmRegister dst, XmmRegister src);
void psubd(XmmRegister dst, XmmRegister src);
void pmulld(XmmRegister dst, XmmRegister src);
+ void paddq(XmmRegister dst, XmmRegister src);
+ void psubq(XmmRegister dst, XmmRegister src);
+
void cvtsi2ss(XmmRegister dst, CpuRegister src); // Note: this is the r/m32 version.
void cvtsi2ss(XmmRegister dst, CpuRegister src, bool is64bit);
void cvtsi2ss(XmmRegister dst, const Address& src, bool is64bit);
@@ -512,6 +522,23 @@ class X86_64Assembler FINAL : public Assembler {
void shufps(XmmRegister dst, XmmRegister src, const Immediate& imm);
void pshufd(XmmRegister dst, XmmRegister src, const Immediate& imm);
+ void punpcklbw(XmmRegister dst, XmmRegister src);
+ void punpcklwd(XmmRegister dst, XmmRegister src);
+ void punpckldq(XmmRegister dst, XmmRegister src);
+ void punpcklqdq(XmmRegister dst, XmmRegister src);
+
+ void psllw(XmmRegister reg, const Immediate& shift_count);
+ void pslld(XmmRegister reg, const Immediate& shift_count);
+ void psllq(XmmRegister reg, const Immediate& shift_count);
+
+ void psraw(XmmRegister reg, const Immediate& shift_count);
+ void psrad(XmmRegister reg, const Immediate& shift_count);
+ // no psraq
+
+ void psrlw(XmmRegister reg, const Immediate& shift_count);
+ void psrld(XmmRegister reg, const Immediate& shift_count);
+ void psrlq(XmmRegister reg, const Immediate& shift_count);
+
void flds(const Address& src);
void fstps(const Address& dst);
void fsts(const Address& dst);
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index aeb1911835..fe9449720f 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -1128,6 +1128,26 @@ TEST_F(AssemblerX86_64Test, Divpd) {
DriverStr(RepeatFF(&x86_64::X86_64Assembler::divpd, "divpd %{reg2}, %{reg1}"), "divpd");
}
+TEST_F(AssemblerX86_64Test, Paddb) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::paddb, "paddb %{reg2}, %{reg1}"), "paddb");
+}
+
+TEST_F(AssemblerX86_64Test, Psubb) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::psubb, "psubb %{reg2}, %{reg1}"), "psubb");
+}
+
+TEST_F(AssemblerX86_64Test, Paddw) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::paddw, "paddw %{reg2}, %{reg1}"), "paddw");
+}
+
+TEST_F(AssemblerX86_64Test, Psubw) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::psubw, "psubw %{reg2}, %{reg1}"), "psubw");
+}
+
+TEST_F(AssemblerX86_64Test, Pmullw) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::pmullw, "pmullw %{reg2}, %{reg1}"), "pmullw");
+}
+
TEST_F(AssemblerX86_64Test, Paddd) {
DriverStr(RepeatFF(&x86_64::X86_64Assembler::paddd, "paddd %{reg2}, %{reg1}"), "paddd");
}
@@ -1140,6 +1160,14 @@ TEST_F(AssemblerX86_64Test, Pmulld) {
DriverStr(RepeatFF(&x86_64::X86_64Assembler::pmulld, "pmulld %{reg2}, %{reg1}"), "pmulld");
}
+TEST_F(AssemblerX86_64Test, Paddq) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::paddq, "paddq %{reg2}, %{reg1}"), "paddq");
+}
+
+TEST_F(AssemblerX86_64Test, Psubq) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::psubq, "psubq %{reg2}, %{reg1}"), "psubq");
+}
+
TEST_F(AssemblerX86_64Test, Cvtsi2ss) {
DriverStr(RepeatFr(&x86_64::X86_64Assembler::cvtsi2ss, "cvtsi2ss %{reg2}, %{reg1}"), "cvtsi2ss");
}
@@ -1261,6 +1289,78 @@ TEST_F(AssemblerX86_64Test, PShufd) {
DriverStr(RepeatFFI(&x86_64::X86_64Assembler::pshufd, 1, "pshufd ${imm}, %{reg2}, %{reg1}"), "pshufd");
}
+TEST_F(AssemblerX86_64Test, Punpcklbw) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::punpcklbw, "punpcklbw %{reg2}, %{reg1}"), "punpcklbw");
+}
+
+TEST_F(AssemblerX86_64Test, Punpcklwd) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::punpcklwd, "punpcklwd %{reg2}, %{reg1}"), "punpcklwd");
+}
+
+TEST_F(AssemblerX86_64Test, Punpckldq) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::punpckldq, "punpckldq %{reg2}, %{reg1}"), "punpckldq");
+}
+
+TEST_F(AssemblerX86_64Test, Punpcklqdq) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::punpcklqdq, "punpcklqdq %{reg2}, %{reg1}"), "punpcklqdq");
+}
+
+TEST_F(AssemblerX86_64Test, Psllw) {
+ GetAssembler()->psllw(x86_64::XmmRegister(x86_64::XMM0), x86_64::Immediate(1));
+ GetAssembler()->psllw(x86_64::XmmRegister(x86_64::XMM15), x86_64::Immediate(2));
+ DriverStr("psllw $1, %xmm0\n"
+ "psllw $2, %xmm15\n", "psllwi");
+}
+
+TEST_F(AssemblerX86_64Test, Pslld) {
+ GetAssembler()->pslld(x86_64::XmmRegister(x86_64::XMM0), x86_64::Immediate(1));
+ GetAssembler()->pslld(x86_64::XmmRegister(x86_64::XMM15), x86_64::Immediate(2));
+ DriverStr("pslld $1, %xmm0\n"
+ "pslld $2, %xmm15\n", "pslldi");
+}
+
+TEST_F(AssemblerX86_64Test, Psllq) {
+ GetAssembler()->psllq(x86_64::XmmRegister(x86_64::XMM0), x86_64::Immediate(1));
+ GetAssembler()->psllq(x86_64::XmmRegister(x86_64::XMM15), x86_64::Immediate(2));
+ DriverStr("psllq $1, %xmm0\n"
+ "psllq $2, %xmm15\n", "psllqi");
+}
+
+TEST_F(AssemblerX86_64Test, Psraw) {
+ GetAssembler()->psraw(x86_64::XmmRegister(x86_64::XMM0), x86_64::Immediate(1));
+ GetAssembler()->psraw(x86_64::XmmRegister(x86_64::XMM15), x86_64::Immediate(2));
+ DriverStr("psraw $1, %xmm0\n"
+ "psraw $2, %xmm15\n", "psrawi");
+}
+
+TEST_F(AssemblerX86_64Test, Psrad) {
+ GetAssembler()->psrad(x86_64::XmmRegister(x86_64::XMM0), x86_64::Immediate(1));
+ GetAssembler()->psrad(x86_64::XmmRegister(x86_64::XMM15), x86_64::Immediate(2));
+ DriverStr("psrad $1, %xmm0\n"
+ "psrad $2, %xmm15\n", "psradi");
+}
+
+TEST_F(AssemblerX86_64Test, Psrlw) {
+ GetAssembler()->psrlw(x86_64::XmmRegister(x86_64::XMM0), x86_64::Immediate(1));
+ GetAssembler()->psrlw(x86_64::XmmRegister(x86_64::XMM15), x86_64::Immediate(2));
+ DriverStr("psrlw $1, %xmm0\n"
+ "psrlw $2, %xmm15\n", "psrlwi");
+}
+
+TEST_F(AssemblerX86_64Test, Psrld) {
+ GetAssembler()->psrld(x86_64::XmmRegister(x86_64::XMM0), x86_64::Immediate(1));
+ GetAssembler()->psrld(x86_64::XmmRegister(x86_64::XMM15), x86_64::Immediate(2));
+ DriverStr("psrld $1, %xmm0\n"
+ "psrld $2, %xmm15\n", "psrldi");
+}
+
+TEST_F(AssemblerX86_64Test, Psrlq) {
+ GetAssembler()->psrlq(x86_64::XmmRegister(x86_64::XMM0), x86_64::Immediate(1));
+ GetAssembler()->psrlq(x86_64::XmmRegister(x86_64::XMM15), x86_64::Immediate(2));
+ DriverStr("psrlq $1, %xmm0\n"
+ "psrlq $2, %xmm15\n", "psrlqi");
+}
+
TEST_F(AssemblerX86_64Test, UcomissAddress) {
GetAssembler()->ucomiss(x86_64::XmmRegister(x86_64::XMM0), x86_64::Address(
x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12));