Diffstat (limited to 'compiler')
-rw-r--r--  compiler/common_compiler_test.cc | 10
-rw-r--r--  compiler/common_compiler_test.h | 3
-rw-r--r--  compiler/dex/mir_method_info.cc | 6
-rw-r--r--  compiler/dex/quick/dex_file_method_inliner.cc | 14
-rw-r--r--  compiler/dex/quick/dex_file_method_inliner.h | 1
-rw-r--r--  compiler/dex/quick/quick_cfi_test.cc | 4
-rw-r--r--  compiler/dex/quick/x86/quick_assemble_x86_test.cc | 4
-rw-r--r--  compiler/driver/compiled_method_storage_test.cc | 2
-rw-r--r--  compiler/driver/compiler_driver.cc | 44
-rw-r--r--  compiler/driver/compiler_driver.h | 11
-rw-r--r--  compiler/driver/compiler_driver_test.cc | 89
-rw-r--r--  compiler/driver/compiler_options.cc | 16
-rw-r--r--  compiler/driver/compiler_options.h | 15
-rw-r--r--  compiler/dwarf/register.h | 2
-rw-r--r--  compiler/elf_builder.h | 6
-rw-r--r--  compiler/elf_writer_debug.cc | 2
-rw-r--r--  compiler/image_test.cc | 43
-rw-r--r--  compiler/jit/jit_compiler.cc | 6
-rw-r--r--  compiler/linker/relative_patcher_test.h | 2
-rw-r--r--  compiler/oat_test.cc | 376
-rw-r--r--  compiler/oat_writer.cc | 1060
-rw-r--r--  compiler/oat_writer.h | 142
-rw-r--r--  compiler/optimizing/builder.h | 11
-rw-r--r--  compiler/optimizing/code_generator.cc | 304
-rw-r--r--  compiler/optimizing/code_generator.h | 31
-rw-r--r--  compiler/optimizing/code_generator_arm.cc | 135
-rw-r--r--  compiler/optimizing/code_generator_arm.h | 8
-rw-r--r--  compiler/optimizing/code_generator_arm64.cc | 867
-rw-r--r--  compiler/optimizing/code_generator_arm64.h | 128
-rw-r--r--  compiler/optimizing/code_generator_mips.cc | 70
-rw-r--r--  compiler/optimizing/code_generator_mips.h | 5
-rw-r--r--  compiler/optimizing/code_generator_mips64.cc | 31
-rw-r--r--  compiler/optimizing/code_generator_mips64.h | 5
-rw-r--r--  compiler/optimizing/code_generator_x86.cc | 96
-rw-r--r--  compiler/optimizing/code_generator_x86.h | 8
-rw-r--r--  compiler/optimizing/code_generator_x86_64.cc | 73
-rw-r--r--  compiler/optimizing/code_generator_x86_64.h | 7
-rw-r--r--  compiler/optimizing/codegen_test.cc | 701
-rw-r--r--  compiler/optimizing/dead_code_elimination.cc | 12
-rw-r--r--  compiler/optimizing/dominator_test.cc | 5
-rw-r--r--  compiler/optimizing/graph_checker.cc | 20
-rw-r--r--  compiler/optimizing/graph_test.cc | 4
-rw-r--r--  compiler/optimizing/inliner.cc | 20
-rw-r--r--  compiler/optimizing/intrinsics.cc | 10
-rw-r--r--  compiler/optimizing/intrinsics.h | 6
-rw-r--r--  compiler/optimizing/intrinsics_arm.cc | 2
-rw-r--r--  compiler/optimizing/intrinsics_arm64.cc | 45
-rw-r--r--  compiler/optimizing/intrinsics_list.h | 2
-rw-r--r--  compiler/optimizing/intrinsics_mips.cc | 3
-rw-r--r--  compiler/optimizing/intrinsics_mips64.cc | 3
-rw-r--r--  compiler/optimizing/intrinsics_x86.cc | 75
-rw-r--r--  compiler/optimizing/intrinsics_x86_64.cc | 64
-rw-r--r--  compiler/optimizing/nodes.cc | 45
-rw-r--r--  compiler/optimizing/nodes.h | 18
-rw-r--r--  compiler/optimizing/optimizing_compiler.cc | 155
-rw-r--r--  compiler/optimizing/parallel_move_resolver.cc | 2
-rw-r--r--  compiler/optimizing/register_allocator.cc | 27
-rw-r--r--  compiler/optimizing/ssa_builder.cc | 55
-rw-r--r--  compiler/optimizing/ssa_builder.h | 4
-rw-r--r--  compiler/profile_assistant.cc | 132
-rw-r--r--  compiler/profile_assistant.h | 11
-rw-r--r--  compiler/profile_assistant_test.cc | 279
-rw-r--r--  compiler/utils/test_dex_file_builder.h | 9
-rw-r--r--  compiler/utils/x86/assembler_x86.cc | 16
-rw-r--r--  compiler/utils/x86/assembler_x86.h | 4
-rw-r--r--  compiler/utils/x86/assembler_x86_test.cc | 13
-rw-r--r--  compiler/utils/x86_64/assembler_x86_64.cc | 36
-rw-r--r--  compiler/utils/x86_64/assembler_x86_64.h | 5
-rw-r--r--  compiler/utils/x86_64/assembler_x86_64_test.cc | 38
69 files changed, 3597 insertions(+), 1861 deletions(-)
diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc
index b5fd1e074f..afc8463878 100644
--- a/compiler/common_compiler_test.cc
+++ b/compiler/common_compiler_test.cc
@@ -168,6 +168,12 @@ std::unordered_set<std::string>* CommonCompilerTest::GetCompiledMethods() {
return nullptr;
}
+// Get ProfileCompilationInfo that should be passed to the driver.
+ProfileCompilationInfo* CommonCompilerTest::GetProfileCompilationInfo() {
+ // Null means profile information will not be taken into account.
+ return nullptr;
+}
+
void CommonCompilerTest::SetUp() {
CommonRuntimeTest::SetUp();
{
@@ -204,12 +210,10 @@ void CommonCompilerTest::CreateCompilerDriver(Compiler::Kind kind, InstructionSe
2,
true,
true,
- "",
- false,
timer_.get(),
-1,
/* dex_to_oat_map */ nullptr,
- /* profile_compilation_info */ nullptr));
+ GetProfileCompilationInfo()));
// We typically don't generate an image in unit tests, disable this optimization by default.
compiler_driver_->SetSupportBootImageFixup(false);
}
diff --git a/compiler/common_compiler_test.h b/compiler/common_compiler_test.h
index b491946dc3..7e0fbabff8 100644
--- a/compiler/common_compiler_test.h
+++ b/compiler/common_compiler_test.h
@@ -23,6 +23,7 @@
#include "common_runtime_test.h"
#include "compiler.h"
+#include "jit/offline_profiling_info.h"
#include "oat_file.h"
namespace art {
@@ -75,6 +76,8 @@ class CommonCompilerTest : public CommonRuntimeTest {
// driver assumes ownership of the set, so the test should properly release the set.
virtual std::unordered_set<std::string>* GetCompiledMethods();
+ virtual ProfileCompilationInfo* GetProfileCompilationInfo();
+
virtual void TearDown();
void CompileClass(mirror::ClassLoader* class_loader, const char* class_name)
diff --git a/compiler/dex/mir_method_info.cc b/compiler/dex/mir_method_info.cc
index 658e7d67a0..c250bd9fd2 100644
--- a/compiler/dex/mir_method_info.cc
+++ b/compiler/dex/mir_method_info.cc
@@ -100,8 +100,12 @@ void MirMethodLoweringInfo::Resolve(CompilerDriver* compiler_driver,
} else {
// The method index is actually the dex PC in this case.
// Calculate the proper dex file and target method idx.
+
+ // We must be in JIT mode if we get here.
CHECK(use_jit);
- CHECK_EQ(invoke_type, kVirtual);
+
+ // The invoke type better be virtual, except for the string init special case above.
+ CHECK_EQ(invoke_type, string_init ? kDirect : kVirtual);
// Don't devirt if we are in a different dex file since we can't have direct invokes in
// another dex file unless we always put a direct / patch pointer.
devirt_target = nullptr;
diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc
index 32d751861a..3766093fa8 100644
--- a/compiler/dex/quick/dex_file_method_inliner.cc
+++ b/compiler/dex/quick/dex_file_method_inliner.cc
@@ -39,6 +39,7 @@ static constexpr bool kIntrinsicIsStatic[] = {
true, // kIntrinsicFloatCvt
true, // kIntrinsicReverseBits
true, // kIntrinsicReverseBytes
+ true, // kIntrinsicBitCount
true, // kIntrinsicNumberOfLeadingZeros
true, // kIntrinsicNumberOfTrailingZeros
true, // kIntrinsicRotateRight
@@ -99,6 +100,7 @@ static_assert(kIntrinsicIsStatic[kIntrinsicDoubleCvt], "DoubleCvt must be static
static_assert(kIntrinsicIsStatic[kIntrinsicFloatCvt], "FloatCvt must be static");
static_assert(kIntrinsicIsStatic[kIntrinsicReverseBits], "ReverseBits must be static");
static_assert(kIntrinsicIsStatic[kIntrinsicReverseBytes], "ReverseBytes must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicBitCount], "BitCount must be static");
static_assert(kIntrinsicIsStatic[kIntrinsicNumberOfLeadingZeros],
"NumberOfLeadingZeros must be static");
static_assert(kIntrinsicIsStatic[kIntrinsicNumberOfTrailingZeros],
@@ -110,9 +112,9 @@ static_assert(kIntrinsicIsStatic[kIntrinsicAbsLong], "AbsLong must be static");
static_assert(kIntrinsicIsStatic[kIntrinsicAbsFloat], "AbsFloat must be static");
static_assert(kIntrinsicIsStatic[kIntrinsicAbsDouble], "AbsDouble must be static");
static_assert(kIntrinsicIsStatic[kIntrinsicMinMaxInt], "MinMaxInt must be static");
-static_assert(kIntrinsicIsStatic[kIntrinsicMinMaxLong], "MinMaxLong_must_be_static");
-static_assert(kIntrinsicIsStatic[kIntrinsicMinMaxFloat], "MinMaxFloat_must_be_static");
-static_assert(kIntrinsicIsStatic[kIntrinsicMinMaxDouble], "MinMaxDouble_must_be_static");
+static_assert(kIntrinsicIsStatic[kIntrinsicMinMaxLong], "MinMaxLong must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicMinMaxFloat], "MinMaxFloat must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicMinMaxDouble], "MinMaxDouble must be static");
static_assert(kIntrinsicIsStatic[kIntrinsicCos], "Cos must be static");
static_assert(kIntrinsicIsStatic[kIntrinsicSin], "Sin must be static");
static_assert(kIntrinsicIsStatic[kIntrinsicAcos], "Acos must be static");
@@ -153,7 +155,7 @@ static_assert(kIntrinsicIsStatic[kIntrinsicCurrentThread], "CurrentThread must b
static_assert(kIntrinsicIsStatic[kIntrinsicPeek], "Peek must be static");
static_assert(kIntrinsicIsStatic[kIntrinsicPoke], "Poke must be static");
static_assert(!kIntrinsicIsStatic[kIntrinsicCas], "Cas must not be static");
-static_assert(!kIntrinsicIsStatic[kIntrinsicUnsafeGet], "UnsafeGet_must_not_be_static");
+static_assert(!kIntrinsicIsStatic[kIntrinsicUnsafeGet], "UnsafeGet must not be static");
static_assert(!kIntrinsicIsStatic[kIntrinsicUnsafePut], "UnsafePut must not be static");
static_assert(kIntrinsicIsStatic[kIntrinsicSystemArrayCopyCharArray],
"SystemArrayCopyCharArray must be static");
@@ -293,6 +295,7 @@ const char* const DexFileMethodInliner::kNameCacheNames[] = {
"putObjectVolatile", // kNameCachePutObjectVolatile
"putOrderedObject", // kNameCachePutOrderedObject
"arraycopy", // kNameCacheArrayCopy
+ "bitCount", // kNameCacheBitCount
"numberOfLeadingZeros", // kNameCacheNumberOfLeadingZeros
"numberOfTrailingZeros", // kNameCacheNumberOfTrailingZeros
"rotateRight", // kNameCacheRotateRight
@@ -447,6 +450,8 @@ const DexFileMethodInliner::IntrinsicDef DexFileMethodInliner::kIntrinsicMethods
INTRINSIC(JavaLangInteger, Reverse, I_I, kIntrinsicReverseBits, k32),
INTRINSIC(JavaLangLong, Reverse, J_J, kIntrinsicReverseBits, k64),
+ INTRINSIC(JavaLangInteger, BitCount, I_I, kIntrinsicBitCount, k32),
+ INTRINSIC(JavaLangLong, BitCount, J_I, kIntrinsicBitCount, k64),
INTRINSIC(JavaLangInteger, NumberOfLeadingZeros, I_I, kIntrinsicNumberOfLeadingZeros, k32),
INTRINSIC(JavaLangLong, NumberOfLeadingZeros, J_I, kIntrinsicNumberOfLeadingZeros, k64),
INTRINSIC(JavaLangInteger, NumberOfTrailingZeros, I_I, kIntrinsicNumberOfTrailingZeros, k32),
@@ -745,6 +750,7 @@ bool DexFileMethodInliner::GenIntrinsic(Mir2Lir* backend, CallInfo* info) {
intrinsic.d.data & kIntrinsicFlagIsOrdered);
case kIntrinsicSystemArrayCopyCharArray:
return backend->GenInlinedArrayCopyCharArray(info);
+ case kIntrinsicBitCount:
case kIntrinsicNumberOfLeadingZeros:
case kIntrinsicNumberOfTrailingZeros:
case kIntrinsicRotateRight:
diff --git a/compiler/dex/quick/dex_file_method_inliner.h b/compiler/dex/quick/dex_file_method_inliner.h
index ac70577b48..28036237d7 100644
--- a/compiler/dex/quick/dex_file_method_inliner.h
+++ b/compiler/dex/quick/dex_file_method_inliner.h
@@ -224,6 +224,7 @@ class DexFileMethodInliner {
kNameCachePutObjectVolatile,
kNameCachePutOrderedObject,
kNameCacheArrayCopy,
+ kNameCacheBitCount,
kNameCacheNumberOfLeadingZeros,
kNameCacheNumberOfTrailingZeros,
kNameCacheRotateRight,
diff --git a/compiler/dex/quick/quick_cfi_test.cc b/compiler/dex/quick/quick_cfi_test.cc
index 12568a4ad4..c5df134493 100644
--- a/compiler/dex/quick/quick_cfi_test.cc
+++ b/compiler/dex/quick/quick_cfi_test.cc
@@ -69,6 +69,8 @@ class QuickCFITest : public CFITest {
false,
nullptr,
nullptr,
+ false,
+ "",
false);
VerificationResults verification_results(&compiler_options);
DexFileToMethodInlinerMap method_inliner_map;
@@ -88,8 +90,6 @@ class QuickCFITest : public CFITest {
0,
false,
false,
- "",
- false,
0,
-1,
nullptr,
diff --git a/compiler/dex/quick/x86/quick_assemble_x86_test.cc b/compiler/dex/quick/x86/quick_assemble_x86_test.cc
index b39fe4da4f..d63878d6b9 100644
--- a/compiler/dex/quick/x86/quick_assemble_x86_test.cc
+++ b/compiler/dex/quick/x86/quick_assemble_x86_test.cc
@@ -52,6 +52,8 @@ class QuickAssembleX86TestBase : public testing::Test {
false,
nullptr,
nullptr,
+ false,
+ "",
false));
verification_results_.reset(new VerificationResults(compiler_options_.get()));
method_inliner_map_.reset(new DexFileToMethodInlinerMap());
@@ -69,8 +71,6 @@ class QuickAssembleX86TestBase : public testing::Test {
0,
false,
false,
- "",
- false,
0,
-1,
nullptr,
diff --git a/compiler/driver/compiled_method_storage_test.cc b/compiler/driver/compiled_method_storage_test.cc
index f18fa67ea5..2e2d1f99f3 100644
--- a/compiler/driver/compiled_method_storage_test.cc
+++ b/compiler/driver/compiled_method_storage_test.cc
@@ -41,8 +41,6 @@ TEST(CompiledMethodStorage, Deduplicate) {
1u,
false,
false,
- "",
- false,
nullptr,
-1,
nullptr,
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 043bd93bd7..d0215255e8 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -345,7 +345,6 @@ CompilerDriver::CompilerDriver(
std::unordered_set<std::string>* compiled_classes,
std::unordered_set<std::string>* compiled_methods,
size_t thread_count, bool dump_stats, bool dump_passes,
- const std::string& dump_cfg_file_name, bool dump_cfg_append,
CumulativeLogger* timer, int swap_fd,
const std::unordered_map<const DexFile*, const char*>* dex_to_oat_map,
const ProfileCompilationInfo* profile_compilation_info)
@@ -370,8 +369,6 @@ CompilerDriver::CompilerDriver(
stats_(new AOTCompilationStats),
dump_stats_(dump_stats),
dump_passes_(dump_passes),
- dump_cfg_file_name_(dump_cfg_file_name),
- dump_cfg_append_(dump_cfg_append),
timings_logger_(timer),
compiler_context_(nullptr),
support_boot_image_fixup_(instruction_set != kMips && instruction_set != kMips64),
@@ -1197,15 +1194,18 @@ bool CompilerDriver::CanAccessTypeWithoutChecks(uint32_t referrer_idx, const Dex
if (equals_referrers_class != nullptr) {
*equals_referrers_class = (method_id.class_idx_ == type_idx);
}
- mirror::Class* referrer_class = dex_cache->GetResolvedType(method_id.class_idx_);
- if (referrer_class == nullptr) {
- stats_->TypeNeedsAccessCheck();
- return false; // Incomplete referrer knowledge needs access check.
+ bool is_accessible = resolved_class->IsPublic(); // Public classes are always accessible.
+ if (!is_accessible) {
+ mirror::Class* referrer_class = dex_cache->GetResolvedType(method_id.class_idx_);
+ if (referrer_class == nullptr) {
+ stats_->TypeNeedsAccessCheck();
+ return false; // Incomplete referrer knowledge needs access check.
+ }
+ // Perform access check, will return true if access is ok or false if we're going to have to
+ // check this at runtime (for example for class loaders).
+ is_accessible = referrer_class->CanAccess(resolved_class);
}
- // Perform access check, will return true if access is ok or false if we're going to have to
- // check this at runtime (for example for class loaders).
- bool result = referrer_class->CanAccess(resolved_class);
- if (result) {
+ if (is_accessible) {
stats_->TypeDoesntNeedAccessCheck();
if (type_known_final != nullptr) {
*type_known_final = resolved_class->IsFinal() && !resolved_class->IsArrayClass();
@@ -1216,7 +1216,7 @@ bool CompilerDriver::CanAccessTypeWithoutChecks(uint32_t referrer_idx, const Dex
} else {
stats_->TypeNeedsAccessCheck();
}
- return result;
+ return is_accessible;
}
bool CompilerDriver::CanAccessInstantiableTypeWithoutChecks(uint32_t referrer_idx,
@@ -1236,14 +1236,18 @@ bool CompilerDriver::CanAccessInstantiableTypeWithoutChecks(uint32_t referrer_id
}
*finalizable = resolved_class->IsFinalizable();
const DexFile::MethodId& method_id = dex_file.GetMethodId(referrer_idx);
- mirror::Class* referrer_class = dex_cache->GetResolvedType(method_id.class_idx_);
- if (referrer_class == nullptr) {
- stats_->TypeNeedsAccessCheck();
- return false; // Incomplete referrer knowledge needs access check.
- }
- // Perform access and instantiable checks, will return true if access is ok or false if we're
- // going to have to check this at runtime (for example for class loaders).
- bool result = referrer_class->CanAccess(resolved_class) && resolved_class->IsInstantiable();
+ bool is_accessible = resolved_class->IsPublic(); // Public classes are always accessible.
+ if (!is_accessible) {
+ mirror::Class* referrer_class = dex_cache->GetResolvedType(method_id.class_idx_);
+ if (referrer_class == nullptr) {
+ stats_->TypeNeedsAccessCheck();
+ return false; // Incomplete referrer knowledge needs access check.
+ }
+ // Perform access and instantiable checks, will return true if access is ok or false if we're
+ // going to have to check this at runtime (for example for class loaders).
+ is_accessible = referrer_class->CanAccess(resolved_class);
+ }
+ bool result = is_accessible && resolved_class->IsInstantiable();
if (result) {
stats_->TypeDoesntNeedAccessCheck();
} else {
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 17b2f5e98d..6a2f7bfd4e 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -95,7 +95,6 @@ class CompilerDriver {
std::unordered_set<std::string>* compiled_classes,
std::unordered_set<std::string>* compiled_methods,
size_t thread_count, bool dump_stats, bool dump_passes,
- const std::string& dump_cfg_file_name, bool dump_cfg_append,
CumulativeLogger* timer, int swap_fd,
const std::unordered_map<const DexFile*, const char*>* dex_to_oat_map,
const ProfileCompilationInfo* profile_compilation_info);
@@ -423,14 +422,6 @@ class CompilerDriver {
return dump_passes_;
}
- const std::string& GetDumpCfgFileName() const {
- return dump_cfg_file_name_;
- }
-
- bool GetDumpCfgAppend() const {
- return dump_cfg_append_;
- }
-
CumulativeLogger* GetTimingsLogger() const {
return timings_logger_;
}
@@ -668,8 +659,6 @@ class CompilerDriver {
bool dump_stats_;
const bool dump_passes_;
- const std::string dump_cfg_file_name_;
- const bool dump_cfg_append_;
CumulativeLogger* const timings_logger_;
diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc
index 82c0e86b25..4c03e5ddfe 100644
--- a/compiler/driver/compiler_driver_test.cc
+++ b/compiler/driver/compiler_driver_test.cc
@@ -31,6 +31,7 @@
#include "mirror/object_array-inl.h"
#include "mirror/object-inl.h"
#include "handle_scope-inl.h"
+#include "jit/offline_profiling_info.h"
#include "scoped_thread_state_change.h"
namespace art {
@@ -240,6 +241,94 @@ TEST_F(CompilerDriverMethodsTest, Selection) {
EXPECT_TRUE(expected->empty());
}
+class CompilerDriverProfileTest : public CompilerDriverTest {
+ protected:
+ ProfileCompilationInfo* GetProfileCompilationInfo() OVERRIDE {
+ ScopedObjectAccess soa(Thread::Current());
+ std::vector<std::unique_ptr<const DexFile>> dex_files = OpenTestDexFiles("ProfileTestMultiDex");
+
+    // Mark methods with indices 1 and 2 in each dex file as profiled.
+    for (const std::unique_ptr<const DexFile>& dex_file : dex_files) {
+ profile_info_.AddData(dex_file->GetLocation(), dex_file->GetLocationChecksum(), 1);
+ profile_info_.AddData(dex_file->GetLocation(), dex_file->GetLocationChecksum(), 2);
+ }
+ return &profile_info_;
+ }
+
+ std::unordered_set<std::string> GetExpectedMethodsForClass(const std::string& clazz) {
+ if (clazz == "Main") {
+ return std::unordered_set<std::string>({
+ "java.lang.String Main.getA()",
+ "java.lang.String Main.getB()"});
+ } else if (clazz == "Second") {
+ return std::unordered_set<std::string>({
+ "java.lang.String Second.getX()",
+ "java.lang.String Second.getY()"});
+ } else {
+ return std::unordered_set<std::string>();
+ }
+ }
+
+ void CheckCompiledMethods(jobject class_loader,
+ const std::string& clazz,
+ const std::unordered_set<std::string>& expected_methods) {
+ ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+ Thread* self = Thread::Current();
+ ScopedObjectAccess soa(self);
+ StackHandleScope<1> hs(self);
+ Handle<mirror::ClassLoader> h_loader(hs.NewHandle(
+ reinterpret_cast<mirror::ClassLoader*>(self->DecodeJObject(class_loader))));
+ mirror::Class* klass = class_linker->FindClass(self, clazz.c_str(), h_loader);
+ ASSERT_NE(klass, nullptr);
+
+ const auto pointer_size = class_linker->GetImagePointerSize();
+ size_t number_of_compiled_methods = 0;
+ for (auto& m : klass->GetVirtualMethods(pointer_size)) {
+ std::string name = PrettyMethod(&m, true);
+ const void* code = m.GetEntryPointFromQuickCompiledCodePtrSize(pointer_size);
+ ASSERT_NE(code, nullptr);
+ if (expected_methods.find(name) != expected_methods.end()) {
+ number_of_compiled_methods++;
+ EXPECT_FALSE(class_linker->IsQuickToInterpreterBridge(code));
+ } else {
+ EXPECT_TRUE(class_linker->IsQuickToInterpreterBridge(code));
+ }
+ }
+ EXPECT_EQ(expected_methods.size(), number_of_compiled_methods);
+ }
+
+ private:
+ ProfileCompilationInfo profile_info_;
+};
+
+TEST_F(CompilerDriverProfileTest, ProfileGuidedCompilation) {
+ TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING_WITH_QUICK();
+ TEST_DISABLED_FOR_READ_BARRIER_WITH_QUICK();
+ TEST_DISABLED_FOR_READ_BARRIER_WITH_OPTIMIZING_FOR_UNSUPPORTED_INSTRUCTION_SETS();
+ Thread* self = Thread::Current();
+ jobject class_loader;
+ {
+ ScopedObjectAccess soa(self);
+ class_loader = LoadDex("ProfileTestMultiDex");
+ }
+ ASSERT_NE(class_loader, nullptr);
+
+  // Need to enable dex-file writability: methods that are not selected for compilation will be
+  // processed by the dex-to-dex compiler, which requires write access to the dex file.
+ for (const DexFile* dex_file : GetDexFiles(class_loader)) {
+ ASSERT_TRUE(dex_file->EnableWrite());
+ }
+
+ CompileAll(class_loader);
+
+ std::unordered_set<std::string> m = GetExpectedMethodsForClass("Main");
+ std::unordered_set<std::string> s = GetExpectedMethodsForClass("Second");
+ CheckCompiledMethods(class_loader, "LMain;", m);
+ CheckCompiledMethods(class_loader, "LSecond;", s);
+}
+
// TODO: need check-cast test (when stub complete & we can throw/catch
} // namespace art
diff --git a/compiler/driver/compiler_options.cc b/compiler/driver/compiler_options.cc
index 385f34a9f9..2644528e56 100644
--- a/compiler/driver/compiler_options.cc
+++ b/compiler/driver/compiler_options.cc
@@ -44,7 +44,9 @@ CompilerOptions::CompilerOptions()
verbose_methods_(nullptr),
pass_manager_options_(),
abort_on_hard_verifier_failure_(false),
- init_failure_output_(nullptr) {
+ init_failure_output_(nullptr),
+ dump_cfg_file_name_(""),
+ dump_cfg_append_(false) {
}
CompilerOptions::~CompilerOptions() {
@@ -71,7 +73,9 @@ CompilerOptions::CompilerOptions(CompilerFilter compiler_filter,
bool compile_pic,
const std::vector<std::string>* verbose_methods,
std::ostream* init_failure_output,
- bool abort_on_hard_verifier_failure
+ bool abort_on_hard_verifier_failure,
+ const std::string& dump_cfg_file_name,
+ bool dump_cfg_append
) : // NOLINT(whitespace/parens)
compiler_filter_(compiler_filter),
huge_method_threshold_(huge_method_threshold),
@@ -94,7 +98,9 @@ CompilerOptions::CompilerOptions(CompilerFilter compiler_filter,
verbose_methods_(verbose_methods),
pass_manager_options_(),
abort_on_hard_verifier_failure_(abort_on_hard_verifier_failure),
- init_failure_output_(init_failure_output) {
+ init_failure_output_(init_failure_output),
+ dump_cfg_file_name_(dump_cfg_file_name),
+ dump_cfg_append_(dump_cfg_append) {
}
void CompilerOptions::ParseHugeMethodMax(const StringPiece& option, UsageFn Usage) {
@@ -238,6 +244,10 @@ bool CompilerOptions::ParseCompilerOption(const StringPiece& option, UsageFn Usa
ParsePassOptions(option, Usage);
} else if (option.starts_with("--dump-init-failures=")) {
ParseDumpInitFailures(option, Usage);
+ } else if (option.starts_with("--dump-cfg=")) {
+ dump_cfg_file_name_ = option.substr(strlen("--dump-cfg=")).data();
+ } else if (option.starts_with("--dump-cfg-append")) {
+ dump_cfg_append_ = true;
} else {
// Option not recognized.
return false;
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index f14bdc4a2f..d47fc2ad4b 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -83,7 +83,9 @@ class CompilerOptions FINAL {
bool compile_pic,
const std::vector<std::string>* verbose_methods,
std::ostream* init_failure_output,
- bool abort_on_hard_verifier_failure);
+ bool abort_on_hard_verifier_failure,
+ const std::string& dump_cfg_file_name,
+ bool dump_cfg_append);
CompilerFilter GetCompilerFilter() const {
return compiler_filter_;
@@ -224,6 +226,14 @@ class CompilerOptions FINAL {
bool ParseCompilerOption(const StringPiece& option, UsageFn Usage);
+ const std::string& GetDumpCfgFileName() const {
+ return dump_cfg_file_name_;
+ }
+
+ bool GetDumpCfgAppend() const {
+ return dump_cfg_append_;
+ }
+
private:
void ParseDumpInitFailures(const StringPiece& option, UsageFn Usage);
void ParsePassOptions(const StringPiece& option, UsageFn Usage);
@@ -273,6 +283,9 @@ class CompilerOptions FINAL {
// Log initialization of initialization failures to this stream if not null.
std::unique_ptr<std::ostream> init_failure_output_;
+ std::string dump_cfg_file_name_;
+ bool dump_cfg_append_;
+
friend class Dex2Oat;
DISALLOW_COPY_AND_ASSIGN(CompilerOptions);
diff --git a/compiler/dwarf/register.h b/compiler/dwarf/register.h
index b67e8ddc9d..35b3e15d83 100644
--- a/compiler/dwarf/register.h
+++ b/compiler/dwarf/register.h
@@ -29,7 +29,7 @@ class Reg {
// TODO: Arm S0-S31 register mapping is obsolescent.
// We should use VFP-v3/Neon D0-D31 mapping instead.
// However, D0 is aliased to pair of S0 and S1, so using that
- // mapping we can not easily say S0 is spilled and S1 is not.
+ // mapping we cannot easily say S0 is spilled and S1 is not.
// There are ways around this in DWARF but they are complex.
// It would be much simpler to always spill whole D registers.
// Arm64 mapping is correct since we already do this there.
diff --git a/compiler/elf_builder.h b/compiler/elf_builder.h
index a7461a5525..46484b1cd6 100644
--- a/compiler/elf_builder.h
+++ b/compiler/elf_builder.h
@@ -100,12 +100,6 @@ class ElfBuilder FINAL {
header_.sh_entsize = entsize;
}
- ~Section() OVERRIDE {
- if (started_) {
- CHECK(finished_);
- }
- }
-
// Start writing of this section.
void Start() {
CHECK(!started_);
diff --git a/compiler/elf_writer_debug.cc b/compiler/elf_writer_debug.cc
index 99d2b84a8f..f3baf67463 100644
--- a/compiler/elf_writer_debug.cc
+++ b/compiler/elf_writer_debug.cc
@@ -213,7 +213,7 @@ static void WriteCIE(InstructionSet isa,
case kNone:
break;
}
- LOG(FATAL) << "Can not write CIE frame for ISA " << isa;
+ LOG(FATAL) << "Cannot write CIE frame for ISA " << isa;
UNREACHABLE();
}
diff --git a/compiler/image_test.cc b/compiler/image_test.cc
index 6859605095..12132c0cd0 100644
--- a/compiler/image_test.cc
+++ b/compiler/image_test.cc
@@ -95,25 +95,37 @@ void ImageTest::TestWriteRead(ImageHeader::StorageMode storage_mode) {
t.NewTiming("WriteElf");
SafeMap<std::string, std::string> key_value_store;
- OatWriter oat_writer(class_linker->GetBootClassPath(),
- 0,
- 0,
- 0,
- compiler_driver_.get(),
- writer.get(),
- /*compiling_boot_image*/true,
- &timings,
- &key_value_store);
+ const std::vector<const DexFile*>& dex_files = class_linker->GetBootClassPath();
std::unique_ptr<ElfWriter> elf_writer = CreateElfWriterQuick(
compiler_driver_->GetInstructionSet(),
&compiler_driver_->GetCompilerOptions(),
oat_file.GetFile());
- bool success = writer->PrepareImageAddressSpace();
- ASSERT_TRUE(success);
-
elf_writer->Start();
-
+ OatWriter oat_writer(/*compiling_boot_image*/true, &timings);
OutputStream* rodata = elf_writer->StartRoData();
+ for (const DexFile* dex_file : dex_files) {
+ ArrayRef<const uint8_t> raw_dex_file(
+ reinterpret_cast<const uint8_t*>(&dex_file->GetHeader()),
+ dex_file->GetHeader().file_size_);
+ oat_writer.AddRawDexFileSource(raw_dex_file,
+ dex_file->GetLocation().c_str(),
+ dex_file->GetLocationChecksum());
+ }
+ std::unique_ptr<MemMap> opened_dex_files_map;
+ std::vector<std::unique_ptr<const DexFile>> opened_dex_files;
+ bool dex_files_ok = oat_writer.WriteAndOpenDexFiles(
+ rodata,
+ oat_file.GetFile(),
+ compiler_driver_->GetInstructionSet(),
+ compiler_driver_->GetInstructionSetFeatures(),
+ &key_value_store,
+ &opened_dex_files_map,
+ &opened_dex_files);
+ ASSERT_TRUE(dex_files_ok);
+ oat_writer.PrepareLayout(compiler_driver_.get(), writer.get(), dex_files);
+ bool image_space_ok = writer->PrepareImageAddressSpace();
+ ASSERT_TRUE(image_space_ok);
+
bool rodata_ok = oat_writer.WriteRodata(rodata);
ASSERT_TRUE(rodata_ok);
elf_writer->EndRoData(rodata);
@@ -123,12 +135,15 @@ void ImageTest::TestWriteRead(ImageHeader::StorageMode storage_mode) {
ASSERT_TRUE(text_ok);
elf_writer->EndText(text);
+ bool header_ok = oat_writer.WriteHeader(elf_writer->GetStream(), 0u, 0u, 0u);
+ ASSERT_TRUE(header_ok);
+
elf_writer->SetBssSize(oat_writer.GetBssSize());
elf_writer->WriteDynamicSection();
elf_writer->WriteDebugInfo(oat_writer.GetMethodDebugInfo());
elf_writer->WritePatchLocations(oat_writer.GetAbsolutePatchLocations());
- success = elf_writer->End();
+ bool success = elf_writer->End();
ASSERT_TRUE(success);
}
diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc
index 8fdbf4a3f7..3a3275a5f4 100644
--- a/compiler/jit/jit_compiler.cc
+++ b/compiler/jit/jit_compiler.cc
@@ -110,7 +110,9 @@ JitCompiler::JitCompiler() : total_time_(0) {
/* pic */ true, // TODO: Support non-PIC in optimizing.
/* verbose_methods */ nullptr,
/* init_failure_output */ nullptr,
- /* abort_on_hard_verifier_failure */ false));
+ /* abort_on_hard_verifier_failure */ false,
+ /* dump_cfg_file_name */ "",
+ /* dump_cfg_append */ false));
for (const std::string& argument : Runtime::Current()->GetCompilerOptions()) {
compiler_options_->ParseCompilerOption(argument, Usage);
}
@@ -166,8 +168,6 @@ JitCompiler::JitCompiler() : total_time_(0) {
/* thread_count */ 1,
/* dump_stats */ false,
/* dump_passes */ false,
- /* dump_cfg_file_name */ "",
- /* dump_cfg_append */ false,
cumulative_logger_.get(),
/* swap_fd */ -1,
/* dex to oat map */ nullptr,
diff --git a/compiler/linker/relative_patcher_test.h b/compiler/linker/relative_patcher_test.h
index b10cc3534c..bf8e786f64 100644
--- a/compiler/linker/relative_patcher_test.h
+++ b/compiler/linker/relative_patcher_test.h
@@ -47,7 +47,7 @@ class RelativePatcherTest : public testing::Test {
driver_(&compiler_options_, &verification_results_, &inliner_map_,
Compiler::kQuick, instruction_set, nullptr,
false, nullptr, nullptr, nullptr, 1u,
- false, false, "", false, nullptr, -1, nullptr, nullptr),
+ false, false, nullptr, -1, nullptr, nullptr),
error_msg_(),
instruction_set_(instruction_set),
features_(InstructionSetFeatures::FromVariant(instruction_set, variant, &error_msg_)),
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index 9f7ffa5ace..c0d15f3439 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -38,6 +38,7 @@
#include "oat_file-inl.h"
#include "oat_writer.h"
#include "scoped_thread_state_change.h"
+#include "utils/test_dex_file_builder.h"
namespace art {
@@ -117,8 +118,6 @@ class OatTest : public CommonCompilerTest {
2,
true,
true,
- "",
- false,
timer_.get(),
-1,
nullptr,
@@ -129,23 +128,74 @@ class OatTest : public CommonCompilerTest {
const std::vector<const DexFile*>& dex_files,
SafeMap<std::string, std::string>& key_value_store) {
TimingLogger timings("WriteElf", false, false);
- OatWriter oat_writer(dex_files,
- 42U,
- 4096U,
- 0,
- compiler_driver_.get(),
- nullptr,
- /*compiling_boot_image*/false,
- &timings,
- &key_value_store);
+ OatWriter oat_writer(/*compiling_boot_image*/false, &timings);
+ for (const DexFile* dex_file : dex_files) {
+ ArrayRef<const uint8_t> raw_dex_file(
+ reinterpret_cast<const uint8_t*>(&dex_file->GetHeader()),
+ dex_file->GetHeader().file_size_);
+ if (!oat_writer.AddRawDexFileSource(raw_dex_file,
+ dex_file->GetLocation().c_str(),
+ dex_file->GetLocationChecksum())) {
+ return false;
+ }
+ }
+ return DoWriteElf(file, oat_writer, key_value_store);
+ }
+
+ bool WriteElf(File* file,
+ const std::vector<const char*>& dex_filenames,
+ SafeMap<std::string, std::string>& key_value_store) {
+ TimingLogger timings("WriteElf", false, false);
+ OatWriter oat_writer(/*compiling_boot_image*/false, &timings);
+ for (const char* dex_filename : dex_filenames) {
+ if (!oat_writer.AddDexFileSource(dex_filename, dex_filename)) {
+ return false;
+ }
+ }
+ return DoWriteElf(file, oat_writer, key_value_store);
+ }
+
+ bool WriteElf(File* file,
+ ScopedFd&& zip_fd,
+ const char* location,
+ SafeMap<std::string, std::string>& key_value_store) {
+ TimingLogger timings("WriteElf", false, false);
+ OatWriter oat_writer(/*compiling_boot_image*/false, &timings);
+ if (!oat_writer.AddZippedDexFilesSource(std::move(zip_fd), location)) {
+ return false;
+ }
+ return DoWriteElf(file, oat_writer, key_value_store);
+ }
+
+ bool DoWriteElf(File* file,
+ OatWriter& oat_writer,
+ SafeMap<std::string, std::string>& key_value_store) {
std::unique_ptr<ElfWriter> elf_writer = CreateElfWriterQuick(
compiler_driver_->GetInstructionSet(),
&compiler_driver_->GetCompilerOptions(),
file);
-
elf_writer->Start();
-
OutputStream* rodata = elf_writer->StartRoData();
+ std::unique_ptr<MemMap> opened_dex_files_map;
+ std::vector<std::unique_ptr<const DexFile>> opened_dex_files;
+ if (!oat_writer.WriteAndOpenDexFiles(rodata,
+ file,
+ compiler_driver_->GetInstructionSet(),
+ compiler_driver_->GetInstructionSetFeatures(),
+ &key_value_store,
+ &opened_dex_files_map,
+ &opened_dex_files)) {
+ return false;
+ }
+ Runtime* runtime = Runtime::Current();
+ ClassLinker* const class_linker = runtime->GetClassLinker();
+ std::vector<const DexFile*> dex_files;
+ for (const std::unique_ptr<const DexFile>& dex_file : opened_dex_files) {
+ dex_files.push_back(dex_file.get());
+ ScopedObjectAccess soa(Thread::Current());
+ class_linker->RegisterDexFile(*dex_file, runtime->GetLinearAlloc());
+ }
+ oat_writer.PrepareLayout(compiler_driver_.get(), nullptr, dex_files);
if (!oat_writer.WriteRodata(rodata)) {
return false;
}
@@ -157,6 +207,10 @@ class OatTest : public CommonCompilerTest {
}
elf_writer->EndText(text);
+ if (!oat_writer.WriteHeader(elf_writer->GetStream(), 42U, 4096U, 0)) {
+ return false;
+ }
+
elf_writer->SetBssSize(oat_writer.GetBssSize());
elf_writer->WriteDynamicSection();
elf_writer->WriteDebugInfo(oat_writer.GetMethodDebugInfo());
@@ -169,6 +223,117 @@ class OatTest : public CommonCompilerTest {
std::unique_ptr<QuickCompilerCallbacks> callbacks_;
};
+class ZipBuilder {
+ public:
+ explicit ZipBuilder(File* zip_file) : zip_file_(zip_file) { }
+
+ bool AddFile(const char* location, const void* data, size_t size) {
+ off_t offset = lseek(zip_file_->Fd(), 0, SEEK_CUR);
+ if (offset == static_cast<off_t>(-1)) {
+ return false;
+ }
+
+ ZipFileHeader file_header;
+ file_header.crc32 = crc32(0u, reinterpret_cast<const Bytef*>(data), size);
+ file_header.compressed_size = size;
+ file_header.uncompressed_size = size;
+ file_header.filename_length = strlen(location);
+
+ if (!zip_file_->WriteFully(&file_header, sizeof(file_header)) ||
+ !zip_file_->WriteFully(location, file_header.filename_length) ||
+ !zip_file_->WriteFully(data, size)) {
+ return false;
+ }
+
+ CentralDirectoryFileHeader cdfh;
+ cdfh.crc32 = file_header.crc32;
+ cdfh.compressed_size = size;
+ cdfh.uncompressed_size = size;
+ cdfh.filename_length = file_header.filename_length;
+ cdfh.relative_offset_of_local_file_header = offset;
+ file_data_.push_back(FileData { cdfh, location });
+ return true;
+ }
+
+ bool Finish() {
+ off_t offset = lseek(zip_file_->Fd(), 0, SEEK_CUR);
+ if (offset == static_cast<off_t>(-1)) {
+ return false;
+ }
+
+ size_t central_directory_size = 0u;
+ for (const FileData& file_data : file_data_) {
+ if (!zip_file_->WriteFully(&file_data.cdfh, sizeof(file_data.cdfh)) ||
+ !zip_file_->WriteFully(file_data.location, file_data.cdfh.filename_length)) {
+ return false;
+ }
+ central_directory_size += sizeof(file_data.cdfh) + file_data.cdfh.filename_length;
+ }
+ EndOfCentralDirectoryRecord eocd_record;
+ eocd_record.number_of_central_directory_records_on_this_disk = file_data_.size();
+ eocd_record.total_number_of_central_directory_records = file_data_.size();
+ eocd_record.size_of_central_directory = central_directory_size;
+ eocd_record.offset_of_start_of_central_directory = offset;
+ return
+ zip_file_->WriteFully(&eocd_record, sizeof(eocd_record)) &&
+ zip_file_->Flush() == 0;
+ }
+
+ private:
+ struct PACKED(1) ZipFileHeader {
+ uint32_t signature = 0x04034b50;
+ uint16_t version_needed_to_extract = 10;
+ uint16_t general_purpose_bit_flag = 0;
+ uint16_t compression_method = 0; // 0 = store only.
+ uint16_t file_last_modification_time = 0u;
+ uint16_t file_last_modification_date = 0u;
+ uint32_t crc32;
+ uint32_t compressed_size;
+ uint32_t uncompressed_size;
+ uint16_t filename_length;
+ uint16_t extra_field_length = 0u; // No extra fields.
+ };
+
+ struct PACKED(1) CentralDirectoryFileHeader {
+ uint32_t signature = 0x02014b50;
+ uint16_t version_made_by = 10;
+ uint16_t version_needed_to_extract = 10;
+ uint16_t general_purpose_bit_flag = 0;
+ uint16_t compression_method = 0; // 0 = store only.
+ uint16_t file_last_modification_time = 0u;
+ uint16_t file_last_modification_date = 0u;
+ uint32_t crc32;
+ uint32_t compressed_size;
+ uint32_t uncompressed_size;
+ uint16_t filename_length;
+ uint16_t extra_field_length = 0u; // No extra fields.
+ uint16_t file_comment_length = 0u; // No file comment.
+ uint16_t disk_number_where_file_starts = 0u;
+ uint16_t internal_file_attributes = 0u;
+ uint32_t external_file_attributes = 0u;
+ uint32_t relative_offset_of_local_file_header;
+ };
+
+ struct PACKED(1) EndOfCentralDirectoryRecord {
+ uint32_t signature = 0x06054b50;
+ uint16_t number_of_this_disk = 0u;
+ uint16_t disk_where_central_directory_starts = 0u;
+ uint16_t number_of_central_directory_records_on_this_disk;
+ uint16_t total_number_of_central_directory_records;
+ uint32_t size_of_central_directory;
+ uint32_t offset_of_start_of_central_directory;
+ uint16_t comment_length = 0u; // No file comment.
+ };
+
+ struct FileData {
+ CentralDirectoryFileHeader cdfh;
+ const char* location;
+ };
+
+ File* zip_file_;
+ std::vector<FileData> file_data_;
+};
+
TEST_F(OatTest, WriteRead) {
TimingLogger timings("OatTest::WriteRead", false, false);
ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
@@ -329,4 +494,189 @@ TEST_F(OatTest, EmptyTextSection) {
EXPECT_LT(static_cast<size_t>(oat_file->Size()), static_cast<size_t>(tmp.GetFile()->GetLength()));
}
+TEST_F(OatTest, DexFileInput) {
+ TimingLogger timings("OatTest::DexFileInput", false, false);
+
+ std::vector<const char*> input_filenames;
+
+ ScratchFile dex_file1;
+ TestDexFileBuilder builder1;
+ builder1.AddField("Lsome.TestClass;", "int", "someField");
+ builder1.AddMethod("Lsome.TestClass;", "()I", "foo");
+ std::unique_ptr<const DexFile> dex_file1_data = builder1.Build(dex_file1.GetFilename());
+ bool success = dex_file1.GetFile()->WriteFully(&dex_file1_data->GetHeader(),
+ dex_file1_data->GetHeader().file_size_);
+ ASSERT_TRUE(success);
+ success = dex_file1.GetFile()->Flush() == 0;
+ ASSERT_TRUE(success);
+ input_filenames.push_back(dex_file1.GetFilename().c_str());
+
+ ScratchFile dex_file2;
+ TestDexFileBuilder builder2;
+ builder2.AddField("Land.AnotherTestClass;", "boolean", "someOtherField");
+ builder2.AddMethod("Land.AnotherTestClass;", "()J", "bar");
+ std::unique_ptr<const DexFile> dex_file2_data = builder2.Build(dex_file2.GetFilename());
+ success = dex_file2.GetFile()->WriteFully(&dex_file2_data->GetHeader(),
+ dex_file2_data->GetHeader().file_size_);
+ ASSERT_TRUE(success);
+ success = dex_file2.GetFile()->Flush() == 0;
+ ASSERT_TRUE(success);
+ input_filenames.push_back(dex_file2.GetFilename().c_str());
+
+ ScratchFile oat_file;
+ SafeMap<std::string, std::string> key_value_store;
+ key_value_store.Put(OatHeader::kImageLocationKey, "test.art");
+ success = WriteElf(oat_file.GetFile(), input_filenames, key_value_store);
+ ASSERT_TRUE(success);
+
+ std::string error_msg;
+ std::unique_ptr<OatFile> opened_oat_file(OatFile::Open(oat_file.GetFilename(),
+ oat_file.GetFilename(),
+ nullptr,
+ nullptr,
+ false,
+ nullptr,
+ &error_msg));
+ ASSERT_TRUE(opened_oat_file != nullptr);
+ ASSERT_EQ(2u, opened_oat_file->GetOatDexFiles().size());
+ std::unique_ptr<const DexFile> opened_dex_file1 =
+ opened_oat_file->GetOatDexFiles()[0]->OpenDexFile(&error_msg);
+ std::unique_ptr<const DexFile> opened_dex_file2 =
+ opened_oat_file->GetOatDexFiles()[1]->OpenDexFile(&error_msg);
+
+ ASSERT_EQ(dex_file1_data->GetHeader().file_size_, opened_dex_file1->GetHeader().file_size_);
+ ASSERT_EQ(0, memcmp(&dex_file1_data->GetHeader(),
+ &opened_dex_file1->GetHeader(),
+ dex_file1_data->GetHeader().file_size_));
+ ASSERT_EQ(dex_file1_data->GetLocation(), opened_dex_file1->GetLocation());
+
+ ASSERT_EQ(dex_file2_data->GetHeader().file_size_, opened_dex_file2->GetHeader().file_size_);
+ ASSERT_EQ(0, memcmp(&dex_file2_data->GetHeader(),
+ &opened_dex_file2->GetHeader(),
+ dex_file2_data->GetHeader().file_size_));
+ ASSERT_EQ(dex_file2_data->GetLocation(), opened_dex_file2->GetLocation());
+}
+
+TEST_F(OatTest, ZipFileInput) {
+  TimingLogger timings("OatTest::ZipFileInput", false, false);
+
+ ScratchFile zip_file;
+ ZipBuilder zip_builder(zip_file.GetFile());
+
+ ScratchFile dex_file1;
+ TestDexFileBuilder builder1;
+ builder1.AddField("Lsome.TestClass;", "long", "someField");
+ builder1.AddMethod("Lsome.TestClass;", "()D", "foo");
+ std::unique_ptr<const DexFile> dex_file1_data = builder1.Build(dex_file1.GetFilename());
+ bool success = dex_file1.GetFile()->WriteFully(&dex_file1_data->GetHeader(),
+ dex_file1_data->GetHeader().file_size_);
+ ASSERT_TRUE(success);
+ success = dex_file1.GetFile()->Flush() == 0;
+ ASSERT_TRUE(success);
+ success = zip_builder.AddFile("classes.dex",
+ &dex_file1_data->GetHeader(),
+ dex_file1_data->GetHeader().file_size_);
+ ASSERT_TRUE(success);
+
+ ScratchFile dex_file2;
+ TestDexFileBuilder builder2;
+ builder2.AddField("Land.AnotherTestClass;", "boolean", "someOtherField");
+ builder2.AddMethod("Land.AnotherTestClass;", "()J", "bar");
+ std::unique_ptr<const DexFile> dex_file2_data = builder2.Build(dex_file2.GetFilename());
+ success = dex_file2.GetFile()->WriteFully(&dex_file2_data->GetHeader(),
+ dex_file2_data->GetHeader().file_size_);
+ ASSERT_TRUE(success);
+ success = dex_file2.GetFile()->Flush() == 0;
+ ASSERT_TRUE(success);
+ success = zip_builder.AddFile("classes2.dex",
+ &dex_file2_data->GetHeader(),
+ dex_file2_data->GetHeader().file_size_);
+ ASSERT_TRUE(success);
+
+ success = zip_builder.Finish();
+ ASSERT_TRUE(success) << strerror(errno);
+
+ SafeMap<std::string, std::string> key_value_store;
+ key_value_store.Put(OatHeader::kImageLocationKey, "test.art");
+ {
+ // Test using the AddDexFileSource() interface with the zip file.
+ std::vector<const char*> input_filenames { zip_file.GetFilename().c_str() }; // NOLINT [readability/braces] [4]
+
+ ScratchFile oat_file;
+ success = WriteElf(oat_file.GetFile(), input_filenames, key_value_store);
+ ASSERT_TRUE(success);
+
+ std::string error_msg;
+ std::unique_ptr<OatFile> opened_oat_file(OatFile::Open(oat_file.GetFilename(),
+ oat_file.GetFilename(),
+ nullptr,
+ nullptr,
+ false,
+ nullptr,
+ &error_msg));
+ ASSERT_TRUE(opened_oat_file != nullptr);
+ ASSERT_EQ(2u, opened_oat_file->GetOatDexFiles().size());
+ std::unique_ptr<const DexFile> opened_dex_file1 =
+ opened_oat_file->GetOatDexFiles()[0]->OpenDexFile(&error_msg);
+ std::unique_ptr<const DexFile> opened_dex_file2 =
+ opened_oat_file->GetOatDexFiles()[1]->OpenDexFile(&error_msg);
+
+ ASSERT_EQ(dex_file1_data->GetHeader().file_size_, opened_dex_file1->GetHeader().file_size_);
+ ASSERT_EQ(0, memcmp(&dex_file1_data->GetHeader(),
+ &opened_dex_file1->GetHeader(),
+ dex_file1_data->GetHeader().file_size_));
+ ASSERT_EQ(DexFile::GetMultiDexLocation(0, zip_file.GetFilename().c_str()),
+ opened_dex_file1->GetLocation());
+
+ ASSERT_EQ(dex_file2_data->GetHeader().file_size_, opened_dex_file2->GetHeader().file_size_);
+ ASSERT_EQ(0, memcmp(&dex_file2_data->GetHeader(),
+ &opened_dex_file2->GetHeader(),
+ dex_file2_data->GetHeader().file_size_));
+ ASSERT_EQ(DexFile::GetMultiDexLocation(1, zip_file.GetFilename().c_str()),
+ opened_dex_file2->GetLocation());
+ }
+
+ {
+ // Test using the AddZipDexFileSource() interface with the zip file handle.
+ ScopedFd zip_fd(dup(zip_file.GetFd()));
+ ASSERT_NE(-1, zip_fd.get());
+
+ ScratchFile oat_file;
+ success = WriteElf(oat_file.GetFile(),
+ std::move(zip_fd),
+ zip_file.GetFilename().c_str(),
+ key_value_store);
+ ASSERT_TRUE(success);
+
+ std::string error_msg;
+ std::unique_ptr<OatFile> opened_oat_file(OatFile::Open(oat_file.GetFilename(),
+ oat_file.GetFilename(),
+ nullptr,
+ nullptr,
+ false,
+ nullptr,
+ &error_msg));
+ ASSERT_TRUE(opened_oat_file != nullptr);
+ ASSERT_EQ(2u, opened_oat_file->GetOatDexFiles().size());
+ std::unique_ptr<const DexFile> opened_dex_file1 =
+ opened_oat_file->GetOatDexFiles()[0]->OpenDexFile(&error_msg);
+ std::unique_ptr<const DexFile> opened_dex_file2 =
+ opened_oat_file->GetOatDexFiles()[1]->OpenDexFile(&error_msg);
+
+ ASSERT_EQ(dex_file1_data->GetHeader().file_size_, opened_dex_file1->GetHeader().file_size_);
+ ASSERT_EQ(0, memcmp(&dex_file1_data->GetHeader(),
+ &opened_dex_file1->GetHeader(),
+ dex_file1_data->GetHeader().file_size_));
+ ASSERT_EQ(DexFile::GetMultiDexLocation(0, zip_file.GetFilename().c_str()),
+ opened_dex_file1->GetLocation());
+
+ ASSERT_EQ(dex_file2_data->GetHeader().file_size_, opened_dex_file2->GetHeader().file_size_);
+ ASSERT_EQ(0, memcmp(&dex_file2_data->GetHeader(),
+ &opened_dex_file2->GetHeader(),
+ dex_file2_data->GetHeader().file_size_));
+ ASSERT_EQ(DexFile::GetMultiDexLocation(1, zip_file.GetFilename().c_str()),
+ opened_dex_file2->GetLocation());
+ }
+}
+
} // namespace art
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index 025e35e178..c74c41f0c9 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -16,12 +16,14 @@
#include "oat_writer.h"
+#include <unistd.h>
#include <zlib.h>
#include "arch/arm64/instruction_set_features_arm64.h"
#include "art_method-inl.h"
#include "base/allocator.h"
#include "base/bit_vector.h"
+#include "base/file_magic.h"
#include "base/stl_util.h"
#include "base/unix_file/fd_file.h"
#include "class_linker.h"
@@ -49,9 +51,77 @@
#include "type_lookup_table.h"
#include "utils/dex_cache_arrays_layout-inl.h"
#include "verifier/method_verifier.h"
+#include "zip_archive.h"
namespace art {
+namespace { // anonymous namespace
+
+typedef DexFile::Header __attribute__((aligned(1))) UnalignedDexFileHeader;
+
+const UnalignedDexFileHeader* AsUnalignedDexFileHeader(const uint8_t* raw_data) {
+ return reinterpret_cast<const UnalignedDexFileHeader*>(raw_data);
+}
+
+} // anonymous namespace
+
+// Defines the location of the raw dex file to write.
+class OatWriter::DexFileSource {
+ public:
+ explicit DexFileSource(ZipEntry* zip_entry)
+ : type_(kZipEntry), source_(zip_entry) {
+ DCHECK(source_ != nullptr);
+ }
+
+ explicit DexFileSource(File* raw_file)
+ : type_(kRawFile), source_(raw_file) {
+ DCHECK(source_ != nullptr);
+ }
+
+ explicit DexFileSource(const uint8_t* dex_file)
+ : type_(kRawData), source_(dex_file) {
+ DCHECK(source_ != nullptr);
+ }
+
+ bool IsZipEntry() const { return type_ == kZipEntry; }
+ bool IsRawFile() const { return type_ == kRawFile; }
+ bool IsRawData() const { return type_ == kRawData; }
+
+ ZipEntry* GetZipEntry() const {
+ DCHECK(IsZipEntry());
+ DCHECK(source_ != nullptr);
+ return static_cast<ZipEntry*>(const_cast<void*>(source_));
+ }
+
+ File* GetRawFile() const {
+ DCHECK(IsRawFile());
+ DCHECK(source_ != nullptr);
+ return static_cast<File*>(const_cast<void*>(source_));
+ }
+
+ const uint8_t* GetRawData() const {
+ DCHECK(IsRawData());
+ DCHECK(source_ != nullptr);
+ return static_cast<const uint8_t*>(source_);
+ }
+
+ void Clear() {
+ type_ = kNone;
+ source_ = nullptr;
+ }
+
+ private:
+ enum Type {
+ kNone,
+ kZipEntry,
+ kRawFile,
+ kRawData,
+ };
+
+ Type type_;
+ const void* source_;
+};
+
class OatWriter::OatClass {
public:
OatClass(size_t offset,
@@ -116,11 +186,30 @@ class OatWriter::OatClass {
class OatWriter::OatDexFile {
public:
- OatDexFile(size_t offset, const DexFile& dex_file);
+ OatDexFile(const char* dex_file_location,
+ DexFileSource source,
+ CreateTypeLookupTable create_type_lookup_table);
OatDexFile(OatDexFile&& src) = default;
+ const char* GetLocation() const {
+ return dex_file_location_data_;
+ }
+
+ void ReserveTypeLookupTable(OatWriter* oat_writer);
+ void ReserveClassOffsets(OatWriter* oat_writer);
+
size_t SizeOf() const;
- bool Write(OatWriter* oat_writer, OutputStream* out, const size_t file_offset) const;
+ bool Write(OatWriter* oat_writer, OutputStream* out) const;
+ bool WriteClassOffsets(OatWriter* oat_writer, OutputStream* out);
+
+ // The source of the dex file.
+ DexFileSource source_;
+
+ // Whether to create the type lookup table.
+ CreateTypeLookupTable create_type_lookup_table_;
+
+ // Dex file size. Initialized when writing the dex file.
+ size_t dex_file_size_;
// Offset of start of OatDexFile from beginning of OatHeader. It is
// used to validate file position when writing.
@@ -128,11 +217,13 @@ class OatWriter::OatDexFile {
// Data to write.
uint32_t dex_file_location_size_;
- const uint8_t* dex_file_location_data_;
+ const char* dex_file_location_data_;
uint32_t dex_file_location_checksum_;
uint32_t dex_file_offset_;
+ uint32_t class_offsets_offset_;
uint32_t lookup_table_offset_;
- TypeLookupTable* lookup_table_; // Owned by the dex file.
+
+ // Data to write to a separate section.
dchecked_vector<uint32_t> class_offsets_;
private:
@@ -151,26 +242,20 @@ class OatWriter::OatDexFile {
DCHECK_EQ(static_cast<off_t>(file_offset + offset_), out->Seek(0, kSeekCurrent)) \
<< "file_offset=" << file_offset << " offset_=" << offset_
-OatWriter::OatWriter(const std::vector<const DexFile*>& dex_files,
- uint32_t image_file_location_oat_checksum,
- uintptr_t image_file_location_oat_begin,
- int32_t image_patch_delta,
- const CompilerDriver* compiler,
- ImageWriter* image_writer,
- bool compiling_boot_image,
- TimingLogger* timings,
- SafeMap<std::string, std::string>* key_value_store)
- : compiler_driver_(compiler),
- image_writer_(image_writer),
+OatWriter::OatWriter(bool compiling_boot_image, TimingLogger* timings)
+ : write_state_(WriteState::kAddingDexFileSources),
+ timings_(timings),
+ raw_dex_files_(),
+ zip_archives_(),
+ zipped_dex_files_(),
+ zipped_dex_file_locations_(),
+ compiler_driver_(nullptr),
+ image_writer_(nullptr),
compiling_boot_image_(compiling_boot_image),
- dex_files_(&dex_files),
+ dex_files_(nullptr),
size_(0u),
bss_size_(0u),
oat_data_offset_(0u),
- image_file_location_oat_checksum_(image_file_location_oat_checksum),
- image_file_location_oat_begin_(image_file_location_oat_begin),
- image_patch_delta_(image_patch_delta),
- key_value_store_(key_value_store),
oat_header_(nullptr),
size_dex_file_alignment_(0),
size_executable_offset_alignment_(0),
@@ -197,55 +282,192 @@ OatWriter::OatWriter(const std::vector<const DexFile*>& dex_files,
size_oat_dex_file_location_data_(0),
size_oat_dex_file_location_checksum_(0),
size_oat_dex_file_offset_(0),
+ size_oat_dex_file_class_offsets_offset_(0),
size_oat_dex_file_lookup_table_offset_(0),
- size_oat_dex_file_class_offsets_(0),
size_oat_lookup_table_alignment_(0),
size_oat_lookup_table_(0),
+ size_oat_class_offsets_alignment_(0),
+ size_oat_class_offsets_(0),
size_oat_class_type_(0),
size_oat_class_status_(0),
size_oat_class_method_bitmaps_(0),
size_oat_class_method_offsets_(0),
method_offset_map_() {
- CHECK(key_value_store != nullptr);
- if (compiling_boot_image) {
- CHECK(image_writer != nullptr);
+}
+
+bool OatWriter::AddDexFileSource(const char* filename,
+ const char* location,
+ CreateTypeLookupTable create_type_lookup_table) {
+ DCHECK(write_state_ == WriteState::kAddingDexFileSources);
+ uint32_t magic;
+ std::string error_msg;
+ ScopedFd fd(OpenAndReadMagic(filename, &magic, &error_msg));
+ if (fd.get() == -1) {
+ PLOG(ERROR) << "Failed to read magic number from dex file: '" << filename << "'";
+ return false;
+ } else if (IsDexMagic(magic)) {
+ // The file is open for reading, not writing, so it's OK to let the File destructor
+ // close it without checking for explicit Close(), so pass checkUsage = false.
+ raw_dex_files_.emplace_back(new File(fd.release(), location, /* checkUsage */ false));
+ oat_dex_files_.emplace_back(location,
+ DexFileSource(raw_dex_files_.back().get()),
+ create_type_lookup_table);
+ } else if (IsZipMagic(magic)) {
+ if (!AddZippedDexFilesSource(std::move(fd), location, create_type_lookup_table)) {
+ return false;
+ }
+ } else {
+ LOG(ERROR) << "Expected valid zip or dex file: '" << filename << "'";
+ return false;
}
- InstructionSet instruction_set = compiler_driver_->GetInstructionSet();
- const InstructionSetFeatures* features = compiler_driver_->GetInstructionSetFeatures();
- relative_patcher_ = linker::RelativePatcher::Create(instruction_set, features,
- &method_offset_map_);
+ return true;
+}
- size_t offset;
- {
- TimingLogger::ScopedTiming split("InitOatHeader", timings);
- offset = InitOatHeader();
+// Add dex file source(s) from a zip file specified by a file handle.
+bool OatWriter::AddZippedDexFilesSource(ScopedFd&& zip_fd,
+ const char* location,
+ CreateTypeLookupTable create_type_lookup_table) {
+ DCHECK(write_state_ == WriteState::kAddingDexFileSources);
+ std::string error_msg;
+ zip_archives_.emplace_back(ZipArchive::OpenFromFd(zip_fd.release(), location, &error_msg));
+ ZipArchive* zip_archive = zip_archives_.back().get();
+ if (zip_archive == nullptr) {
+ LOG(ERROR) << "Failed to open zip from file descriptor for '" << location << "': "
+ << error_msg;
+ return false;
}
- {
- TimingLogger::ScopedTiming split("InitOatDexFiles", timings);
- offset = InitOatDexFiles(offset);
+ for (size_t i = 0; ; ++i) {
+ std::string entry_name = DexFile::GetMultiDexClassesDexName(i);
+ std::unique_ptr<ZipEntry> entry(zip_archive->Find(entry_name.c_str(), &error_msg));
+ if (entry == nullptr) {
+ break;
+ }
+ zipped_dex_files_.push_back(std::move(entry));
+ zipped_dex_file_locations_.push_back(DexFile::GetMultiDexLocation(i, location));
+ const char* full_location = zipped_dex_file_locations_.back().c_str();
+ oat_dex_files_.emplace_back(full_location,
+ DexFileSource(zipped_dex_files_.back().get()),
+ create_type_lookup_table);
+ }
+ if (zipped_dex_file_locations_.empty()) {
+ LOG(ERROR) << "No dex files in zip file '" << location << "': " << error_msg;
+ return false;
}
- {
- TimingLogger::ScopedTiming split("InitDexFiles", timings);
- offset = InitDexFiles(offset);
+ return true;
+}
+
+// Add dex file source from raw memory.
+bool OatWriter::AddRawDexFileSource(const ArrayRef<const uint8_t>& data,
+ const char* location,
+ uint32_t location_checksum,
+ CreateTypeLookupTable create_type_lookup_table) {
+ DCHECK(write_state_ == WriteState::kAddingDexFileSources);
+ if (data.size() < sizeof(DexFile::Header)) {
+ LOG(ERROR) << "Provided data is shorter than dex file header. size: "
+ << data.size() << " File: " << location;
+ return false;
}
- {
- TimingLogger::ScopedTiming split("InitLookupTables", timings);
- offset = InitLookupTables(offset);
+ if (!ValidateDexFileHeader(data.data(), location)) {
+ return false;
+ }
+ const UnalignedDexFileHeader* header = AsUnalignedDexFileHeader(data.data());
+ if (data.size() < header->file_size_) {
+ LOG(ERROR) << "Truncated dex file data. Data size: " << data.size()
+ << " file size from header: " << header->file_size_ << " File: " << location;
+ return false;
+ }
+
+ oat_dex_files_.emplace_back(location, DexFileSource(data.data()), create_type_lookup_table);
+ oat_dex_files_.back().dex_file_location_checksum_ = location_checksum;
+ return true;
+}
+
+dchecked_vector<const char*> OatWriter::GetSourceLocations() const {
+ dchecked_vector<const char*> locations;
+ locations.reserve(oat_dex_files_.size());
+ for (const OatDexFile& oat_dex_file : oat_dex_files_) {
+ locations.push_back(oat_dex_file.GetLocation());
}
+ return locations;
+}
+
+bool OatWriter::WriteAndOpenDexFiles(
+ OutputStream* rodata,
+ File* file,
+ InstructionSet instruction_set,
+ const InstructionSetFeatures* instruction_set_features,
+ SafeMap<std::string, std::string>* key_value_store,
+ /*out*/ std::unique_ptr<MemMap>* opened_dex_files_map,
+ /*out*/ std::vector<std::unique_ptr<const DexFile>>* opened_dex_files) {
+ CHECK(write_state_ == WriteState::kAddingDexFileSources);
+
+ size_t offset = InitOatHeader(instruction_set,
+ instruction_set_features,
+ dchecked_integral_cast<uint32_t>(oat_dex_files_.size()),
+ key_value_store);
+ offset = InitOatDexFiles(offset);
+ size_ = offset;
+
+ std::unique_ptr<MemMap> dex_files_map;
+ std::vector<std::unique_ptr<const DexFile>> dex_files;
+ if (!WriteDexFiles(rodata, file)) {
+ return false;
+ }
+ // Reserve space for type lookup tables and update type_lookup_table_offset_.
+ for (OatDexFile& oat_dex_file : oat_dex_files_) {
+ oat_dex_file.ReserveTypeLookupTable(this);
+ }
+ size_t size_after_type_lookup_tables = size_;
+ // Reserve space for class offsets and update class_offsets_offset_.
+ for (OatDexFile& oat_dex_file : oat_dex_files_) {
+ oat_dex_file.ReserveClassOffsets(this);
+ }
+ if (!WriteOatDexFiles(rodata) ||
+ !ExtendForTypeLookupTables(rodata, file, size_after_type_lookup_tables) ||
+ !OpenDexFiles(file, &dex_files_map, &dex_files) ||
+ !WriteTypeLookupTables(dex_files_map.get(), dex_files)) {
+ return false;
+ }
+
+ *opened_dex_files_map = std::move(dex_files_map);
+ *opened_dex_files = std::move(dex_files);
+ write_state_ = WriteState::kPrepareLayout;
+ return true;
+}
+
+void OatWriter::PrepareLayout(const CompilerDriver* compiler,
+ ImageWriter* image_writer,
+ const std::vector<const DexFile*>& dex_files) {
+ CHECK(write_state_ == WriteState::kPrepareLayout);
+
+ dex_files_ = &dex_files;
+
+ compiler_driver_ = compiler;
+ image_writer_ = image_writer;
+ if (compiling_boot_image_) {
+ CHECK(image_writer_ != nullptr);
+ }
+ InstructionSet instruction_set = compiler_driver_->GetInstructionSet();
+ CHECK_EQ(instruction_set, oat_header_->GetInstructionSet());
+ const InstructionSetFeatures* features = compiler_driver_->GetInstructionSetFeatures();
+ relative_patcher_ = linker::RelativePatcher::Create(instruction_set, features,
+ &method_offset_map_);
+
+ uint32_t offset = size_;
{
- TimingLogger::ScopedTiming split("InitOatClasses", timings);
+ TimingLogger::ScopedTiming split("InitOatClasses", timings_);
offset = InitOatClasses(offset);
}
{
- TimingLogger::ScopedTiming split("InitOatMaps", timings);
+ TimingLogger::ScopedTiming split("InitOatMaps", timings_);
offset = InitOatMaps(offset);
}
{
- TimingLogger::ScopedTiming split("InitOatCode", timings);
+ TimingLogger::ScopedTiming split("InitOatCode", timings_);
offset = InitOatCode(offset);
}
{
- TimingLogger::ScopedTiming split("InitOatCodeDexFiles", timings);
+ TimingLogger::ScopedTiming split("InitOatCodeDexFiles", timings_);
offset = InitOatCodeDexFiles(offset);
}
size_ = offset;
@@ -255,7 +477,7 @@ OatWriter::OatWriter(const std::vector<const DexFile*>& dex_files,
size_t bss_start = RoundUp(size_, kPageSize);
size_t pointer_size = GetInstructionSetPointerSize(instruction_set);
bss_size_ = 0u;
- for (const DexFile* dex_file : dex_files) {
+ for (const DexFile* dex_file : *dex_files_) {
dex_cache_arrays_offsets_.Put(dex_file, bss_start + bss_size_);
DexCacheArraysLayout layout(pointer_size, dex_file);
bss_size_ += layout.Size();
@@ -265,9 +487,10 @@ OatWriter::OatWriter(const std::vector<const DexFile*>& dex_files,
CHECK_EQ(dex_files_->size(), oat_dex_files_.size());
if (compiling_boot_image_) {
CHECK_EQ(image_writer_ != nullptr,
- key_value_store_->find(OatHeader::kImageLocationKey) == key_value_store_->end());
+ oat_header_->GetStoreValueByKey(OatHeader::kImageLocationKey) == nullptr);
}
- CHECK_ALIGNED(image_patch_delta_, kPageSize);
+
+ write_state_ = WriteState::kWriteRoData;
}
OatWriter::~OatWriter() {
@@ -1134,59 +1357,26 @@ bool OatWriter::VisitDexMethods(DexMethodVisitor* visitor) {
return true;
}
-size_t OatWriter::InitOatHeader() {
- oat_header_.reset(OatHeader::Create(compiler_driver_->GetInstructionSet(),
- compiler_driver_->GetInstructionSetFeatures(),
- dchecked_integral_cast<uint32_t>(dex_files_->size()),
- key_value_store_));
- oat_header_->SetImageFileLocationOatChecksum(image_file_location_oat_checksum_);
- oat_header_->SetImageFileLocationOatDataBegin(image_file_location_oat_begin_);
-
+size_t OatWriter::InitOatHeader(InstructionSet instruction_set,
+ const InstructionSetFeatures* instruction_set_features,
+ uint32_t num_dex_files,
+ SafeMap<std::string, std::string>* key_value_store) {
+ TimingLogger::ScopedTiming split("InitOatHeader", timings_);
+ oat_header_.reset(OatHeader::Create(instruction_set,
+ instruction_set_features,
+ num_dex_files,
+ key_value_store));
+ size_oat_header_ += sizeof(OatHeader);
+ size_oat_header_key_value_store_ += oat_header_->GetHeaderSize() - sizeof(OatHeader);
return oat_header_->GetHeaderSize();
}
size_t OatWriter::InitOatDexFiles(size_t offset) {
- // create the OatDexFiles
- for (size_t i = 0; i != dex_files_->size(); ++i) {
- const DexFile* dex_file = (*dex_files_)[i];
- CHECK(dex_file != nullptr);
- oat_dex_files_.emplace_back(offset, *dex_file);
- offset += oat_dex_files_.back().SizeOf();
- }
- return offset;
-}
-
-size_t OatWriter::InitDexFiles(size_t offset) {
- // calculate the offsets within OatDexFiles to the DexFiles
- for (size_t i = 0; i != dex_files_->size(); ++i) {
- // dex files are required to be 4 byte aligned
- size_t original_offset = offset;
- offset = RoundUp(offset, 4);
- size_dex_file_alignment_ += offset - original_offset;
-
- // set offset in OatDexFile to DexFile
- oat_dex_files_[i].dex_file_offset_ = offset;
-
- const DexFile* dex_file = (*dex_files_)[i];
-
- // Initialize type lookup table
- oat_dex_files_[i].lookup_table_ = dex_file->GetTypeLookupTable();
-
- offset += dex_file->GetHeader().file_size_;
- }
- return offset;
-}
-
-size_t OatWriter::InitLookupTables(size_t offset) {
+ TimingLogger::ScopedTiming split("InitOatDexFiles", timings_);
+ // Initialize offsets of dex files.
for (OatDexFile& oat_dex_file : oat_dex_files_) {
- if (oat_dex_file.lookup_table_ != nullptr) {
- uint32_t aligned_offset = RoundUp(offset, 4);
- oat_dex_file.lookup_table_offset_ = aligned_offset;
- size_oat_lookup_table_alignment_ += aligned_offset - offset;
- offset = aligned_offset + oat_dex_file.lookup_table_->RawDataLength();
- } else {
- oat_dex_file.lookup_table_offset_ = 0;
- }
+ oat_dex_file.offset_ = offset;
+ offset += oat_dex_file.SizeOf();
}
return offset;
}
@@ -1239,7 +1429,6 @@ size_t OatWriter::InitOatCode(size_t offset) {
oat_header_->SetExecutableOffset(offset);
size_executable_offset_alignment_ = offset - old_offset;
if (compiler_driver_->IsBootImage()) {
- CHECK_EQ(image_patch_delta_, 0);
InstructionSet instruction_set = compiler_driver_->GetInstructionSet();
#define DO_TRAMPOLINE(field, fn_name) \
@@ -1264,7 +1453,6 @@ size_t OatWriter::InitOatCode(size_t offset) {
oat_header_->SetQuickImtConflictTrampolineOffset(0);
oat_header_->SetQuickResolutionTrampolineOffset(0);
oat_header_->SetQuickToInterpreterBridgeOffset(0);
- oat_header_->SetImagePatchDelta(image_patch_delta_);
}
return offset;
}
@@ -1289,22 +1477,15 @@ size_t OatWriter::InitOatCodeDexFiles(size_t offset) {
}
bool OatWriter::WriteRodata(OutputStream* out) {
- if (!GetOatDataOffset(out)) {
- return false;
- }
- const size_t file_offset = oat_data_offset_;
+ CHECK(write_state_ == WriteState::kWriteRoData);
- // Reserve space for header. It will be written last - after updating the checksum.
- size_t header_size = oat_header_->GetHeaderSize();
- if (out->Seek(header_size, kSeekCurrent) == static_cast<off_t>(-1)) {
- PLOG(ERROR) << "Failed to reserve space for oat header in " << out->GetLocation();
+ if (!WriteClassOffsets(out)) {
+ LOG(ERROR) << "Failed to write class offsets to " << out->GetLocation();
return false;
}
- size_oat_header_ += sizeof(OatHeader);
- size_oat_header_key_value_store_ += oat_header_->GetHeaderSize() - sizeof(OatHeader);
- if (!WriteTables(out, file_offset)) {
- LOG(ERROR) << "Failed to write oat tables to " << out->GetLocation();
+ if (!WriteClasses(out)) {
+ LOG(ERROR) << "Failed to write classes to " << out->GetLocation();
return false;
}
@@ -1313,6 +1494,7 @@ bool OatWriter::WriteRodata(OutputStream* out) {
LOG(ERROR) << "Failed to seek to oat code position in " << out->GetLocation();
return false;
}
+ size_t file_offset = oat_data_offset_;
size_t relative_offset = static_cast<size_t>(tables_end_offset) - file_offset;
relative_offset = WriteMaps(out, file_offset, relative_offset);
if (relative_offset == 0) {
@@ -1332,11 +1514,13 @@ bool OatWriter::WriteRodata(OutputStream* out) {
}
DCHECK_OFFSET();
+ write_state_ = WriteState::kWriteText;
return true;
}
bool OatWriter::WriteCode(OutputStream* out) {
- size_t header_size = oat_header_->GetHeaderSize();
+ CHECK(write_state_ == WriteState::kWriteText);
+
const size_t file_offset = oat_data_offset_;
size_t relative_offset = oat_header_->GetExecutableOffset();
DCHECK_OFFSET();
@@ -1390,10 +1574,12 @@ bool OatWriter::WriteCode(OutputStream* out) {
DO_STAT(size_oat_dex_file_location_data_);
DO_STAT(size_oat_dex_file_location_checksum_);
DO_STAT(size_oat_dex_file_offset_);
+ DO_STAT(size_oat_dex_file_class_offsets_offset_);
DO_STAT(size_oat_dex_file_lookup_table_offset_);
- DO_STAT(size_oat_dex_file_class_offsets_);
DO_STAT(size_oat_lookup_table_alignment_);
DO_STAT(size_oat_lookup_table_);
+ DO_STAT(size_oat_class_offsets_alignment_);
+ DO_STAT(size_oat_class_offsets_);
DO_STAT(size_oat_class_type_);
DO_STAT(size_oat_class_status_);
DO_STAT(size_oat_class_method_bitmaps_);
@@ -1408,89 +1594,91 @@ bool OatWriter::WriteCode(OutputStream* out) {
CHECK_EQ(file_offset + size_, static_cast<size_t>(oat_end_file_offset));
CHECK_EQ(size_, relative_offset);
- // Finalize the header checksum.
+ write_state_ = WriteState::kWriteHeader;
+ return true;
+}
+
+bool OatWriter::WriteHeader(OutputStream* out,
+ uint32_t image_file_location_oat_checksum,
+ uintptr_t image_file_location_oat_begin,
+ int32_t image_patch_delta) {
+ CHECK(write_state_ == WriteState::kWriteHeader);
+
+ oat_header_->SetImageFileLocationOatChecksum(image_file_location_oat_checksum);
+ oat_header_->SetImageFileLocationOatDataBegin(image_file_location_oat_begin);
+ if (compiler_driver_->IsBootImage()) {
+ CHECK_EQ(image_patch_delta, 0);
+ CHECK_EQ(oat_header_->GetImagePatchDelta(), 0);
+ } else {
+ CHECK_ALIGNED(image_patch_delta, kPageSize);
+ oat_header_->SetImagePatchDelta(image_patch_delta);
+ }
oat_header_->UpdateChecksumWithHeaderData();
- // Write the header now that the checksum is final.
+ const size_t file_offset = oat_data_offset_;
+
+ off_t current_offset = out->Seek(0, kSeekCurrent);
+ if (current_offset == static_cast<off_t>(-1)) {
+ PLOG(ERROR) << "Failed to get current offset from " << out->GetLocation();
+ return false;
+ }
if (out->Seek(file_offset, kSeekSet) == static_cast<off_t>(-1)) {
PLOG(ERROR) << "Failed to seek to oat header position in " << out->GetLocation();
return false;
}
DCHECK_EQ(file_offset, static_cast<size_t>(out->Seek(0, kSeekCurrent)));
+
+ // Flush all other data before writing the header.
+ if (!out->Flush()) {
+ PLOG(ERROR) << "Failed to flush before writing oat header to " << out->GetLocation();
+ return false;
+ }
+ // Write the header.
+ size_t header_size = oat_header_->GetHeaderSize();
if (!out->WriteFully(oat_header_.get(), header_size)) {
PLOG(ERROR) << "Failed to write oat header to " << out->GetLocation();
return false;
}
- if (out->Seek(oat_end_file_offset, kSeekSet) == static_cast<off_t>(-1)) {
- PLOG(ERROR) << "Failed to seek to end after writing oat header to " << out->GetLocation();
+ // Flush the header data.
+ if (!out->Flush()) {
+ PLOG(ERROR) << "Failed to flush after writing oat header to " << out->GetLocation();
+ return false;
+ }
+
+ if (out->Seek(current_offset, kSeekSet) == static_cast<off_t>(-1)) {
+ PLOG(ERROR) << "Failed to seek back after writing oat header to " << out->GetLocation();
return false;
}
- DCHECK_EQ(oat_end_file_offset, out->Seek(0, kSeekCurrent));
+ DCHECK_EQ(current_offset, out->Seek(0, kSeekCurrent));
+ write_state_ = WriteState::kDone;
return true;
}
-bool OatWriter::WriteTables(OutputStream* out, const size_t file_offset) {
- for (size_t i = 0; i != oat_dex_files_.size(); ++i) {
- if (!oat_dex_files_[i].Write(this, out, file_offset)) {
- PLOG(ERROR) << "Failed to write oat dex information to " << out->GetLocation();
- return false;
- }
- }
- for (size_t i = 0; i != oat_dex_files_.size(); ++i) {
- uint32_t expected_offset = file_offset + oat_dex_files_[i].dex_file_offset_;
- off_t actual_offset = out->Seek(expected_offset, kSeekSet);
- if (static_cast<uint32_t>(actual_offset) != expected_offset) {
- const DexFile* dex_file = (*dex_files_)[i];
- PLOG(ERROR) << "Failed to seek to dex file section. Actual: " << actual_offset
- << " Expected: " << expected_offset << " File: " << dex_file->GetLocation();
- return false;
- }
- const DexFile* dex_file = (*dex_files_)[i];
- if (!out->WriteFully(&dex_file->GetHeader(), dex_file->GetHeader().file_size_)) {
- PLOG(ERROR) << "Failed to write dex file " << dex_file->GetLocation()
- << " to " << out->GetLocation();
- return false;
- }
- size_dex_file_ += dex_file->GetHeader().file_size_;
- }
- if (!WriteLookupTables(out, file_offset)) {
- return false;
- }
- for (size_t i = 0; i != oat_classes_.size(); ++i) {
- if (!oat_classes_[i].Write(this, out, file_offset)) {
- PLOG(ERROR) << "Failed to write oat methods information to " << out->GetLocation();
- return false;
+bool OatWriter::WriteClassOffsets(OutputStream* out) {
+ for (OatDexFile& oat_dex_file : oat_dex_files_) {
+ if (oat_dex_file.class_offsets_offset_ != 0u) {
+ uint32_t expected_offset = oat_data_offset_ + oat_dex_file.class_offsets_offset_;
+ off_t actual_offset = out->Seek(expected_offset, kSeekSet);
+ if (static_cast<uint32_t>(actual_offset) != expected_offset) {
+ PLOG(ERROR) << "Failed to seek to oat class offsets section. Actual: " << actual_offset
+ << " Expected: " << expected_offset << " File: " << oat_dex_file.GetLocation();
+ return false;
+ }
+ if (!oat_dex_file.WriteClassOffsets(this, out)) {
+ return false;
+ }
}
}
return true;
}
-bool OatWriter::WriteLookupTables(OutputStream* out, const size_t file_offset) {
- for (size_t i = 0; i < oat_dex_files_.size(); ++i) {
- const uint32_t lookup_table_offset = oat_dex_files_[i].lookup_table_offset_;
- const TypeLookupTable* table = oat_dex_files_[i].lookup_table_;
- DCHECK_EQ(lookup_table_offset == 0, table == nullptr);
- if (lookup_table_offset == 0) {
- continue;
- }
- const uint32_t expected_offset = file_offset + lookup_table_offset;
- off_t actual_offset = out->Seek(expected_offset, kSeekSet);
- if (static_cast<uint32_t>(actual_offset) != expected_offset) {
- const DexFile* dex_file = (*dex_files_)[i];
- PLOG(ERROR) << "Failed to seek to lookup table section. Actual: " << actual_offset
- << " Expected: " << expected_offset << " File: " << dex_file->GetLocation();
+bool OatWriter::WriteClasses(OutputStream* out) {
+ for (OatClass& oat_class : oat_classes_) {
+ if (!oat_class.Write(this, out, oat_data_offset_)) {
+ PLOG(ERROR) << "Failed to write oat methods information to " << out->GetLocation();
return false;
}
- if (table != nullptr) {
- if (!WriteData(out, table->RawData(), table->RawDataLength())) {
- const DexFile* dex_file = (*dex_files_)[i];
- PLOG(ERROR) << "Failed to write lookup table for " << dex_file->GetLocation()
- << " to " << out->GetLocation();
- return false;
- }
- size_oat_lookup_table_ += table->RawDataLength();
- }
}
return true;
}
@@ -1585,6 +1773,455 @@ bool OatWriter::GetOatDataOffset(OutputStream* out) {
return true;
}
+bool OatWriter::ReadDexFileHeader(File* file, OatDexFile* oat_dex_file) {
+ // Read the dex file header and perform minimal verification.
+ uint8_t raw_header[sizeof(DexFile::Header)];
+ if (!file->ReadFully(&raw_header, sizeof(DexFile::Header))) {
+ PLOG(ERROR) << "Failed to read dex file header. Actual: "
+ << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+ return false;
+ }
+ if (!ValidateDexFileHeader(raw_header, oat_dex_file->GetLocation())) {
+ return false;
+ }
+
+ const UnalignedDexFileHeader* header = AsUnalignedDexFileHeader(raw_header);
+ oat_dex_file->dex_file_size_ = header->file_size_;
+ oat_dex_file->dex_file_location_checksum_ = header->checksum_;
+ oat_dex_file->class_offsets_.resize(header->class_defs_size_);
+ return true;
+}
+
+bool OatWriter::ValidateDexFileHeader(const uint8_t* raw_header, const char* location) {
+ if (!DexFile::IsMagicValid(raw_header)) {
+ LOG(ERROR) << "Invalid magic number in dex file header. " << " File: " << location;
+ return false;
+ }
+ if (!DexFile::IsVersionValid(raw_header)) {
+ LOG(ERROR) << "Invalid version number in dex file header. " << " File: " << location;
+ return false;
+ }
+ const UnalignedDexFileHeader* header = AsUnalignedDexFileHeader(raw_header);
+ if (header->file_size_ < sizeof(DexFile::Header)) {
+ LOG(ERROR) << "Dex file header specifies file size insufficient to contain the header."
+ << " File: " << location;
+ return false;
+ }
+ return true;
+}
+
+bool OatWriter::WriteDexFiles(OutputStream* rodata, File* file) {
+ TimingLogger::ScopedTiming split("WriteDexFiles", timings_);
+
+ // Get the elf file offset of the oat file.
+ if (!GetOatDataOffset(rodata)) {
+ return false;
+ }
+
+ // Write dex files.
+ for (OatDexFile& oat_dex_file : oat_dex_files_) {
+ if (!WriteDexFile(rodata, file, &oat_dex_file)) {
+ return false;
+ }
+ }
+
+ // Close sources.
+ for (OatDexFile& oat_dex_file : oat_dex_files_) {
+ oat_dex_file.source_.Clear(); // Get rid of the reference; it's about to be invalidated.
+ }
+ zipped_dex_files_.clear();
+ zip_archives_.clear();
+ raw_dex_files_.clear();
+ return true;
+}
+
+bool OatWriter::WriteDexFile(OutputStream* rodata, File* file, OatDexFile* oat_dex_file) {
+ if (!SeekToDexFile(rodata, file, oat_dex_file)) {
+ return false;
+ }
+ if (oat_dex_file->source_.IsZipEntry()) {
+ if (!WriteDexFile(rodata, file, oat_dex_file, oat_dex_file->source_.GetZipEntry())) {
+ return false;
+ }
+ } else if (oat_dex_file->source_.IsRawFile()) {
+ if (!WriteDexFile(rodata, file, oat_dex_file, oat_dex_file->source_.GetRawFile())) {
+ return false;
+ }
+ } else {
+ DCHECK(oat_dex_file->source_.IsRawData());
+ if (!WriteDexFile(rodata, oat_dex_file, oat_dex_file->source_.GetRawData())) {
+ return false;
+ }
+ }
+
+ // Update current size and account for the written data.
+ DCHECK_EQ(size_, oat_dex_file->dex_file_offset_);
+ size_ += oat_dex_file->dex_file_size_;
+ size_dex_file_ += oat_dex_file->dex_file_size_;
+ return true;
+}
+
+bool OatWriter::SeekToDexFile(OutputStream* out, File* file, OatDexFile* oat_dex_file) {
+ // Dex files are required to be 4 byte aligned.
+ size_t original_offset = size_;
+ size_t offset = RoundUp(original_offset, 4);
+ size_dex_file_alignment_ += offset - original_offset;
+
+ // Seek to the start of the dex file and flush any pending operations in the stream.
+ // Verify that, after flushing the stream, the file is at the same offset as the stream.
+ uint32_t start_offset = oat_data_offset_ + offset;
+ off_t actual_offset = out->Seek(start_offset, kSeekSet);
+ if (actual_offset != static_cast<off_t>(start_offset)) {
+ PLOG(ERROR) << "Failed to seek to dex file section. Actual: " << actual_offset
+ << " Expected: " << start_offset
+ << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+ return false;
+ }
+ if (!out->Flush()) {
+ PLOG(ERROR) << "Failed to flush before writing dex file."
+ << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+ return false;
+ }
+ actual_offset = lseek(file->Fd(), 0, SEEK_CUR);
+ if (actual_offset != static_cast<off_t>(start_offset)) {
+ PLOG(ERROR) << "Stream/file position mismatch! Actual: " << actual_offset
+ << " Expected: " << start_offset
+ << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+ return false;
+ }
+
+ size_ = offset;
+ oat_dex_file->dex_file_offset_ = offset;
+ return true;
+}
+
+bool OatWriter::WriteDexFile(OutputStream* rodata,
+ File* file,
+ OatDexFile* oat_dex_file,
+ ZipEntry* dex_file) {
+ size_t start_offset = oat_data_offset_ + size_;
+ DCHECK_EQ(static_cast<off_t>(start_offset), rodata->Seek(0, kSeekCurrent));
+
+ // Extract the dex file and get the extracted size.
+ std::string error_msg;
+ if (!dex_file->ExtractToFile(*file, &error_msg)) {
+ LOG(ERROR) << "Failed to extract dex file from ZIP entry: " << error_msg
+ << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+ return false;
+ }
+ if (file->Flush() != 0) {
+ PLOG(ERROR) << "Failed to flush dex file from ZIP entry."
+ << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+ return false;
+ }
+ off_t extracted_end = lseek(file->Fd(), 0, SEEK_CUR);
+ if (extracted_end == static_cast<off_t>(-1)) {
+ PLOG(ERROR) << "Failed get end offset after writing dex file from ZIP entry."
+ << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+ return false;
+ }
+ if (extracted_end < static_cast<off_t>(start_offset)) {
+ LOG(ERROR) << "Dex file end position is before start position! End: " << extracted_end
+ << " Start: " << start_offset
+ << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+ return false;
+ }
+ uint64_t extracted_size = static_cast<uint64_t>(extracted_end - start_offset);
+ if (extracted_size < sizeof(DexFile::Header)) {
+ LOG(ERROR) << "Extracted dex file is shorter than dex file header. size: "
+ << extracted_size << " File: " << oat_dex_file->GetLocation();
+ return false;
+ }
+
+ // Read the dex file header and extract required data to OatDexFile.
+ off_t actual_offset = lseek(file->Fd(), start_offset, SEEK_SET);
+ if (actual_offset != static_cast<off_t>(start_offset)) {
+ PLOG(ERROR) << "Failed to seek back to dex file header. Actual: " << actual_offset
+ << " Expected: " << start_offset
+ << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+ return false;
+ }
+ if (!ReadDexFileHeader(file, oat_dex_file)) {
+ return false;
+ }
+ if (extracted_size < oat_dex_file->dex_file_size_) {
+ LOG(ERROR) << "Extracted truncated dex file. Extracted size: " << extracted_size
+ << " file size from header: " << oat_dex_file->dex_file_size_
+ << " File: " << oat_dex_file->GetLocation();
+ return false;
+ }
+
+ // Override the checksum from header with the CRC from ZIP entry.
+ oat_dex_file->dex_file_location_checksum_ = dex_file->GetCrc32();
+
+ // Seek both file and stream to the end offset.
+ size_t end_offset = start_offset + oat_dex_file->dex_file_size_;
+ actual_offset = lseek(file->Fd(), end_offset, SEEK_SET);
+ if (actual_offset != static_cast<off_t>(end_offset)) {
+ PLOG(ERROR) << "Failed to seek to end of dex file. Actual: " << actual_offset
+ << " Expected: " << end_offset
+ << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+ return false;
+ }
+ actual_offset = rodata->Seek(end_offset, kSeekSet);
+ if (actual_offset != static_cast<off_t>(end_offset)) {
+ PLOG(ERROR) << "Failed to seek stream to end of dex file. Actual: " << actual_offset
+ << " Expected: " << end_offset << " File: " << oat_dex_file->GetLocation();
+ return false;
+ }
+ if (!rodata->Flush()) {
+ PLOG(ERROR) << "Failed to flush stream after seeking over dex file."
+ << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+ return false;
+ }
+
+ // If we extracted more than the size specified in the header, truncate the file.
+ if (extracted_size > oat_dex_file->dex_file_size_) {
+ if (file->SetLength(end_offset) != 0) {
+ PLOG(ERROR) << "Failed to truncate excessive dex file length."
+ << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+ return false;
+ }
+ }
+
+ return true;
+}
+
+bool OatWriter::WriteDexFile(OutputStream* rodata,
+ File* file,
+ OatDexFile* oat_dex_file,
+ File* dex_file) {
+ size_t start_offset = oat_data_offset_ + size_;
+ DCHECK_EQ(static_cast<off_t>(start_offset), rodata->Seek(0, kSeekCurrent));
+
+ off_t input_offset = lseek(dex_file->Fd(), 0, SEEK_SET);
+ if (input_offset != static_cast<off_t>(0)) {
+ PLOG(ERROR) << "Failed to seek to dex file header. Actual: " << input_offset
+ << " Expected: 0"
+ << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+ return false;
+ }
+ if (!ReadDexFileHeader(dex_file, oat_dex_file)) {
+ return false;
+ }
+
+ // Copy the input dex file using sendfile().
+ if (!file->Copy(dex_file, 0, oat_dex_file->dex_file_size_)) {
+ PLOG(ERROR) << "Failed to copy dex file to oat file."
+ << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+ return false;
+ }
+ if (file->Flush() != 0) {
+ PLOG(ERROR) << "Failed to flush dex file."
+ << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+ return false;
+ }
+
+ // Check file position and seek the stream to the end offset.
+ size_t end_offset = start_offset + oat_dex_file->dex_file_size_;
+ off_t actual_offset = lseek(file->Fd(), 0, SEEK_CUR);
+ if (actual_offset != static_cast<off_t>(end_offset)) {
+ PLOG(ERROR) << "Unexpected file position after copying dex file. Actual: " << actual_offset
+ << " Expected: " << end_offset
+ << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+ return false;
+ }
+ actual_offset = rodata->Seek(end_offset, kSeekSet);
+ if (actual_offset != static_cast<off_t>(end_offset)) {
+ PLOG(ERROR) << "Failed to seek stream to end of dex file. Actual: " << actual_offset
+ << " Expected: " << end_offset << " File: " << oat_dex_file->GetLocation();
+ return false;
+ }
+ if (!rodata->Flush()) {
+ PLOG(ERROR) << "Failed to flush stream after seeking over dex file."
+ << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
+ return false;
+ }
+
+ return true;
+}
+
+bool OatWriter::WriteDexFile(OutputStream* rodata,
+ OatDexFile* oat_dex_file,
+ const uint8_t* dex_file) {
+ // Note: The raw data has already been checked to contain the header
+ // and all the data that the header specifies as the file size.
+ DCHECK(dex_file != nullptr);
+ DCHECK(ValidateDexFileHeader(dex_file, oat_dex_file->GetLocation()));
+ const UnalignedDexFileHeader* header = AsUnalignedDexFileHeader(dex_file);
+
+ if (!rodata->WriteFully(dex_file, header->file_size_)) {
+ PLOG(ERROR) << "Failed to write dex file " << oat_dex_file->GetLocation()
+ << " to " << rodata->GetLocation();
+ return false;
+ }
+ if (!rodata->Flush()) {
+ PLOG(ERROR) << "Failed to flush stream after writing dex file."
+ << " File: " << oat_dex_file->GetLocation();
+ return false;
+ }
+
+ // Update dex file size and resize class offsets in the OatDexFile.
+ // Note: For raw data, the checksum is passed directly to AddRawDexFileSource().
+ oat_dex_file->dex_file_size_ = header->file_size_;
+ oat_dex_file->class_offsets_.resize(header->class_defs_size_);
+ return true;
+}
+
+bool OatWriter::WriteOatDexFiles(OutputStream* rodata) {
+ TimingLogger::ScopedTiming split("WriteOatDexFiles", timings_);
+
+ // Seek to the start of OatDexFiles, i.e. to the end of the OatHeader. If there are
+ // no OatDexFiles, no data is actually written to .rodata before WriteHeader() and
+ // this Seek() ensures that we reserve the space for OatHeader in .rodata.
+ DCHECK(oat_dex_files_.empty() || oat_dex_files_[0u].offset_ == oat_header_->GetHeaderSize());
+ uint32_t expected_offset = oat_data_offset_ + oat_header_->GetHeaderSize();
+ off_t actual_offset = rodata->Seek(expected_offset, kSeekSet);
+ if (static_cast<uint32_t>(actual_offset) != expected_offset) {
+ PLOG(ERROR) << "Failed to seek to OatDexFile table section. Actual: " << actual_offset
+ << " Expected: " << expected_offset << " File: " << rodata->GetLocation();
+ return false;
+ }
+
+ for (size_t i = 0, size = oat_dex_files_.size(); i != size; ++i) {
+ OatDexFile* oat_dex_file = &oat_dex_files_[i];
+
+ DCHECK_EQ(oat_data_offset_ + oat_dex_file->offset_,
+ static_cast<size_t>(rodata->Seek(0, kSeekCurrent)));
+
+ // Write OatDexFile.
+ if (!oat_dex_file->Write(this, rodata)) {
+ PLOG(ERROR) << "Failed to write oat dex information to " << rodata->GetLocation();
+ return false;
+ }
+ }
+
+ return true;
+}
+
+bool OatWriter::ExtendForTypeLookupTables(OutputStream* rodata, File* file, size_t offset) {
+ TimingLogger::ScopedTiming split("ExtendForTypeLookupTables", timings_);
+
+ int64_t new_length = oat_data_offset_ + dchecked_integral_cast<int64_t>(offset);
+ if (file->SetLength(new_length) != 0) {
+ PLOG(ERROR) << "Failed to extend file for type lookup tables. new_length: " << new_length
+ << "File: " << file->GetPath();
+ return false;
+ }
+ off_t actual_offset = rodata->Seek(new_length, kSeekSet);
+ if (actual_offset != static_cast<off_t>(new_length)) {
+ PLOG(ERROR) << "Failed to seek stream after extending file for type lookup tables."
+ << " Actual: " << actual_offset << " Expected: " << new_length
+ << " File: " << rodata->GetLocation();
+ return false;
+ }
+ if (!rodata->Flush()) {
+ PLOG(ERROR) << "Failed to flush stream after extending for type lookup tables."
+ << " File: " << rodata->GetLocation();
+ return false;
+ }
+ return true;
+}
+
+bool OatWriter::OpenDexFiles(
+ File* file,
+ /*out*/ std::unique_ptr<MemMap>* opened_dex_files_map,
+ /*out*/ std::vector<std::unique_ptr<const DexFile>>* opened_dex_files) {
+ TimingLogger::ScopedTiming split("OpenDexFiles", timings_);
+
+ if (oat_dex_files_.empty()) {
+ // Nothing to do.
+ return true;
+ }
+
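+ // Map the range of the output file that covers all the dex files just written.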
+ size_t map_offset = oat_dex_files_[0].dex_file_offset_;
+ size_t length = size_ - map_offset;
+ std::string error_msg;
+ std::unique_ptr<MemMap> dex_files_map(MemMap::MapFile(length,
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED,
+ file->Fd(),
+ oat_data_offset_ + map_offset,
+ /* low_4gb */ false,
+ file->GetPath().c_str(),
+ &error_msg));
+ if (dex_files_map == nullptr) {
+ LOG(ERROR) << "Failed to mmap() dex files from oat file. File: " << file->GetPath()
+ << " error: " << error_msg;
+ return false;
+ }
+ std::vector<std::unique_ptr<const DexFile>> dex_files;
+ for (OatDexFile& oat_dex_file : oat_dex_files_) {
+ // Make sure no one messed with input files while we were copying data.
+ // At the very least we need consistent file size and number of class definitions.
+ const uint8_t* raw_dex_file =
+ dex_files_map->Begin() + oat_dex_file.dex_file_offset_ - map_offset;
+ if (!ValidateDexFileHeader(raw_dex_file, oat_dex_file.GetLocation())) {
+ // Note: ValidateDexFileHeader() already logged an error message.
+ LOG(ERROR) << "Failed to verify written dex file header!"
+ << " Output: " << file->GetPath() << " ~ " << std::hex << map_offset
+ << " ~ " << static_cast<const void*>(raw_dex_file);
+ return false;
+ }
+ const UnalignedDexFileHeader* header = AsUnalignedDexFileHeader(raw_dex_file);
+ if (header->file_size_ != oat_dex_file.dex_file_size_) {
+ LOG(ERROR) << "File size mismatch in written dex file header! Expected: "
+ << oat_dex_file.dex_file_size_ << " Actual: " << header->file_size_
+ << " Output: " << file->GetPath();
+ return false;
+ }
+ if (header->class_defs_size_ != oat_dex_file.class_offsets_.size()) {
+ LOG(ERROR) << "Class defs size mismatch in written dex file header! Expected: "
+ << oat_dex_file.class_offsets_.size() << " Actual: " << header->class_defs_size_
+ << " Output: " << file->GetPath();
+ return false;
+ }
+
+ // Now, open the dex file.
+ dex_files.emplace_back(DexFile::Open(raw_dex_file,
+ oat_dex_file.dex_file_size_,
+ oat_dex_file.GetLocation(),
+ oat_dex_file.dex_file_location_checksum_,
+ /* oat_dex_file */ nullptr,
+ &error_msg));
+ if (dex_files.back() == nullptr) {
+ LOG(ERROR) << "Failed to open dex file from oat file. File:" << oat_dex_file.GetLocation();
+ return false;
+ }
+ }
+
+ *opened_dex_files_map = std::move(dex_files_map);
+ *opened_dex_files = std::move(dex_files);
+ return true;
+}
+
+bool OatWriter::WriteTypeLookupTables(
+ MemMap* opened_dex_files_map,
+ const std::vector<std::unique_ptr<const DexFile>>& opened_dex_files) {
+ TimingLogger::ScopedTiming split("WriteTypeLookupTables", timings_);
+
+ DCHECK_EQ(opened_dex_files.size(), oat_dex_files_.size());
+ for (size_t i = 0, size = opened_dex_files.size(); i != size; ++i) {
+ OatDexFile* oat_dex_file = &oat_dex_files_[i];
+ if (oat_dex_file->lookup_table_offset_ != 0u) {
+ DCHECK(oat_dex_file->create_type_lookup_table_ == CreateTypeLookupTable::kCreate);
+ DCHECK_NE(oat_dex_file->class_offsets_.size(), 0u);
+ size_t map_offset = oat_dex_files_[0].dex_file_offset_;
+ size_t lookup_table_offset = oat_dex_file->lookup_table_offset_;
+ uint8_t* lookup_table = opened_dex_files_map->Begin() + (lookup_table_offset - map_offset);
+ opened_dex_files[i]->CreateTypeLookupTable(lookup_table);
+ }
+ }
+
+ DCHECK_EQ(opened_dex_files_map == nullptr, opened_dex_files.empty());
+ if (opened_dex_files_map != nullptr && !opened_dex_files_map->Sync()) {
+ PLOG(ERROR) << "Failed to Sync() type lookup tables. Map: " << opened_dex_files_map->GetName();
+ return false;
+ }
+
+ return true;
+}
+
bool OatWriter::WriteCodeAlignment(OutputStream* out, uint32_t aligned_code_delta) {
static const uint8_t kPadding[] = {
0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u
@@ -1611,15 +2248,20 @@ std::pair<bool, uint32_t> OatWriter::MethodOffsetMap::FindMethodOffset(MethodRef
}
}
-OatWriter::OatDexFile::OatDexFile(size_t offset, const DexFile& dex_file) {
- offset_ = offset;
- const std::string& location(dex_file.GetLocation());
- dex_file_location_size_ = location.size();
- dex_file_location_data_ = reinterpret_cast<const uint8_t*>(location.data());
- dex_file_location_checksum_ = dex_file.GetLocationChecksum();
- dex_file_offset_ = 0;
- lookup_table_offset_ = 0;
- class_offsets_.resize(dex_file.NumClassDefs());
+OatWriter::OatDexFile::OatDexFile(const char* dex_file_location,
+ DexFileSource source,
+ CreateTypeLookupTable create_type_lookup_table)
+ : source_(source),
+ create_type_lookup_table_(create_type_lookup_table),
+ dex_file_size_(0),
+ offset_(0),
+ dex_file_location_size_(strlen(dex_file_location)),
+ dex_file_location_data_(dex_file_location),
+ dex_file_location_checksum_(0u),
+ dex_file_offset_(0u),
+ class_offsets_offset_(0u),
+ lookup_table_offset_(0u),
+ class_offsets_() {
}
size_t OatWriter::OatDexFile::SizeOf() const {
@@ -1627,24 +2269,54 @@ size_t OatWriter::OatDexFile::SizeOf() const {
+ dex_file_location_size_
+ sizeof(dex_file_location_checksum_)
+ sizeof(dex_file_offset_)
- + sizeof(lookup_table_offset_)
- + (sizeof(class_offsets_[0]) * class_offsets_.size());
+ + sizeof(class_offsets_offset_)
+ + sizeof(lookup_table_offset_);
+}
+
+void OatWriter::OatDexFile::ReserveTypeLookupTable(OatWriter* oat_writer) {
+ DCHECK_EQ(lookup_table_offset_, 0u);
+ if (create_type_lookup_table_ == CreateTypeLookupTable::kCreate && !class_offsets_.empty()) {
+ size_t table_size = TypeLookupTable::RawDataLength(class_offsets_.size());
+ if (table_size != 0u) {
+ // Type tables are required to be 4 byte aligned.
+ size_t original_offset = oat_writer->size_;
+ size_t offset = RoundUp(original_offset, 4);
+ oat_writer->size_oat_lookup_table_alignment_ += offset - original_offset;
+ lookup_table_offset_ = offset;
+ oat_writer->size_ = offset + table_size;
+ oat_writer->size_oat_lookup_table_ += table_size;
+ }
+ }
}
-bool OatWriter::OatDexFile::Write(OatWriter* oat_writer,
- OutputStream* out,
- const size_t file_offset) const {
+void OatWriter::OatDexFile::ReserveClassOffsets(OatWriter* oat_writer) {
+ DCHECK_EQ(class_offsets_offset_, 0u);
+ if (!class_offsets_.empty()) {
+ // Class offsets are required to be 4 byte aligned.
+ size_t original_offset = oat_writer->size_;
+ size_t offset = RoundUp(original_offset, 4);
+ oat_writer->size_oat_class_offsets_alignment_ += offset - original_offset;
+ class_offsets_offset_ = offset;
+ oat_writer->size_ = offset + GetClassOffsetsRawSize();
+ }
+}
+
+bool OatWriter::OatDexFile::Write(OatWriter* oat_writer, OutputStream* out) const {
+ const size_t file_offset = oat_writer->oat_data_offset_;
DCHECK_OFFSET_();
+
if (!oat_writer->WriteData(out, &dex_file_location_size_, sizeof(dex_file_location_size_))) {
PLOG(ERROR) << "Failed to write dex file location length to " << out->GetLocation();
return false;
}
oat_writer->size_oat_dex_file_location_size_ += sizeof(dex_file_location_size_);
+
if (!oat_writer->WriteData(out, dex_file_location_data_, dex_file_location_size_)) {
PLOG(ERROR) << "Failed to write dex file location data to " << out->GetLocation();
return false;
}
oat_writer->size_oat_dex_file_location_data_ += dex_file_location_size_;
+
if (!oat_writer->WriteData(out,
&dex_file_location_checksum_,
sizeof(dex_file_location_checksum_))) {
@@ -1652,21 +2324,35 @@ bool OatWriter::OatDexFile::Write(OatWriter* oat_writer,
return false;
}
oat_writer->size_oat_dex_file_location_checksum_ += sizeof(dex_file_location_checksum_);
+
if (!oat_writer->WriteData(out, &dex_file_offset_, sizeof(dex_file_offset_))) {
PLOG(ERROR) << "Failed to write dex file offset to " << out->GetLocation();
return false;
}
oat_writer->size_oat_dex_file_offset_ += sizeof(dex_file_offset_);
+
+ if (!oat_writer->WriteData(out, &class_offsets_offset_, sizeof(class_offsets_offset_))) {
+ PLOG(ERROR) << "Failed to write class offsets offset to " << out->GetLocation();
+ return false;
+ }
+ oat_writer->size_oat_dex_file_class_offsets_offset_ += sizeof(class_offsets_offset_);
+
if (!oat_writer->WriteData(out, &lookup_table_offset_, sizeof(lookup_table_offset_))) {
PLOG(ERROR) << "Failed to write lookup table offset to " << out->GetLocation();
return false;
}
oat_writer->size_oat_dex_file_lookup_table_offset_ += sizeof(lookup_table_offset_);
+
+ return true;
+}
+
+bool OatWriter::OatDexFile::WriteClassOffsets(OatWriter* oat_writer, OutputStream* out) {
if (!oat_writer->WriteData(out, class_offsets_.data(), GetClassOffsetsRawSize())) {
- PLOG(ERROR) << "Failed to write methods offsets to " << out->GetLocation();
+ PLOG(ERROR) << "Failed to write oat class offsets for " << GetLocation()
+ << " to " << out->GetLocation();
return false;
}
- oat_writer->size_oat_dex_file_class_offsets_ += GetClassOffsetsRawSize();
+ oat_writer->size_oat_class_offsets_ += GetClassOffsetsRawSize();
return true;
}
diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h
index 5feb5fc516..d681998774 100644
--- a/compiler/oat_writer.h
+++ b/compiler/oat_writer.h
@@ -27,7 +27,9 @@
#include "method_reference.h"
#include "mirror/class.h"
#include "oat.h"
+#include "os.h"
#include "safe_map.h"
+#include "ScopedFd.h"
#include "utils/array_ref.h"
namespace art {
@@ -39,6 +41,7 @@ class ImageWriter;
class OutputStream;
class TimingLogger;
class TypeLookupTable;
+class ZipEntry;
namespace dwarf {
struct MethodDebugInfo;
@@ -61,6 +64,11 @@ struct MethodDebugInfo;
// ...
// TypeLookupTable[D]
//
+// ClassOffsets[0] one table of OatClass offsets for each OatDexFile, one entry per class def.
+// ClassOffsets[1]
+// ...
+// ClassOffsets[D]
+//
// OatClass[0] one variable sized OatClass for each of C DexFile::ClassDefs
// OatClass[1] contains OatClass entries with class status, offsets to code, etc.
// ...
@@ -93,15 +101,65 @@ struct MethodDebugInfo;
//
class OatWriter {
public:
- OatWriter(const std::vector<const DexFile*>& dex_files,
- uint32_t image_file_location_oat_checksum,
- uintptr_t image_file_location_oat_begin,
- int32_t image_patch_delta,
- const CompilerDriver* compiler,
- ImageWriter* image_writer,
- bool compiling_boot_image,
- TimingLogger* timings,
- SafeMap<std::string, std::string>* key_value_store);
+ enum class CreateTypeLookupTable {
+ kCreate,
+ kDontCreate,
+ kDefault = kCreate
+ };
+
+ OatWriter(bool compiling_boot_image, TimingLogger* timings);
+
+ // To produce a valid oat file, the user must first add sources with any combination of
+ // - AddDexFileSource(),
+ // - AddZippedDexFilesSource(),
+ // - AddRawDexFileSource().
+ // Then the user must call in order
+ // - WriteAndOpenDexFiles()
+ // - PrepareLayout(),
+ // - WriteRodata(),
+ // - WriteCode(),
+ // - WriteHeader().
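+ //
+ // A minimal, illustrative sketch of this sequence (the streams, output file,
+ // ISA data and key/value store below are assumed to be set up by the caller):
+ //
+ //   OatWriter writer(/* compiling_boot_image */ false, timings);
+ //   writer.AddDexFileSource("/path/to/classes.dex", "classes.dex");
+ //   writer.WriteAndOpenDexFiles(rodata, file, isa, isa_features, &key_value_store,
+ //                               &opened_dex_files_map, &opened_dex_files);
+ //   writer.PrepareLayout(compiler_driver, image_writer, dex_file_pointers);
+ //   writer.WriteRodata(rodata);
+ //   writer.WriteCode(text);
+ //   writer.WriteHeader(rodata, image_oat_checksum, image_oat_data_begin, image_patch_delta);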
+
+ // Add dex file source(s) from a file, either a plain dex file or
+ // a zip file with one or more dex files.
+ bool AddDexFileSource(
+ const char* filename,
+ const char* location,
+ CreateTypeLookupTable create_type_lookup_table = CreateTypeLookupTable::kDefault);
+ // Add dex file source(s) from a zip file specified by a file handle.
+ bool AddZippedDexFilesSource(
+ ScopedFd&& zip_fd,
+ const char* location,
+ CreateTypeLookupTable create_type_lookup_table = CreateTypeLookupTable::kDefault);
+ // Add dex file source from raw memory.
+ bool AddRawDexFileSource(
+ const ArrayRef<const uint8_t>& data,
+ const char* location,
+ uint32_t location_checksum,
+ CreateTypeLookupTable create_type_lookup_table = CreateTypeLookupTable::kDefault);
+ dchecked_vector<const char*> GetSourceLocations() const;
+
+ // Write raw dex files to the .rodata section and open them from the oat file.
+ bool WriteAndOpenDexFiles(OutputStream* rodata,
+ File* file,
+ InstructionSet instruction_set,
+ const InstructionSetFeatures* instruction_set_features,
+ SafeMap<std::string, std::string>* key_value_store,
+ /*out*/ std::unique_ptr<MemMap>* opened_dex_files_map,
+ /*out*/ std::vector<std::unique_ptr<const DexFile>>* opened_dex_files);
+ // Prepare layout of remaining data.
+ void PrepareLayout(const CompilerDriver* compiler,
+ ImageWriter* image_writer,
+ const std::vector<const DexFile*>& dex_files);
+ // Write the rest of .rodata section (ClassOffsets[], OatClass[], maps).
+ bool WriteRodata(OutputStream* out);
+ // Write the code to the .text section.
+ bool WriteCode(OutputStream* out);
+ // Write the oat header. This finalizes the oat file.
+ bool WriteHeader(OutputStream* out,
+ uint32_t image_file_location_oat_checksum,
+ uintptr_t image_file_location_oat_begin,
+ int32_t image_patch_delta);
// Returns whether the oat file has an associated image.
bool HasImage() const {
@@ -130,9 +188,6 @@ class OatWriter {
return ArrayRef<const uintptr_t>(absolute_patch_locations_);
}
- bool WriteRodata(OutputStream* out);
- bool WriteCode(OutputStream* out);
-
~OatWriter();
ArrayRef<const dwarf::MethodDebugInfo> GetMethodDebugInfo() const {
@@ -144,6 +199,7 @@ class OatWriter {
}
private:
+ class DexFileSource;
class OatClass;
class OatDexFile;
@@ -174,29 +230,65 @@ class OatWriter {
// with a given DexMethodVisitor.
bool VisitDexMethods(DexMethodVisitor* visitor);
- size_t InitOatHeader();
+ size_t InitOatHeader(InstructionSet instruction_set,
+ const InstructionSetFeatures* instruction_set_features,
+ uint32_t num_dex_files,
+ SafeMap<std::string, std::string>* key_value_store);
size_t InitOatDexFiles(size_t offset);
- size_t InitLookupTables(size_t offset);
- size_t InitDexFiles(size_t offset);
size_t InitOatClasses(size_t offset);
size_t InitOatMaps(size_t offset);
size_t InitOatCode(size_t offset);
size_t InitOatCodeDexFiles(size_t offset);
- bool WriteTables(OutputStream* out, const size_t file_offset);
- bool WriteLookupTables(OutputStream* out, const size_t file_offset);
+ bool WriteClassOffsets(OutputStream* out);
+ bool WriteClasses(OutputStream* out);
size_t WriteMaps(OutputStream* out, const size_t file_offset, size_t relative_offset);
size_t WriteCode(OutputStream* out, const size_t file_offset, size_t relative_offset);
size_t WriteCodeDexFiles(OutputStream* out, const size_t file_offset, size_t relative_offset);
bool GetOatDataOffset(OutputStream* out);
+ bool ReadDexFileHeader(File* file, OatDexFile* oat_dex_file);
+ bool ValidateDexFileHeader(const uint8_t* raw_header, const char* location);
+ bool WriteDexFiles(OutputStream* rodata, File* file);
+ bool WriteDexFile(OutputStream* rodata, File* file, OatDexFile* oat_dex_file);
+ bool SeekToDexFile(OutputStream* rodata, File* file, OatDexFile* oat_dex_file);
+ bool WriteDexFile(OutputStream* rodata, File* file, OatDexFile* oat_dex_file, ZipEntry* dex_file);
+ bool WriteDexFile(OutputStream* rodata, File* file, OatDexFile* oat_dex_file, File* dex_file);
+ bool WriteDexFile(OutputStream* rodata, OatDexFile* oat_dex_file, const uint8_t* dex_file);
+ bool WriteOatDexFiles(OutputStream* rodata);
+ bool ExtendForTypeLookupTables(OutputStream* rodata, File* file, size_t offset);
+ bool OpenDexFiles(File* file,
+ /*out*/ std::unique_ptr<MemMap>* opened_dex_files_map,
+ /*out*/ std::vector<std::unique_ptr<const DexFile>>* opened_dex_files);
+ bool WriteTypeLookupTables(MemMap* opened_dex_files_map,
+ const std::vector<std::unique_ptr<const DexFile>>& opened_dex_files);
bool WriteCodeAlignment(OutputStream* out, uint32_t aligned_code_delta);
bool WriteData(OutputStream* out, const void* data, size_t size);
+ enum class WriteState {
+ kAddingDexFileSources,
+ kPrepareLayout,
+ kWriteRoData,
+ kWriteText,
+ kWriteHeader,
+ kDone
+ };
+
+ WriteState write_state_;
+ TimingLogger* timings_;
+
+ std::vector<std::unique_ptr<File>> raw_dex_files_;
+ std::vector<std::unique_ptr<ZipArchive>> zip_archives_;
+ std::vector<std::unique_ptr<ZipEntry>> zipped_dex_files_;
+
+ // Using std::list<>, which doesn't move elements around on push/emplace_back();
+ // we need this because we keep plain pointers to the strings' c_str().
+ std::list<std::string> zipped_dex_file_locations_;
+
dchecked_vector<dwarf::MethodDebugInfo> method_info_;
- const CompilerDriver* const compiler_driver_;
- ImageWriter* const image_writer_;
+ const CompilerDriver* compiler_driver_;
+ ImageWriter* image_writer_;
const bool compiling_boot_image_;
// note OatFile does not take ownership of the DexFiles
@@ -215,13 +307,7 @@ class OatWriter {
// Offset of the oat data from the start of the mmapped region of the elf file.
size_t oat_data_offset_;
- // dependencies on the image.
- uint32_t image_file_location_oat_checksum_;
- uintptr_t image_file_location_oat_begin_;
- int32_t image_patch_delta_;
-
// data to write
- SafeMap<std::string, std::string>* key_value_store_;
std::unique_ptr<OatHeader> oat_header_;
dchecked_vector<OatDexFile> oat_dex_files_;
dchecked_vector<OatClass> oat_classes_;
@@ -257,10 +343,12 @@ class OatWriter {
uint32_t size_oat_dex_file_location_data_;
uint32_t size_oat_dex_file_location_checksum_;
uint32_t size_oat_dex_file_offset_;
+ uint32_t size_oat_dex_file_class_offsets_offset_;
uint32_t size_oat_dex_file_lookup_table_offset_;
- uint32_t size_oat_dex_file_class_offsets_;
uint32_t size_oat_lookup_table_alignment_;
uint32_t size_oat_lookup_table_;
+ uint32_t size_oat_class_offsets_alignment_;
+ uint32_t size_oat_class_offsets_;
uint32_t size_oat_class_type_;
uint32_t size_oat_class_status_;
uint32_t size_oat_class_method_bitmaps_;
@@ -269,7 +357,7 @@ class OatWriter {
std::unique_ptr<linker::RelativePatcher> relative_patcher_;
// The locations of absolute patches relative to the start of the executable section.
- std::vector<uintptr_t> absolute_patch_locations_;
+ dchecked_vector<uintptr_t> absolute_patch_locations_;
// Map method reference to assigned offset.
// Wrap the map in a class implementing linker::RelativePatcherTargetProvider.
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index 26bf1cbc75..1d604e7135 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -56,7 +56,6 @@ class HGraphBuilder : public ValueObject {
return_type_(Primitive::GetType(dex_compilation_unit_->GetShorty()[0])),
code_start_(nullptr),
latest_result_(nullptr),
- can_use_baseline_for_string_init_(true),
compilation_stats_(compiler_stats),
interpreter_metadata_(interpreter_metadata),
dex_cache_(dex_cache) {}
@@ -77,7 +76,6 @@ class HGraphBuilder : public ValueObject {
return_type_(return_type),
code_start_(nullptr),
latest_result_(nullptr),
- can_use_baseline_for_string_init_(true),
compilation_stats_(nullptr),
interpreter_metadata_(nullptr),
null_dex_cache_(),
@@ -85,10 +83,6 @@ class HGraphBuilder : public ValueObject {
bool BuildGraph(const DexFile::CodeItem& code);
- bool CanUseBaselineForStringInit() const {
- return can_use_baseline_for_string_init_;
- }
-
static constexpr const char* kBuilderPassName = "builder";
// The number of entries in a packed switch before we use a jump table or specified
@@ -363,11 +357,6 @@ class HGraphBuilder : public ValueObject {
// used by move-result instructions.
HInstruction* latest_result_;
- // We need to know whether we have built a graph that has calls to StringFactory
- // and hasn't gone through the verifier. If the following flag is `false`, then
- // we cannot compile with baseline.
- bool can_use_baseline_for_string_init_;
-
OptimizingCompilerStats* compilation_stats_;
const uint8_t* interpreter_metadata_;
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index ea0b9eca9a..a3bbfdbd27 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -142,23 +142,6 @@ size_t CodeGenerator::GetCachePointerOffset(uint32_t index) {
return pointer_size * index;
}
-void CodeGenerator::CompileBaseline(CodeAllocator* allocator, bool is_leaf) {
- Initialize();
- if (!is_leaf) {
- MarkNotLeaf();
- }
- const bool is_64_bit = Is64BitInstructionSet(GetInstructionSet());
- InitializeCodeGeneration(GetGraph()->GetNumberOfLocalVRegs()
- + GetGraph()->GetTemporariesVRegSlots()
- + 1 /* filler */,
- 0, /* the baseline compiler does not have live registers at slow path */
- 0, /* the baseline compiler does not have live registers at slow path */
- GetGraph()->GetMaximumNumberOfOutVRegs()
- + (is_64_bit ? 2 : 1) /* current method */,
- GetGraph()->GetBlocks());
- CompileInternal(allocator, /* is_baseline */ true);
-}
-
bool CodeGenerator::GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const {
DCHECK_EQ((*block_order_)[current_block_index_], current);
return GetNextBlockToEmit() == FirstNonEmptyBlock(next);
@@ -220,8 +203,12 @@ void CodeGenerator::GenerateSlowPaths() {
current_slow_path_ = nullptr;
}
-void CodeGenerator::CompileInternal(CodeAllocator* allocator, bool is_baseline) {
- is_baseline_ = is_baseline;
+void CodeGenerator::Compile(CodeAllocator* allocator) {
+ // The register allocator already called `InitializeCodeGeneration`,
+ // where the frame size has been computed.
+ DCHECK(block_order_ != nullptr);
+ Initialize();
+
HGraphVisitor* instruction_visitor = GetInstructionVisitor();
DCHECK_EQ(current_block_index_, 0u);
@@ -242,9 +229,6 @@ void CodeGenerator::CompileInternal(CodeAllocator* allocator, bool is_baseline)
for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
HInstruction* current = it.Current();
DisassemblyScope disassembly_scope(current, *this);
- if (is_baseline) {
- InitLocationsBaseline(current);
- }
DCHECK(CheckTypeConsistency(current));
current->Accept(instruction_visitor);
}
@@ -254,7 +238,7 @@ void CodeGenerator::CompileInternal(CodeAllocator* allocator, bool is_baseline)
// Emit catch stack maps at the end of the stack map stream as expected by the
// runtime exception handler.
- if (!is_baseline && graph_->HasTryCatch()) {
+ if (graph_->HasTryCatch()) {
RecordCatchBlockInfo();
}
@@ -262,14 +246,6 @@ void CodeGenerator::CompileInternal(CodeAllocator* allocator, bool is_baseline)
Finalize(allocator);
}
-void CodeGenerator::CompileOptimized(CodeAllocator* allocator) {
- // The register allocator already called `InitializeCodeGeneration`,
- // where the frame size has been computed.
- DCHECK(block_order_ != nullptr);
- Initialize();
- CompileInternal(allocator, /* is_baseline */ false);
-}
-
void CodeGenerator::Finalize(CodeAllocator* allocator) {
size_t code_size = GetAssembler()->CodeSize();
uint8_t* buffer = allocator->Allocate(code_size);
@@ -282,29 +258,6 @@ void CodeGenerator::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches A
// No linker patches by default.
}
-size_t CodeGenerator::FindFreeEntry(bool* array, size_t length) {
- for (size_t i = 0; i < length; ++i) {
- if (!array[i]) {
- array[i] = true;
- return i;
- }
- }
- LOG(FATAL) << "Could not find a register in baseline register allocator";
- UNREACHABLE();
-}
-
-size_t CodeGenerator::FindTwoFreeConsecutiveAlignedEntries(bool* array, size_t length) {
- for (size_t i = 0; i < length - 1; i += 2) {
- if (!array[i] && !array[i + 1]) {
- array[i] = true;
- array[i + 1] = true;
- return i;
- }
- }
- LOG(FATAL) << "Could not find a register in baseline register allocator";
- UNREACHABLE();
-}
-
void CodeGenerator::InitializeCodeGeneration(size_t number_of_spill_slots,
size_t maximum_number_of_live_core_registers,
size_t maximum_number_of_live_fpu_registers,
@@ -592,123 +545,6 @@ void CodeGenerator::BlockIfInRegister(Location location, bool is_out) const {
}
}
-void CodeGenerator::AllocateRegistersLocally(HInstruction* instruction) const {
- LocationSummary* locations = instruction->GetLocations();
- if (locations == nullptr) return;
-
- for (size_t i = 0, e = GetNumberOfCoreRegisters(); i < e; ++i) {
- blocked_core_registers_[i] = false;
- }
-
- for (size_t i = 0, e = GetNumberOfFloatingPointRegisters(); i < e; ++i) {
- blocked_fpu_registers_[i] = false;
- }
-
- for (size_t i = 0, e = number_of_register_pairs_; i < e; ++i) {
- blocked_register_pairs_[i] = false;
- }
-
- // Mark all fixed input, temp and output registers as used.
- for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
- BlockIfInRegister(locations->InAt(i));
- }
-
- for (size_t i = 0, e = locations->GetTempCount(); i < e; ++i) {
- Location loc = locations->GetTemp(i);
- BlockIfInRegister(loc);
- }
- Location result_location = locations->Out();
- if (locations->OutputCanOverlapWithInputs()) {
- BlockIfInRegister(result_location, /* is_out */ true);
- }
-
- SetupBlockedRegisters(/* is_baseline */ true);
-
- // Allocate all unallocated input locations.
- for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
- Location loc = locations->InAt(i);
- HInstruction* input = instruction->InputAt(i);
- if (loc.IsUnallocated()) {
- if ((loc.GetPolicy() == Location::kRequiresRegister)
- || (loc.GetPolicy() == Location::kRequiresFpuRegister)) {
- loc = AllocateFreeRegister(input->GetType());
- } else {
- DCHECK_EQ(loc.GetPolicy(), Location::kAny);
- HLoadLocal* load = input->AsLoadLocal();
- if (load != nullptr) {
- loc = GetStackLocation(load);
- } else {
- loc = AllocateFreeRegister(input->GetType());
- }
- }
- locations->SetInAt(i, loc);
- }
- }
-
- // Allocate all unallocated temp locations.
- for (size_t i = 0, e = locations->GetTempCount(); i < e; ++i) {
- Location loc = locations->GetTemp(i);
- if (loc.IsUnallocated()) {
- switch (loc.GetPolicy()) {
- case Location::kRequiresRegister:
- // Allocate a core register (large enough to fit a 32-bit integer).
- loc = AllocateFreeRegister(Primitive::kPrimInt);
- break;
-
- case Location::kRequiresFpuRegister:
- // Allocate a core register (large enough to fit a 64-bit double).
- loc = AllocateFreeRegister(Primitive::kPrimDouble);
- break;
-
- default:
- LOG(FATAL) << "Unexpected policy for temporary location "
- << loc.GetPolicy();
- }
- locations->SetTempAt(i, loc);
- }
- }
- if (result_location.IsUnallocated()) {
- switch (result_location.GetPolicy()) {
- case Location::kAny:
- case Location::kRequiresRegister:
- case Location::kRequiresFpuRegister:
- result_location = AllocateFreeRegister(instruction->GetType());
- break;
- case Location::kSameAsFirstInput:
- result_location = locations->InAt(0);
- break;
- }
- locations->UpdateOut(result_location);
- }
-}
-
-void CodeGenerator::InitLocationsBaseline(HInstruction* instruction) {
- AllocateLocations(instruction);
- if (instruction->GetLocations() == nullptr) {
- if (instruction->IsTemporary()) {
- HInstruction* previous = instruction->GetPrevious();
- Location temp_location = GetTemporaryLocation(instruction->AsTemporary());
- Move(previous, temp_location, instruction);
- }
- return;
- }
- AllocateRegistersLocally(instruction);
- for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) {
- Location location = instruction->GetLocations()->InAt(i);
- HInstruction* input = instruction->InputAt(i);
- if (location.IsValid()) {
- // Move the input to the desired location.
- if (input->GetNext()->IsTemporary()) {
- // If the input was stored in a temporary, use that temporary to
- // perform the move.
- Move(input->GetNext(), location, instruction);
- } else {
- Move(input, location, instruction);
- }
- }
- }
-}
-
void CodeGenerator::AllocateLocations(HInstruction* instruction) {
instruction->Accept(GetLocationBuilder());
DCHECK(CheckTypeConsistency(instruction));
@@ -789,132 +625,6 @@ CodeGenerator* CodeGenerator::Create(HGraph* graph,
}
}
-void CodeGenerator::BuildNativeGCMap(
- ArenaVector<uint8_t>* data, const CompilerDriver& compiler_driver) const {
- const std::vector<uint8_t>& gc_map_raw =
- compiler_driver.GetVerifiedMethod(&GetGraph()->GetDexFile(), GetGraph()->GetMethodIdx())
- ->GetDexGcMap();
- verifier::DexPcToReferenceMap dex_gc_map(&(gc_map_raw)[0]);
-
- uint32_t max_native_offset = stack_map_stream_.ComputeMaxNativePcOffset();
-
- size_t num_stack_maps = stack_map_stream_.GetNumberOfStackMaps();
- GcMapBuilder builder(data, num_stack_maps, max_native_offset, dex_gc_map.RegWidth());
- for (size_t i = 0; i != num_stack_maps; ++i) {
- const StackMapStream::StackMapEntry& stack_map_entry = stack_map_stream_.GetStackMap(i);
- uint32_t native_offset = stack_map_entry.native_pc_offset;
- uint32_t dex_pc = stack_map_entry.dex_pc;
- const uint8_t* references = dex_gc_map.FindBitMap(dex_pc, false);
- CHECK(references != nullptr) << "Missing ref for dex pc 0x" << std::hex << dex_pc;
- builder.AddEntry(native_offset, references);
- }
-}
-
-void CodeGenerator::BuildMappingTable(ArenaVector<uint8_t>* data) const {
- uint32_t pc2dex_data_size = 0u;
- uint32_t pc2dex_entries = stack_map_stream_.GetNumberOfStackMaps();
- uint32_t pc2dex_offset = 0u;
- int32_t pc2dex_dalvik_offset = 0;
- uint32_t dex2pc_data_size = 0u;
- uint32_t dex2pc_entries = 0u;
- uint32_t dex2pc_offset = 0u;
- int32_t dex2pc_dalvik_offset = 0;
-
- for (size_t i = 0; i < pc2dex_entries; i++) {
- const StackMapStream::StackMapEntry& stack_map_entry = stack_map_stream_.GetStackMap(i);
- pc2dex_data_size += UnsignedLeb128Size(stack_map_entry.native_pc_offset - pc2dex_offset);
- pc2dex_data_size += SignedLeb128Size(stack_map_entry.dex_pc - pc2dex_dalvik_offset);
- pc2dex_offset = stack_map_entry.native_pc_offset;
- pc2dex_dalvik_offset = stack_map_entry.dex_pc;
- }
-
- // Walk over the blocks and find which ones correspond to catch block entries.
- for (HBasicBlock* block : graph_->GetBlocks()) {
- if (block->IsCatchBlock()) {
- intptr_t native_pc = GetAddressOf(block);
- ++dex2pc_entries;
- dex2pc_data_size += UnsignedLeb128Size(native_pc - dex2pc_offset);
- dex2pc_data_size += SignedLeb128Size(block->GetDexPc() - dex2pc_dalvik_offset);
- dex2pc_offset = native_pc;
- dex2pc_dalvik_offset = block->GetDexPc();
- }
- }
-
- uint32_t total_entries = pc2dex_entries + dex2pc_entries;
- uint32_t hdr_data_size = UnsignedLeb128Size(total_entries) + UnsignedLeb128Size(pc2dex_entries);
- uint32_t data_size = hdr_data_size + pc2dex_data_size + dex2pc_data_size;
- data->resize(data_size);
-
- uint8_t* data_ptr = &(*data)[0];
- uint8_t* write_pos = data_ptr;
-
- write_pos = EncodeUnsignedLeb128(write_pos, total_entries);
- write_pos = EncodeUnsignedLeb128(write_pos, pc2dex_entries);
- DCHECK_EQ(static_cast<size_t>(write_pos - data_ptr), hdr_data_size);
- uint8_t* write_pos2 = write_pos + pc2dex_data_size;
-
- pc2dex_offset = 0u;
- pc2dex_dalvik_offset = 0u;
- dex2pc_offset = 0u;
- dex2pc_dalvik_offset = 0u;
-
- for (size_t i = 0; i < pc2dex_entries; i++) {
- const StackMapStream::StackMapEntry& stack_map_entry = stack_map_stream_.GetStackMap(i);
- DCHECK(pc2dex_offset <= stack_map_entry.native_pc_offset);
- write_pos = EncodeUnsignedLeb128(write_pos, stack_map_entry.native_pc_offset - pc2dex_offset);
- write_pos = EncodeSignedLeb128(write_pos, stack_map_entry.dex_pc - pc2dex_dalvik_offset);
- pc2dex_offset = stack_map_entry.native_pc_offset;
- pc2dex_dalvik_offset = stack_map_entry.dex_pc;
- }
-
- for (HBasicBlock* block : graph_->GetBlocks()) {
- if (block->IsCatchBlock()) {
- intptr_t native_pc = GetAddressOf(block);
- write_pos2 = EncodeUnsignedLeb128(write_pos2, native_pc - dex2pc_offset);
- write_pos2 = EncodeSignedLeb128(write_pos2, block->GetDexPc() - dex2pc_dalvik_offset);
- dex2pc_offset = native_pc;
- dex2pc_dalvik_offset = block->GetDexPc();
- }
- }
-
-
- DCHECK_EQ(static_cast<size_t>(write_pos - data_ptr), hdr_data_size + pc2dex_data_size);
- DCHECK_EQ(static_cast<size_t>(write_pos2 - data_ptr), data_size);
-
- if (kIsDebugBuild) {
- // Verify the encoded table holds the expected data.
- MappingTable table(data_ptr);
- CHECK_EQ(table.TotalSize(), total_entries);
- CHECK_EQ(table.PcToDexSize(), pc2dex_entries);
- auto it = table.PcToDexBegin();
- auto it2 = table.DexToPcBegin();
- for (size_t i = 0; i < pc2dex_entries; i++) {
- const StackMapStream::StackMapEntry& stack_map_entry = stack_map_stream_.GetStackMap(i);
- CHECK_EQ(stack_map_entry.native_pc_offset, it.NativePcOffset());
- CHECK_EQ(stack_map_entry.dex_pc, it.DexPc());
- ++it;
- }
- for (HBasicBlock* block : graph_->GetBlocks()) {
- if (block->IsCatchBlock()) {
- CHECK_EQ(GetAddressOf(block), it2.NativePcOffset());
- CHECK_EQ(block->GetDexPc(), it2.DexPc());
- ++it2;
- }
- }
- CHECK(it == table.PcToDexEnd());
- CHECK(it2 == table.DexToPcEnd());
- }
-}
-
-void CodeGenerator::BuildVMapTable(ArenaVector<uint8_t>* data) const {
- Leb128Encoder<ArenaVector<uint8_t>> vmap_encoder(data);
- // We currently don't use callee-saved registers.
- size_t size = 0 + 1 /* marker */ + 0;
- vmap_encoder.Reserve(size + 1u); // All values are likely to be one byte in ULEB128 (<128).
- vmap_encoder.PushBackUnsigned(size);
- vmap_encoder.PushBackUnsigned(VmapTable::kAdjustedFpMarker);
-}
-
size_t CodeGenerator::ComputeStackMapsSize() {
return stack_map_stream_.PrepareForFillIn();
}
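With the baseline tables gone, only the stack map stream survives here; the removed BuildMappingTable stored its pc2dex entries as LEB128 deltas against the previous entry. A small self-contained sketch of that delta encoding, with simplified names rather than the actual ART helpers:

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Encode one value as ULEB128: 7 payload bits per byte, high bit set while
    // more bytes follow.
    void EncodeUnsignedLeb128(std::vector<uint8_t>* out, uint32_t value) {
      do {
        uint8_t byte = value & 0x7f;
        value >>= 7;
        if (value != 0) {
          byte |= 0x80;
        }
        out->push_back(byte);
      } while (value != 0);
    }

    int main() {
      std::vector<uint8_t> table;
      uint32_t previous_native_pc = 0;
      for (uint32_t native_pc : {0u, 4u, 28u, 36u}) {
        // Store the delta from the previous entry so small strides stay one byte.
        EncodeUnsignedLeb128(&table, native_pc - previous_native_pc);
        previous_native_pc = native_pc;
      }
      std::printf("%zu bytes for 4 entries\n", table.size());  // prints "4 bytes for 4 entries"
      return 0;
    }

(The removed code additionally encoded dex pc deltas as signed LEB128 and appended dex2pc entries for catch-block entry points.)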
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 5958cd89bc..4f8f146753 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -158,10 +158,8 @@ class FieldAccessCallingConvention {
class CodeGenerator {
public:
- // Compiles the graph to executable instructions. Returns whether the compilation
- // succeeded.
- void CompileBaseline(CodeAllocator* allocator, bool is_leaf = false);
- void CompileOptimized(CodeAllocator* allocator);
+ // Compiles the graph to executable instructions.
+ void Compile(CodeAllocator* allocator);
static CodeGenerator* Create(HGraph* graph,
InstructionSet instruction_set,
const InstructionSetFeatures& isa_features,
@@ -214,7 +212,7 @@ class CodeGenerator {
size_t GetNumberOfCoreRegisters() const { return number_of_core_registers_; }
size_t GetNumberOfFloatingPointRegisters() const { return number_of_fpu_registers_; }
- virtual void SetupBlockedRegisters(bool is_baseline) const = 0;
+ virtual void SetupBlockedRegisters() const = 0;
virtual void ComputeSpillMask() {
core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
@@ -290,17 +288,9 @@ class CodeGenerator {
slow_paths_.push_back(slow_path);
}
- void BuildMappingTable(ArenaVector<uint8_t>* vector) const;
- void BuildVMapTable(ArenaVector<uint8_t>* vector) const;
- void BuildNativeGCMap(
- ArenaVector<uint8_t>* vector, const CompilerDriver& compiler_driver) const;
void BuildStackMaps(MemoryRegion region);
size_t ComputeStackMapsSize();
- bool IsBaseline() const {
- return is_baseline_;
- }
-
bool IsLeafMethod() const {
return is_leaf_;
}
@@ -489,7 +479,6 @@ class CodeGenerator {
fpu_callee_save_mask_(fpu_callee_save_mask),
stack_map_stream_(graph->GetArena()),
block_order_(nullptr),
- is_baseline_(false),
disasm_info_(nullptr),
stats_(stats),
graph_(graph),
@@ -502,15 +491,6 @@ class CodeGenerator {
slow_paths_.reserve(8);
}
- // Register allocation logic.
- void AllocateRegistersLocally(HInstruction* instruction) const;
-
- // Backend specific implementation for allocating a register.
- virtual Location AllocateFreeRegister(Primitive::Type type) const = 0;
-
- static size_t FindFreeEntry(bool* array, size_t length);
- static size_t FindTwoFreeConsecutiveAlignedEntries(bool* array, size_t length);
-
virtual Location GetStackLocation(HLoadLocal* load) const = 0;
virtual HGraphVisitor* GetLocationBuilder() = 0;
@@ -593,16 +573,11 @@ class CodeGenerator {
// The order to use for code generation.
const ArenaVector<HBasicBlock*>* block_order_;
- // Whether we are using baseline.
- bool is_baseline_;
-
DisassemblyInformation* disasm_info_;
private:
- void InitLocationsBaseline(HInstruction* instruction);
size_t GetStackOffsetOfSavedRegister(size_t index);
void GenerateSlowPaths();
- void CompileInternal(CodeAllocator* allocator, bool is_baseline);
void BlockIfInRegister(Location location, bool is_out = false) const;
void EmitEnvironment(HEnvironment* environment, SlowPathCode* slow_path);
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index a11ceb9bd9..272579219f 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -47,9 +47,7 @@ static bool ExpectedPairLayout(Location location) {
static constexpr int kCurrentMethodStackOffset = 0;
static constexpr Register kMethodRegisterArgument = R0;
-// We unconditionally allocate R5 to ensure we can do long operations
-// with baseline.
-static constexpr Register kCoreSavedRegisterForBaseline = R5;
+static constexpr Register kCoreAlwaysSpillRegister = R5;
static constexpr Register kCoreCalleeSaves[] =
{ R5, R6, R7, R8, R10, R11, LR };
static constexpr SRegister kFpuCalleeSaves[] =
@@ -728,6 +726,24 @@ inline Condition ARMUnsignedCondition(IfCondition cond) {
UNREACHABLE();
}
+inline Condition ARMFPCondition(IfCondition cond, bool gt_bias) {
+  // The ARM condition codes can express all the necessary branches; see the
+  // "Meaning (floating-point)" column in table A8-1 of the ARMv7 reference manual.
+ // There is no dex instruction or HIR that would need the missing conditions
+ // "equal or unordered" or "not equal".
+ switch (cond) {
+ case kCondEQ: return EQ;
+ case kCondNE: return NE /* unordered */;
+ case kCondLT: return gt_bias ? CC : LT /* unordered */;
+ case kCondLE: return gt_bias ? LS : LE /* unordered */;
+ case kCondGT: return gt_bias ? HI /* unordered */ : GT;
+ case kCondGE: return gt_bias ? CS /* unordered */ : GE;
+ default:
+ LOG(FATAL) << "UNREACHABLE";
+ UNREACHABLE();
+ }
+}
+
void CodeGeneratorARM::DumpCoreRegister(std::ostream& stream, int reg) const {
stream << Register(reg);
}
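ARMFPCondition above lets the floating-point compares below (GenerateFPJumps, VisitCompare) branch on a single NaN-aware condition instead of emitting a separate VS branch first. The bias comes from the dex instructions: cmpg-float treats a NaN operand as "greater", cmpl-float as "less". A standalone model of those semantics, illustrative C++ only and not ART code:

    #include <cmath>
    #include <cstdio>

    // Dex-level semantics that ARMFPCondition encodes: cmpg-float (gt bias)
    // yields +1 for NaN operands, cmpl-float (lt bias) yields -1, and the
    // following if-lt / if-le / if-gt / if-ge tests that value against zero.
    int CompareFloat(float a, float b, bool gt_bias) {
      if (std::isnan(a) || std::isnan(b)) {
        return gt_bias ? 1 : -1;
      }
      if (a == b) return 0;
      return a < b ? -1 : 1;
    }

    int main() {
      float nan = std::nanf("");
      // With gt bias, "NaN < 1.0f" must be false, so the fused compare-and-branch
      // picks a condition that is false on an unordered result (CC rather than LT).
      // With lt bias it must be true, so the signed LT condition is kept.
      std::printf("gt bias: %d\n", CompareFloat(nan, 1.0f, true) < 0);   // prints 0
      std::printf("lt bias: %d\n", CompareFloat(nan, 1.0f, false) < 0);  // prints 1
      return 0;
    }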
@@ -815,58 +831,7 @@ void CodeGeneratorARM::Finalize(CodeAllocator* allocator) {
CodeGenerator::Finalize(allocator);
}
-Location CodeGeneratorARM::AllocateFreeRegister(Primitive::Type type) const {
- switch (type) {
- case Primitive::kPrimLong: {
- size_t reg = FindFreeEntry(blocked_register_pairs_, kNumberOfRegisterPairs);
- ArmManagedRegister pair =
- ArmManagedRegister::FromRegisterPair(static_cast<RegisterPair>(reg));
- DCHECK(!blocked_core_registers_[pair.AsRegisterPairLow()]);
- DCHECK(!blocked_core_registers_[pair.AsRegisterPairHigh()]);
-
- blocked_core_registers_[pair.AsRegisterPairLow()] = true;
- blocked_core_registers_[pair.AsRegisterPairHigh()] = true;
- UpdateBlockedPairRegisters();
- return Location::RegisterPairLocation(pair.AsRegisterPairLow(), pair.AsRegisterPairHigh());
- }
-
- case Primitive::kPrimByte:
- case Primitive::kPrimBoolean:
- case Primitive::kPrimChar:
- case Primitive::kPrimShort:
- case Primitive::kPrimInt:
- case Primitive::kPrimNot: {
- int reg = FindFreeEntry(blocked_core_registers_, kNumberOfCoreRegisters);
- // Block all register pairs that contain `reg`.
- for (int i = 0; i < kNumberOfRegisterPairs; i++) {
- ArmManagedRegister current =
- ArmManagedRegister::FromRegisterPair(static_cast<RegisterPair>(i));
- if (current.AsRegisterPairLow() == reg || current.AsRegisterPairHigh() == reg) {
- blocked_register_pairs_[i] = true;
- }
- }
- return Location::RegisterLocation(reg);
- }
-
- case Primitive::kPrimFloat: {
- int reg = FindFreeEntry(blocked_fpu_registers_, kNumberOfSRegisters);
- return Location::FpuRegisterLocation(reg);
- }
-
- case Primitive::kPrimDouble: {
- int reg = FindTwoFreeConsecutiveAlignedEntries(blocked_fpu_registers_, kNumberOfSRegisters);
- DCHECK_EQ(reg % 2, 0);
- return Location::FpuRegisterPairLocation(reg, reg + 1);
- }
-
- case Primitive::kPrimVoid:
- LOG(FATAL) << "Unreachable type " << type;
- }
-
- return Location::NoLocation();
-}
-
-void CodeGeneratorARM::SetupBlockedRegisters(bool is_baseline) const {
+void CodeGeneratorARM::SetupBlockedRegisters() const {
// Don't allocate the dalvik style register pair passing.
blocked_register_pairs_[R1_R2] = true;
@@ -881,15 +846,7 @@ void CodeGeneratorARM::SetupBlockedRegisters(bool is_baseline) const {
// Reserve temp register.
blocked_core_registers_[IP] = true;
- if (is_baseline) {
- for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
- blocked_core_registers_[kCoreCalleeSaves[i]] = true;
- }
-
- blocked_core_registers_[kCoreSavedRegisterForBaseline] = false;
- }
-
- if (is_baseline || GetGraph()->IsDebuggable()) {
+ if (GetGraph()->IsDebuggable()) {
// Stubs do not save callee-save floating point registers. If the graph
// is debuggable, we need to deal with these registers differently. For
// now, just block them.
@@ -919,11 +876,10 @@ InstructionCodeGeneratorARM::InstructionCodeGeneratorARM(HGraph* graph, CodeGene
void CodeGeneratorARM::ComputeSpillMask() {
core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
- // Save one extra register for baseline. Note that on thumb2, there is no easy
- // instruction to restore just the PC, so this actually helps both baseline
- // and non-baseline to save and restore at least two registers at entry and exit.
- core_spill_mask_ |= (1 << kCoreSavedRegisterForBaseline);
DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved";
+ // There is no easy instruction to restore just the PC on thumb2. We spill and
+ // restore another arbitrary register.
+ core_spill_mask_ |= (1 << kCoreAlwaysSpillRegister);
fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_;
// We use vpush and vpop for saving and restoring floating point registers, which take
// a SRegister and the number of registers to save/restore after that SRegister. We
@@ -1416,15 +1372,9 @@ void InstructionCodeGeneratorARM::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
void InstructionCodeGeneratorARM::GenerateFPJumps(HCondition* cond,
Label* true_label,
- Label* false_label) {
+ Label* false_label ATTRIBUTE_UNUSED) {
__ vmstat(); // transfer FP status register to ARM APSR.
- // TODO: merge into a single branch (except "equal or unordered" and "not equal")
- if (cond->IsFPConditionTrueIfNaN()) {
- __ b(true_label, VS); // VS for unordered.
- } else if (cond->IsFPConditionFalseIfNaN()) {
- __ b(false_label, VS); // VS for unordered.
- }
- __ b(true_label, ARMCondition(cond->GetCondition()));
+ __ b(true_label, ARMFPCondition(cond->GetCondition(), cond->IsGtBias()));
}
void InstructionCodeGeneratorARM::GenerateLongComparesAndJumps(HCondition* cond,
@@ -1972,9 +1922,9 @@ void InstructionCodeGeneratorARM::VisitInvokeUnresolved(HInvokeUnresolved* invok
}
void LocationsBuilderARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
- // When we do not run baseline, explicit clinit checks triggered by static
- // invokes must have been pruned by art::PrepareForRegisterAllocation.
- DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+ // Explicit clinit checks triggered by static invokes must have been pruned by
+ // art::PrepareForRegisterAllocation.
+ DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
IntrinsicLocationsBuilderARM intrinsic(GetGraph()->GetArena(),
codegen_->GetAssembler(),
@@ -2004,9 +1954,9 @@ static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM* codegen)
}
void InstructionCodeGeneratorARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
- // When we do not run baseline, explicit clinit checks triggered by static
- // invokes must have been pruned by art::PrepareForRegisterAllocation.
- DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+ // Explicit clinit checks triggered by static invokes must have been pruned by
+ // art::PrepareForRegisterAllocation.
+ DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
if (TryGenerateIntrinsicCode(invoke, codegen_)) {
return;
@@ -3803,6 +3753,7 @@ void InstructionCodeGeneratorARM::VisitCompare(HCompare* compare) {
Label less, greater, done;
Primitive::Type type = compare->InputAt(0)->GetType();
+ Condition less_cond;
switch (type) {
case Primitive::kPrimLong: {
__ cmp(left.AsRegisterPairHigh<Register>(),
@@ -3813,6 +3764,7 @@ void InstructionCodeGeneratorARM::VisitCompare(HCompare* compare) {
__ LoadImmediate(out, 0);
__ cmp(left.AsRegisterPairLow<Register>(),
ShifterOperand(right.AsRegisterPairLow<Register>())); // Unsigned compare.
+ less_cond = LO;
break;
}
case Primitive::kPrimFloat:
@@ -3825,14 +3777,15 @@ void InstructionCodeGeneratorARM::VisitCompare(HCompare* compare) {
FromLowSToD(right.AsFpuRegisterPairLow<SRegister>()));
}
__ vmstat(); // transfer FP status register to ARM APSR.
- __ b(compare->IsGtBias() ? &greater : &less, VS); // VS for unordered.
+ less_cond = ARMFPCondition(kCondLT, compare->IsGtBias());
break;
}
default:
LOG(FATAL) << "Unexpected compare type " << type;
+ UNREACHABLE();
}
__ b(&done, EQ);
- __ b(&less, LO); // LO is for both: unsigned compare for longs and 'less than' for floats.
+ __ b(&less, less_cond);
__ Bind(&greater);
__ LoadImmediate(out, 1);
@@ -5530,7 +5483,7 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
case TypeCheckKind::kUnresolvedCheck:
case TypeCheckKind::kInterfaceCheck: {
// Note that we indeed only call on slow path, but we always go
- // into the slow path for the unresolved & interface check
+ // into the slow path for the unresolved and interface check
// cases.
//
// We cannot directly call the InstanceofNonTrivial runtime
@@ -5740,8 +5693,8 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) {
case TypeCheckKind::kUnresolvedCheck:
case TypeCheckKind::kInterfaceCheck:
- // We always go into the type check slow path for the unresolved &
- // interface check cases.
+ // We always go into the type check slow path for the unresolved
+ // and interface check cases.
//
// We cannot directly call the CheckCast runtime entry point
// without resorting to a type checking slow path here (i.e. by
@@ -6027,6 +5980,7 @@ void InstructionCodeGeneratorARM::GenerateGcRootFieldLoad(HInstruction* instruct
new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, root, root);
codegen_->AddSlowPath(slow_path);
+ // IP = Thread::Current()->GetIsGcMarking()
__ LoadFromOffset(
kLoadWord, IP, TR, Thread::IsGcMarkingOffset<kArmWordSize>().Int32Value());
__ CompareAndBranchIfNonZero(IP, slow_path->GetEntryLabel());
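The new comment spells out the fast-path test: the root is loaded directly and the mark slow path runs only while the collector is marking. A simplified standalone model of that split, with hypothetical names and not ART code:

    struct Object;

    // Stub standing in for the pReadBarrierMark runtime entry point; during a
    // concurrent copying collection the real one may return a forwarded reference.
    Object* ReadBarrierMarkRuntime(Object* ref) { return ref; }

    Object* LoadGcRoot(Object** root, bool is_gc_marking) {
      Object* ref = *root;                  // fast path: plain load of the root
      if (is_gc_marking) {                  // generated code tests Thread::IsGcMarkingOffset
        ref = ReadBarrierMarkRuntime(ref);  // slow path: the mark slow path calls the runtime
      }
      return ref;
    }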
@@ -6105,11 +6059,8 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i
// }
//
// Note: the original implementation in ReadBarrier::Barrier is
- // slightly more complex as:
- // - it implements the load-load fence using a data dependency on
- // the high-bits of rb_state, which are expected to be all zeroes;
- // - it performs additional checks that we do not do here for
- // performance reasons.
+ // slightly more complex as it performs additional checks that we do
+ // not do here for performance reasons.
Register ref_reg = ref.AsRegister<Register>();
Register temp_reg = temp.AsRegister<Register>();
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 26d6d63b31..d45ea973f9 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -340,9 +340,7 @@ class CodeGeneratorARM : public CodeGenerator {
return GetLabelOf(block)->Position();
}
- void SetupBlockedRegisters(bool is_baseline) const OVERRIDE;
-
- Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE;
+ void SetupBlockedRegisters() const OVERRIDE;
Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
@@ -444,7 +442,7 @@ class CodeGeneratorARM : public CodeGenerator {
// Fast path implementation of ReadBarrier::Barrier for a heap
// reference field load when Baker's read barriers are used.
void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
- Location out,
+ Location ref,
Register obj,
uint32_t offset,
Location temp,
@@ -452,7 +450,7 @@ class CodeGeneratorARM : public CodeGenerator {
// Fast path implementation of ReadBarrier::Barrier for a heap
// reference array load when Baker's read barriers are used.
void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
- Location out,
+ Location ref,
Register obj,
uint32_t data_offset,
Location index,
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 6ed2c5ab38..c0e3959933 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -93,6 +93,24 @@ inline Condition ARM64Condition(IfCondition cond) {
UNREACHABLE();
}
+inline Condition ARM64FPCondition(IfCondition cond, bool gt_bias) {
+  // The ARM64 condition codes can express all the necessary branches; see the
+  // "Meaning (floating-point)" column in table C1-1 of the ARMv8 reference manual.
+ // There is no dex instruction or HIR that would need the missing conditions
+ // "equal or unordered" or "not equal".
+ switch (cond) {
+ case kCondEQ: return eq;
+ case kCondNE: return ne /* unordered */;
+ case kCondLT: return gt_bias ? cc : lt /* unordered */;
+ case kCondLE: return gt_bias ? ls : le /* unordered */;
+ case kCondGT: return gt_bias ? hi /* unordered */ : gt;
+ case kCondGE: return gt_bias ? cs /* unordered */ : ge;
+ default:
+ LOG(FATAL) << "UNREACHABLE";
+ UNREACHABLE();
+ }
+}
+
Location ARM64ReturnLocation(Primitive::Type return_type) {
// Note that in practice, `LocationFrom(x0)` and `LocationFrom(w0)` create the
// same Location object, and so do `LocationFrom(d0)` and `LocationFrom(s0)`,
@@ -566,6 +584,56 @@ void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) {
}
}
+// Slow path marking an object during a read barrier.
+class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 {
+ public:
+ ReadBarrierMarkSlowPathARM64(HInstruction* instruction, Location out, Location obj)
+ : instruction_(instruction), out_(out), obj_(obj) {
+ DCHECK(kEmitCompilerReadBarrier);
+ }
+
+ const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathARM64"; }
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ LocationSummary* locations = instruction_->GetLocations();
+ Primitive::Type type = Primitive::kPrimNot;
+ DCHECK(locations->CanCall());
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
+ DCHECK(instruction_->IsInstanceFieldGet() ||
+ instruction_->IsStaticFieldGet() ||
+ instruction_->IsArrayGet() ||
+ instruction_->IsLoadClass() ||
+ instruction_->IsLoadString() ||
+ instruction_->IsInstanceOf() ||
+ instruction_->IsCheckCast())
+ << "Unexpected instruction in read barrier marking slow path: "
+ << instruction_->DebugName();
+
+ __ Bind(GetEntryLabel());
+ SaveLiveRegisters(codegen, locations);
+
+ InvokeRuntimeCallingConvention calling_convention;
+ CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
+ arm64_codegen->MoveLocation(LocationFrom(calling_convention.GetRegisterAt(0)), obj_, type);
+ arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierMark),
+ instruction_,
+ instruction_->GetDexPc(),
+ this);
+ CheckEntrypointTypes<kQuickReadBarrierMark, mirror::Object*, mirror::Object*>();
+ arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);
+
+ RestoreLiveRegisters(codegen, locations);
+ __ B(GetExitLabel());
+ }
+
+ private:
+ HInstruction* const instruction_;
+ const Location out_;
+ const Location obj_;
+
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM64);
+};
+
// Slow path generating a read barrier for a heap reference.
class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 {
public:
@@ -587,7 +655,7 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 {
// to be instrumented, e.g.:
//
// __ Ldr(out, HeapOperand(out, class_offset);
- // codegen_->GenerateReadBarrier(instruction, out_loc, out_loc, out_loc, offset);
+ // codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
//
// In that case, we have lost the information about the original
// object, and the emitted read barrier cannot work properly.
@@ -603,7 +671,9 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 {
DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
DCHECK(!instruction_->IsInvoke() ||
(instruction_->IsInvokeStaticOrDirect() &&
- instruction_->GetLocations()->Intrinsified()));
+ instruction_->GetLocations()->Intrinsified()))
+ << "Unexpected instruction in read barrier for heap reference slow path: "
+ << instruction_->DebugName();
// The read barrier instrumentation does not support the
// HArm64IntermediateAddress instruction yet.
DCHECK(!(instruction_->IsArrayGet() &&
@@ -751,14 +821,18 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 {
class ReadBarrierForRootSlowPathARM64 : public SlowPathCodeARM64 {
public:
ReadBarrierForRootSlowPathARM64(HInstruction* instruction, Location out, Location root)
- : instruction_(instruction), out_(out), root_(root) {}
+ : instruction_(instruction), out_(out), root_(root) {
+ DCHECK(kEmitCompilerReadBarrier);
+ }
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
LocationSummary* locations = instruction_->GetLocations();
Primitive::Type type = Primitive::kPrimNot;
DCHECK(locations->CanCall());
DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
- DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString());
+ DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
+ << "Unexpected instruction in read barrier for GC root slow path: "
+ << instruction_->DebugName();
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, locations);
@@ -1094,7 +1168,7 @@ void CodeGeneratorARM64::MarkGCCard(Register object, Register value, bool value_
}
}
-void CodeGeneratorARM64::SetupBlockedRegisters(bool is_baseline) const {
+void CodeGeneratorARM64::SetupBlockedRegisters() const {
// Blocked core registers:
// lr : Runtime reserved.
// tr : Runtime reserved.
@@ -1115,40 +1189,17 @@ void CodeGeneratorARM64::SetupBlockedRegisters(bool is_baseline) const {
blocked_fpu_registers_[reserved_fp_registers.PopLowestIndex().code()] = true;
}
- if (is_baseline) {
- CPURegList reserved_core_baseline_registers = callee_saved_core_registers;
- while (!reserved_core_baseline_registers.IsEmpty()) {
- blocked_core_registers_[reserved_core_baseline_registers.PopLowestIndex().code()] = true;
- }
- }
-
- if (is_baseline || GetGraph()->IsDebuggable()) {
+ if (GetGraph()->IsDebuggable()) {
// Stubs do not save callee-save floating point registers. If the graph
// is debuggable, we need to deal with these registers differently. For
// now, just block them.
- CPURegList reserved_fp_baseline_registers = callee_saved_fp_registers;
- while (!reserved_fp_baseline_registers.IsEmpty()) {
- blocked_fpu_registers_[reserved_fp_baseline_registers.PopLowestIndex().code()] = true;
+ CPURegList reserved_fp_registers_debuggable = callee_saved_fp_registers;
+ while (!reserved_fp_registers_debuggable.IsEmpty()) {
+ blocked_fpu_registers_[reserved_fp_registers_debuggable.PopLowestIndex().code()] = true;
}
}
}
-Location CodeGeneratorARM64::AllocateFreeRegister(Primitive::Type type) const {
- if (type == Primitive::kPrimVoid) {
- LOG(FATAL) << "Unreachable type " << type;
- }
-
- if (Primitive::IsFloatingPointType(type)) {
- ssize_t reg = FindFreeEntry(blocked_fpu_registers_, kNumberOfAllocatableFPRegisters);
- DCHECK_NE(reg, -1);
- return Location::FpuRegisterLocation(reg);
- } else {
- ssize_t reg = FindFreeEntry(blocked_core_registers_, kNumberOfAllocatableRegisters);
- DCHECK_NE(reg, -1);
- return Location::RegisterLocation(reg);
- }
-}
-
size_t CodeGeneratorARM64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
Register reg = Register(VIXLRegCodeFromART(reg_id), kXRegSize);
__ Str(reg, MemOperand(sp, stack_index));
@@ -1343,7 +1394,8 @@ void CodeGeneratorARM64::Load(Primitive::Type type,
void CodeGeneratorARM64::LoadAcquire(HInstruction* instruction,
CPURegister dst,
- const MemOperand& src) {
+ const MemOperand& src,
+ bool needs_null_check) {
MacroAssembler* masm = GetVIXLAssembler();
BlockPoolsScope block_pools(masm);
UseScratchRegisterScope temps(masm);
@@ -1359,20 +1411,28 @@ void CodeGeneratorARM64::LoadAcquire(HInstruction* instruction,
switch (type) {
case Primitive::kPrimBoolean:
__ Ldarb(Register(dst), base);
- MaybeRecordImplicitNullCheck(instruction);
+ if (needs_null_check) {
+ MaybeRecordImplicitNullCheck(instruction);
+ }
break;
case Primitive::kPrimByte:
__ Ldarb(Register(dst), base);
- MaybeRecordImplicitNullCheck(instruction);
+ if (needs_null_check) {
+ MaybeRecordImplicitNullCheck(instruction);
+ }
__ Sbfx(Register(dst), Register(dst), 0, Primitive::ComponentSize(type) * kBitsPerByte);
break;
case Primitive::kPrimChar:
__ Ldarh(Register(dst), base);
- MaybeRecordImplicitNullCheck(instruction);
+ if (needs_null_check) {
+ MaybeRecordImplicitNullCheck(instruction);
+ }
break;
case Primitive::kPrimShort:
__ Ldarh(Register(dst), base);
- MaybeRecordImplicitNullCheck(instruction);
+ if (needs_null_check) {
+ MaybeRecordImplicitNullCheck(instruction);
+ }
__ Sbfx(Register(dst), Register(dst), 0, Primitive::ComponentSize(type) * kBitsPerByte);
break;
case Primitive::kPrimInt:
@@ -1380,7 +1440,9 @@ void CodeGeneratorARM64::LoadAcquire(HInstruction* instruction,
case Primitive::kPrimLong:
DCHECK_EQ(dst.Is64Bits(), Primitive::Is64BitType(type));
__ Ldar(Register(dst), base);
- MaybeRecordImplicitNullCheck(instruction);
+ if (needs_null_check) {
+ MaybeRecordImplicitNullCheck(instruction);
+ }
break;
case Primitive::kPrimFloat:
case Primitive::kPrimDouble: {
@@ -1389,7 +1451,9 @@ void CodeGeneratorARM64::LoadAcquire(HInstruction* instruction,
Register temp = dst.Is64Bits() ? temps.AcquireX() : temps.AcquireW();
__ Ldar(temp, base);
- MaybeRecordImplicitNullCheck(instruction);
+ if (needs_null_check) {
+ MaybeRecordImplicitNullCheck(instruction);
+ }
__ Fmov(FPRegister(dst), temp);
break;
}
@@ -1510,7 +1574,7 @@ void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCod
__ Bind(slow_path->GetExitLabel());
}
-void InstructionCodeGeneratorARM64::GenerateMemoryBarrier(MemBarrierKind kind) {
+void CodeGeneratorARM64::GenerateMemoryBarrier(MemBarrierKind kind) {
BarrierType type = BarrierAll;
switch (kind) {
@@ -1646,33 +1710,62 @@ void LocationsBuilderARM64::HandleFieldGet(HInstruction* instruction) {
void InstructionCodeGeneratorARM64::HandleFieldGet(HInstruction* instruction,
const FieldInfo& field_info) {
DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
+ LocationSummary* locations = instruction->GetLocations();
+ Location base_loc = locations->InAt(0);
+ Location out = locations->Out();
+ uint32_t offset = field_info.GetFieldOffset().Uint32Value();
Primitive::Type field_type = field_info.GetFieldType();
BlockPoolsScope block_pools(GetVIXLAssembler());
MemOperand field = HeapOperand(InputRegisterAt(instruction, 0), field_info.GetFieldOffset());
bool use_acquire_release = codegen_->GetInstructionSetFeatures().PreferAcquireRelease();
- if (field_info.IsVolatile()) {
- if (use_acquire_release) {
- // NB: LoadAcquire will record the pc info if needed.
- codegen_->LoadAcquire(instruction, OutputCPURegister(instruction), field);
+ if (field_type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // Object FieldGet with Baker's read barrier case.
+ MacroAssembler* masm = GetVIXLAssembler();
+ UseScratchRegisterScope temps(masm);
+ // /* HeapReference<Object> */ out = *(base + offset)
+ Register base = RegisterFrom(base_loc, Primitive::kPrimNot);
+ Register temp = temps.AcquireW();
+ // Note that potential implicit null checks are handled in this
+ // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier call.
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ instruction,
+ out,
+ base,
+ offset,
+ temp,
+ /* needs_null_check */ true,
+ field_info.IsVolatile() && use_acquire_release);
+ if (field_info.IsVolatile() && !use_acquire_release) {
+ // For IRIW sequential consistency kLoadAny is not sufficient.
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+ }
+ } else {
+ // General case.
+ if (field_info.IsVolatile()) {
+ if (use_acquire_release) {
+ // Note that a potential implicit null check is handled in this
+ // CodeGeneratorARM64::LoadAcquire call.
+ // NB: LoadAcquire will record the pc info if needed.
+ codegen_->LoadAcquire(
+ instruction, OutputCPURegister(instruction), field, /* needs_null_check */ true);
+ } else {
+ codegen_->Load(field_type, OutputCPURegister(instruction), field);
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ // For IRIW sequential consistency kLoadAny is not sufficient.
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+ }
} else {
codegen_->Load(field_type, OutputCPURegister(instruction), field);
codegen_->MaybeRecordImplicitNullCheck(instruction);
- // For IRIW sequential consistency kLoadAny is not sufficient.
- GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
}
- } else {
- codegen_->Load(field_type, OutputCPURegister(instruction), field);
- codegen_->MaybeRecordImplicitNullCheck(instruction);
- }
-
- if (field_type == Primitive::kPrimNot) {
- LocationSummary* locations = instruction->GetLocations();
- Location base = locations->InAt(0);
- Location out = locations->Out();
- uint32_t offset = field_info.GetFieldOffset().Uint32Value();
- codegen_->MaybeGenerateReadBarrier(instruction, out, out, base, offset);
+ if (field_type == Primitive::kPrimNot) {
+ // If read barriers are enabled, emit read barriers other than
+ // Baker's using a slow path (and also unpoison the loaded
+ // reference, if heap poisoning is enabled).
+ codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
+ }
}
}
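After this change HandleFieldGet chooses between four load sequences depending on the field type, volatility, and read-barrier configuration. A compact decision sketch, with an illustrative enum and parameter names that are not part of ART:

    enum class FieldLoadKind {
      kBakerReadBarrier,       // GenerateFieldLoadWithBakerReadBarrier emits the load itself
      kLoadAcquire,            // acquiring load, implicit null check folded in
      kLoadThenAnyAnyBarrier,  // plain load followed by a kAnyAny barrier (IRIW)
      kPlainLoad
    };

    FieldLoadKind SelectFieldLoad(bool is_object_field, bool emit_read_barrier, bool use_baker,
                                  bool is_volatile, bool prefer_acquire_release) {
      if (is_object_field && emit_read_barrier && use_baker) {
        // A volatile field still gets a trailing kAnyAny barrier when the CPU does
        // not prefer acquire/release, mirroring the branch above.
        return FieldLoadKind::kBakerReadBarrier;
      }
      if (is_volatile) {
        return prefer_acquire_release ? FieldLoadKind::kLoadAcquire
                                      : FieldLoadKind::kLoadThenAnyAnyBarrier;
      }
      return FieldLoadKind::kPlainLoad;
    }

Non-Baker read barriers for object fields are still emitted afterwards through MaybeGenerateReadBarrierSlow, as the else branch shows.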
@@ -1718,10 +1811,10 @@ void InstructionCodeGeneratorARM64::HandleFieldSet(HInstruction* instruction,
codegen_->StoreRelease(field_type, source, HeapOperand(obj, offset));
codegen_->MaybeRecordImplicitNullCheck(instruction);
} else {
- GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
codegen_->Store(field_type, source, HeapOperand(obj, offset));
codegen_->MaybeRecordImplicitNullCheck(instruction);
- GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
}
} else {
codegen_->Store(field_type, source, HeapOperand(obj, offset));
@@ -2026,50 +2119,62 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) {
LocationSummary* locations = instruction->GetLocations();
Location index = locations->InAt(1);
uint32_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(type)).Uint32Value();
- MemOperand source = HeapOperand(obj);
- CPURegister dest = OutputCPURegister(instruction);
+ Location out = locations->Out();
MacroAssembler* masm = GetVIXLAssembler();
UseScratchRegisterScope temps(masm);
// Block pools between `Load` and `MaybeRecordImplicitNullCheck`.
BlockPoolsScope block_pools(masm);
- if (index.IsConstant()) {
- offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type);
- source = HeapOperand(obj, offset);
+ if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // Object ArrayGet with Baker's read barrier case.
+ Register temp = temps.AcquireW();
+ // The read barrier instrumentation does not support the
+ // HArm64IntermediateAddress instruction yet.
+ DCHECK(!instruction->GetArray()->IsArm64IntermediateAddress());
+ // Note that a potential implicit null check is handled in the
+ // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call.
+ codegen_->GenerateArrayLoadWithBakerReadBarrier(
+ instruction, out, obj.W(), offset, index, temp, /* needs_null_check */ true);
} else {
- Register temp = temps.AcquireSameSizeAs(obj);
- if (instruction->GetArray()->IsArm64IntermediateAddress()) {
- // The read barrier instrumentation does not support the
- // HArm64IntermediateAddress instruction yet.
- DCHECK(!kEmitCompilerReadBarrier);
- // We do not need to compute the intermediate address from the array: the
- // input instruction has done it already. See the comment in
- // `InstructionSimplifierArm64::TryExtractArrayAccessAddress()`.
- if (kIsDebugBuild) {
- HArm64IntermediateAddress* tmp = instruction->GetArray()->AsArm64IntermediateAddress();
- DCHECK(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64() == offset);
- }
- temp = obj;
+ // General case.
+ MemOperand source = HeapOperand(obj);
+ if (index.IsConstant()) {
+ offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type);
+ source = HeapOperand(obj, offset);
} else {
- __ Add(temp, obj, offset);
+ Register temp = temps.AcquireSameSizeAs(obj);
+ if (instruction->GetArray()->IsArm64IntermediateAddress()) {
+ // The read barrier instrumentation does not support the
+ // HArm64IntermediateAddress instruction yet.
+ DCHECK(!kEmitCompilerReadBarrier);
+ // We do not need to compute the intermediate address from the array: the
+ // input instruction has done it already. See the comment in
+ // `InstructionSimplifierArm64::TryExtractArrayAccessAddress()`.
+ if (kIsDebugBuild) {
+ HArm64IntermediateAddress* tmp = instruction->GetArray()->AsArm64IntermediateAddress();
+ DCHECK_EQ(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64(), offset);
+ }
+ temp = obj;
+ } else {
+ __ Add(temp, obj, offset);
+ }
+ source = HeapOperand(temp, XRegisterFrom(index), LSL, Primitive::ComponentSizeShift(type));
}
- source = HeapOperand(temp, XRegisterFrom(index), LSL, Primitive::ComponentSizeShift(type));
- }
- codegen_->Load(type, dest, source);
- codegen_->MaybeRecordImplicitNullCheck(instruction);
+ codegen_->Load(type, OutputCPURegister(instruction), source);
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
- if (type == Primitive::kPrimNot) {
- static_assert(
- sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
- "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
- Location obj_loc = locations->InAt(0);
- Location out = locations->Out();
- if (index.IsConstant()) {
- codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset);
- } else {
- codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset, index);
+ if (type == Primitive::kPrimNot) {
+ static_assert(
+ sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+ "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+ Location obj_loc = locations->InAt(0);
+ if (index.IsConstant()) {
+ codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, obj_loc, offset);
+ } else {
+ codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, obj_loc, offset, index);
+ }
}
}
}
@@ -2199,12 +2304,12 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) {
// __ Mov(temp2, temp);
// // /* HeapReference<Class> */ temp = temp->component_type_
// __ Ldr(temp, HeapOperand(temp, component_offset));
- // codegen_->GenerateReadBarrier(
+ // codegen_->GenerateReadBarrierSlow(
// instruction, temp_loc, temp_loc, temp2_loc, component_offset);
//
// // /* HeapReference<Class> */ temp2 = value->klass_
// __ Ldr(temp2, HeapOperand(Register(value), class_offset));
- // codegen_->GenerateReadBarrier(
+ // codegen_->GenerateReadBarrierSlow(
// instruction, temp2_loc, temp2_loc, value_loc, class_offset, temp_loc);
//
// __ Cmp(temp, temp2);
@@ -2381,12 +2486,8 @@ void InstructionCodeGeneratorARM64::VisitCompare(HCompare* compare) {
} else {
__ Fcmp(left, InputFPRegisterAt(compare, 1));
}
- if (compare->IsGtBias()) {
- __ Cset(result, ne);
- } else {
- __ Csetm(result, ne);
- }
- __ Cneg(result, result, compare->IsGtBias() ? mi : gt);
+ __ Cset(result, ne);
+ __ Cneg(result, result, ARM64FPCondition(kCondLT, compare->IsGtBias()));
break;
}
default:
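The shortened sequence materializes the three-way floating-point compare without branches: Cset(result, ne) produces 0 or 1, and Cneg negates it only when the NaN-biased "less than" condition holds. A standalone model of that arithmetic, illustrative only and not ART code:

    #include <cmath>
    #include <cstdio>

    // Models Fcmp + Cset(ne) + Cneg(less_cond): ne is true for unequal or
    // unordered operands, and the biased less condition (cc for gt bias, lt for
    // lt bias) decides whether the 1 is negated, yielding -1/0/+1.
    int FcmpCsetCneg(float a, float b, bool gt_bias) {
      bool unordered = std::isnan(a) || std::isnan(b);
      int result = (unordered || a != b) ? 1 : 0;    // Cset(result, ne)
      bool less = unordered ? !gt_bias : (a < b);    // cc is false, lt is true, on unordered
      return less ? -result : result;                // Cneg(result, result, less_cond)
    }

    int main() {
      std::printf("%d %d %d %d\n",
                  FcmpCsetCneg(1.0f, 2.0f, true),             // -1
                  FcmpCsetCneg(2.0f, 2.0f, true),             //  0
                  FcmpCsetCneg(std::nanf(""), 2.0f, true),    //  1 (gt bias)
                  FcmpCsetCneg(std::nanf(""), 2.0f, false));  // -1 (lt bias)
      return 0;
    }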
@@ -2422,7 +2523,6 @@ void InstructionCodeGeneratorARM64::HandleCondition(HCondition* instruction) {
LocationSummary* locations = instruction->GetLocations();
Register res = RegisterFrom(locations->Out(), instruction->GetType());
IfCondition if_cond = instruction->GetCondition();
- Condition arm64_cond = ARM64Condition(if_cond);
if (Primitive::IsFloatingPointType(instruction->InputAt(0)->GetType())) {
FPRegister lhs = InputFPRegisterAt(instruction, 0);
@@ -2433,20 +2533,13 @@ void InstructionCodeGeneratorARM64::HandleCondition(HCondition* instruction) {
} else {
__ Fcmp(lhs, InputFPRegisterAt(instruction, 1));
}
- __ Cset(res, arm64_cond);
- if (instruction->IsFPConditionTrueIfNaN()) {
- // res = IsUnordered(arm64_cond) ? 1 : res <=> res = IsNotUnordered(arm64_cond) ? res : 1
- __ Csel(res, res, Operand(1), vc); // VC for "not unordered".
- } else if (instruction->IsFPConditionFalseIfNaN()) {
- // res = IsUnordered(arm64_cond) ? 0 : res <=> res = IsNotUnordered(arm64_cond) ? res : 0
- __ Csel(res, res, Operand(0), vc); // VC for "not unordered".
- }
+ __ Cset(res, ARM64FPCondition(if_cond, instruction->IsGtBias()));
} else {
// Integer cases.
Register lhs = InputRegisterAt(instruction, 0);
Operand rhs = InputOperandAt(instruction, 1);
__ Cmp(lhs, rhs);
- __ Cset(res, arm64_cond);
+ __ Cset(res, ARM64Condition(if_cond));
}
}
@@ -2816,15 +2909,11 @@ void InstructionCodeGeneratorARM64::GenerateTestAndBranch(HInstruction* instruct
} else {
__ Fcmp(lhs, InputFPRegisterAt(condition, 1));
}
- if (condition->IsFPConditionTrueIfNaN()) {
- __ B(vs, true_target == nullptr ? &fallthrough_target : true_target);
- } else if (condition->IsFPConditionFalseIfNaN()) {
- __ B(vs, false_target == nullptr ? &fallthrough_target : false_target);
- }
if (true_target == nullptr) {
- __ B(ARM64Condition(condition->GetOppositeCondition()), false_target);
+ IfCondition opposite_condition = condition->GetOppositeCondition();
+ __ B(ARM64FPCondition(opposite_condition, condition->IsGtBias()), false_target);
} else {
- __ B(ARM64Condition(condition->GetCondition()), true_target);
+ __ B(ARM64FPCondition(condition->GetCondition(), condition->IsGtBias()), true_target);
}
} else {
// Integer cases.
@@ -2841,7 +2930,8 @@ void InstructionCodeGeneratorARM64::GenerateTestAndBranch(HInstruction* instruct
non_fallthrough_target = true_target;
}
- if ((arm64_cond != gt && arm64_cond != le) && rhs.IsImmediate() && (rhs.immediate() == 0)) {
+ if ((arm64_cond == eq || arm64_cond == ne || arm64_cond == lt || arm64_cond == ge) &&
+ rhs.IsImmediate() && (rhs.immediate() == 0)) {
switch (arm64_cond) {
case eq:
__ Cbz(lhs, non_fallthrough_target);
@@ -2942,6 +3032,14 @@ void InstructionCodeGeneratorARM64::VisitInstanceFieldSet(HInstanceFieldSet* ins
HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
}
+static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
+ return kEmitCompilerReadBarrier &&
+ (kUseBakerReadBarrier ||
+ type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+ type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+ type_check_kind == TypeCheckKind::kArrayObjectCheck);
+}
+
void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) {
LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
@@ -2968,21 +3066,22 @@ void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) {
locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
// When read barriers are enabled, we need a temporary register for
// some cases.
- if (kEmitCompilerReadBarrier &&
- (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
- type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
- type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
+ if (TypeCheckNeedsATemporary(type_check_kind)) {
locations->AddTemp(Location::RequiresRegister());
}
}
void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) {
+ TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
LocationSummary* locations = instruction->GetLocations();
Location obj_loc = locations->InAt(0);
Register obj = InputRegisterAt(instruction, 0);
Register cls = InputRegisterAt(instruction, 1);
Location out_loc = locations->Out();
Register out = OutputRegister(instruction);
+ Location maybe_temp_loc = TypeCheckNeedsATemporary(type_check_kind) ?
+ locations->GetTemp(0) :
+ Location::NoLocation();
uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
@@ -2998,10 +3097,9 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) {
}
// /* HeapReference<Class> */ out = obj->klass_
- __ Ldr(out, HeapOperand(obj.W(), class_offset));
- codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, obj_loc, class_offset);
+ GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc);
- switch (instruction->GetTypeCheckKind()) {
+ switch (type_check_kind) {
case TypeCheckKind::kExactCheck: {
__ Cmp(out, cls);
__ Cset(out, eq);
@@ -3016,17 +3114,8 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) {
// object to avoid doing a comparison we know will fail.
vixl::Label loop, success;
__ Bind(&loop);
- Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
- if (kEmitCompilerReadBarrier) {
- // Save the value of `out` into `temp` before overwriting it
- // in the following move operation, as we will need it for the
- // read barrier below.
- Register temp = WRegisterFrom(temp_loc);
- __ Mov(temp, out);
- }
// /* HeapReference<Class> */ out = out->super_class_
- __ Ldr(out, HeapOperand(out, super_offset));
- codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset);
+ GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc);
// If `out` is null, we use it for the result, and jump to `done`.
__ Cbz(out, &done);
__ Cmp(out, cls);
@@ -3044,17 +3133,8 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) {
__ Bind(&loop);
__ Cmp(out, cls);
__ B(eq, &success);
- Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
- if (kEmitCompilerReadBarrier) {
- // Save the value of `out` into `temp` before overwriting it
- // in the following move operation, as we will need it for the
- // read barrier below.
- Register temp = WRegisterFrom(temp_loc);
- __ Mov(temp, out);
- }
// /* HeapReference<Class> */ out = out->super_class_
- __ Ldr(out, HeapOperand(out, super_offset));
- codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset);
+ GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc);
__ Cbnz(out, &loop);
// If `out` is null, we use it for the result, and jump to `done`.
__ B(&done);
@@ -3072,17 +3152,8 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) {
__ Cmp(out, cls);
__ B(eq, &exact_check);
// Otherwise, we need to check that the object's class is a non-primitive array.
- Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
- if (kEmitCompilerReadBarrier) {
- // Save the value of `out` into `temp` before overwriting it
- // in the following move operation, as we will need it for the
- // read barrier below.
- Register temp = WRegisterFrom(temp_loc);
- __ Mov(temp, out);
- }
// /* HeapReference<Class> */ out = out->component_type_
- __ Ldr(out, HeapOperand(out, component_offset));
- codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, component_offset);
+ GenerateReferenceLoadOneRegister(instruction, out_loc, component_offset, maybe_temp_loc);
// If `out` is null, we use it for the result, and jump to `done`.
__ Cbz(out, &done);
__ Ldrh(out, HeapOperand(out, primitive_offset));
@@ -3121,6 +3192,13 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) {
// HInstanceOf instruction (following the runtime calling
// convention), which might be cluttered by the potential first
// read barrier emission at the beginning of this method.
+ //
+ // TODO: Introduce a new runtime entry point taking the object
+ // to test (instead of its class) as argument, and let it deal
+ // with the read barrier issues. This will let us refactor this
+ // case of the `switch` code as it was previously (with a direct
+ // call to the runtime not using a type checking slow path).
+ // This should also be beneficial for the other cases above.
DCHECK(locations->OnlyCallsOnSlowPath());
slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction,
/* is_fatal */ false);
@@ -3173,30 +3251,29 @@ void LocationsBuilderARM64::VisitCheckCast(HCheckCast* instruction) {
locations->SetInAt(1, Location::RequiresRegister());
// Note that TypeCheckSlowPathARM64 uses this "temp" register too.
locations->AddTemp(Location::RequiresRegister());
- locations->AddTemp(Location::RequiresRegister());
// When read barriers are enabled, we need an additional temporary
// register for some cases.
- if (kEmitCompilerReadBarrier &&
- (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
- type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
- type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
- locations->AddTemp(Location::RequiresRegister());
+ if (TypeCheckNeedsATemporary(type_check_kind)) {
+ locations->AddTemp(Location::RequiresRegister());
}
}
void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) {
+ TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
LocationSummary* locations = instruction->GetLocations();
Location obj_loc = locations->InAt(0);
Register obj = InputRegisterAt(instruction, 0);
Register cls = InputRegisterAt(instruction, 1);
Location temp_loc = locations->GetTemp(0);
+ Location maybe_temp2_loc = TypeCheckNeedsATemporary(type_check_kind) ?
+ locations->GetTemp(1) :
+ Location::NoLocation();
Register temp = WRegisterFrom(temp_loc);
uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
- TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
bool is_type_check_slow_path_fatal =
(type_check_kind == TypeCheckKind::kExactCheck ||
type_check_kind == TypeCheckKind::kAbstractClassCheck ||
@@ -3215,8 +3292,7 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) {
}
// /* HeapReference<Class> */ temp = obj->klass_
- __ Ldr(temp, HeapOperand(obj, class_offset));
- codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+ GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
switch (type_check_kind) {
case TypeCheckKind::kExactCheck:
@@ -3233,18 +3309,8 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) {
// object to avoid doing a comparison we know will fail.
vixl::Label loop, compare_classes;
__ Bind(&loop);
- Location temp2_loc =
- kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
- if (kEmitCompilerReadBarrier) {
- // Save the value of `temp` into `temp2` before overwriting it
- // in the following move operation, as we will need it for the
- // read barrier below.
- Register temp2 = WRegisterFrom(temp2_loc);
- __ Mov(temp2, temp);
- }
// /* HeapReference<Class> */ temp = temp->super_class_
- __ Ldr(temp, HeapOperand(temp, super_offset));
- codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset);
+ GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc);
// If the class reference currently in `temp` is not null, jump
// to the `compare_classes` label to compare it with the checked
@@ -3256,8 +3322,8 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) {
// going into the slow path, as it has been overwritten in the
// meantime.
// /* HeapReference<Class> */ temp = obj->klass_
- __ Ldr(temp, HeapOperand(obj, class_offset));
- codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+ GenerateReferenceLoadTwoRegisters(
+ instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
__ B(type_check_slow_path->GetEntryLabel());
__ Bind(&compare_classes);
@@ -3273,18 +3339,8 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) {
__ Cmp(temp, cls);
__ B(eq, &done);
- Location temp2_loc =
- kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
- if (kEmitCompilerReadBarrier) {
- // Save the value of `temp` into `temp2` before overwriting it
- // in the following move operation, as we will need it for the
- // read barrier below.
- Register temp2 = WRegisterFrom(temp2_loc);
- __ Mov(temp2, temp);
- }
// /* HeapReference<Class> */ temp = temp->super_class_
- __ Ldr(temp, HeapOperand(temp, super_offset));
- codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset);
+ GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc);
// If the class reference currently in `temp` is not null, jump
// back at the beginning of the loop.
@@ -3295,8 +3351,8 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) {
// going into the slow path, as it has been overwritten in the
// meantime.
// /* HeapReference<Class> */ temp = obj->klass_
- __ Ldr(temp, HeapOperand(obj, class_offset));
- codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+ GenerateReferenceLoadTwoRegisters(
+ instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
__ B(type_check_slow_path->GetEntryLabel());
break;
}
@@ -3308,19 +3364,8 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) {
__ B(eq, &done);
// Otherwise, we need to check that the object's class is a non-primitive array.
- Location temp2_loc =
- kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
- if (kEmitCompilerReadBarrier) {
- // Save the value of `temp` into `temp2` before overwriting it
- // in the following move operation, as we will need it for the
- // read barrier below.
- Register temp2 = WRegisterFrom(temp2_loc);
- __ Mov(temp2, temp);
- }
// /* HeapReference<Class> */ temp = temp->component_type_
- __ Ldr(temp, HeapOperand(temp, component_offset));
- codegen_->MaybeGenerateReadBarrier(
- instruction, temp_loc, temp_loc, temp2_loc, component_offset);
+ GenerateReferenceLoadOneRegister(instruction, temp_loc, component_offset, maybe_temp2_loc);
// If the component type is not null (i.e. the object is indeed
// an array), jump to label `check_non_primitive_component_type`
@@ -3333,8 +3378,8 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) {
// going into the slow path, as it has been overwritten in the
// meantime.
// /* HeapReference<Class> */ temp = obj->klass_
- __ Ldr(temp, HeapOperand(obj, class_offset));
- codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+ GenerateReferenceLoadTwoRegisters(
+ instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
__ B(type_check_slow_path->GetEntryLabel());
__ Bind(&check_non_primitive_component_type);
@@ -3343,8 +3388,8 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) {
__ Cbz(temp, &done);
// Same comment as above regarding `temp` and the slow path.
// /* HeapReference<Class> */ temp = obj->klass_
- __ Ldr(temp, HeapOperand(obj, class_offset));
- codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+ GenerateReferenceLoadTwoRegisters(
+ instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
__ B(type_check_slow_path->GetEntryLabel());
break;
}
@@ -3361,6 +3406,13 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) {
// instruction (following the runtime calling convention), which
// might be cluttered by the potential first read barrier
// emission at the beginning of this method.
+ //
+ // TODO: Introduce a new runtime entry point taking the object
+ // to test (instead of its class) as argument, and let it deal
+ // with the read barrier issues. This will let us refactor this
+ // case of the `switch` code as it was previously (with a direct
+ // call to the runtime not using a type checking slow path).
+ // This should also be beneficial for the other cases above.
__ B(type_check_slow_path->GetEntryLabel());
break;
}
@@ -3462,9 +3514,9 @@ void LocationsBuilderARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
}
void LocationsBuilderARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
- // When we do not run baseline, explicit clinit checks triggered by static
- // invokes must have been pruned by art::PrepareForRegisterAllocation.
- DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+ // Explicit clinit checks triggered by static invokes must have been pruned by
+ // art::PrepareForRegisterAllocation.
+ DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetArena());
if (intrinsic.TryDispatch(invoke)) {
@@ -3486,7 +3538,7 @@ static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM64* codege
HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM64::GetSupportedInvokeStaticOrDirectDispatch(
const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
MethodReference target_method ATTRIBUTE_UNUSED) {
- // On arm64 we support all dispatch types.
+ // On ARM64 we support all dispatch types.
return desired_dispatch_info;
}
@@ -3712,9 +3764,9 @@ vixl::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateMethodCodeLiteral(
void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
- // When we do not run baseline, explicit clinit checks triggered by static
- // invokes must have been pruned by art::PrepareForRegisterAllocation.
- DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+ // Explicit clinit checks triggered by static invokes must have been pruned by
+ // art::PrepareForRegisterAllocation.
+ DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
if (TryGenerateIntrinsicCode(invoke, codegen_)) {
return;
@@ -3763,32 +3815,17 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) {
if (cls->IsReferrersClass()) {
DCHECK(!cls->CanCallRuntime());
DCHECK(!cls->MustGenerateClinitCheck());
- uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value();
- if (kEmitCompilerReadBarrier) {
- // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_)
- __ Add(out.X(), current_method.X(), declaring_class_offset);
- // /* mirror::Class* */ out = out->Read()
- codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc);
- } else {
- // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
- __ Ldr(out, MemOperand(current_method, declaring_class_offset));
- }
+ // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+ GenerateGcRootFieldLoad(
+ cls, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
} else {
MemberOffset resolved_types_offset = ArtMethod::DexCacheResolvedTypesOffset(kArm64PointerSize);
// /* GcRoot<mirror::Class>[] */ out =
// current_method.ptr_sized_fields_->dex_cache_resolved_types_
__ Ldr(out.X(), MemOperand(current_method, resolved_types_offset.Int32Value()));
-
- size_t cache_offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex());
- if (kEmitCompilerReadBarrier) {
- // /* GcRoot<mirror::Class>* */ out = &out[type_index]
- __ Add(out.X(), out.X(), cache_offset);
- // /* mirror::Class* */ out = out->Read()
- codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc);
- } else {
- // /* GcRoot<mirror::Class> */ out = out[type_index]
- __ Ldr(out, MemOperand(out.X(), cache_offset));
- }
+ // /* GcRoot<mirror::Class> */ out = out[type_index]
+ GenerateGcRootFieldLoad(
+ cls, out_loc, out.X(), CodeGenerator::GetCacheOffset(cls->GetTypeIndex()));
if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) {
DCHECK(cls->CanCallRuntime());
@@ -3851,30 +3888,14 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) {
Register out = OutputRegister(load);
Register current_method = InputRegisterAt(load, 0);
- uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value();
- if (kEmitCompilerReadBarrier) {
- // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_)
- __ Add(out.X(), current_method.X(), declaring_class_offset);
- // /* mirror::Class* */ out = out->Read()
- codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc);
- } else {
- // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
- __ Ldr(out, MemOperand(current_method, declaring_class_offset));
- }
-
+ // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+ GenerateGcRootFieldLoad(
+ load, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
// /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_
__ Ldr(out.X(), HeapOperand(out, mirror::Class::DexCacheStringsOffset().Uint32Value()));
-
- size_t cache_offset = CodeGenerator::GetCacheOffset(load->GetStringIndex());
- if (kEmitCompilerReadBarrier) {
- // /* GcRoot<mirror::String>* */ out = &out[string_index]
- __ Add(out.X(), out.X(), cache_offset);
- // /* mirror::String* */ out = out->Read()
- codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc);
- } else {
- // /* GcRoot<mirror::String> */ out = out[string_index]
- __ Ldr(out, MemOperand(out.X(), cache_offset));
- }
+ // /* GcRoot<mirror::String> */ out = out[string_index]
+ GenerateGcRootFieldLoad(
+ load, out_loc, out.X(), CodeGenerator::GetCacheOffset(load->GetStringIndex()));
if (!load->IsInDexCache()) {
SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM64(load);
@@ -4243,7 +4264,7 @@ void LocationsBuilderARM64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
}
void InstructionCodeGeneratorARM64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
- GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
+ codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
}
void LocationsBuilderARM64::VisitReturn(HReturn* instruction) {
@@ -4628,14 +4649,288 @@ void InstructionCodeGeneratorARM64::VisitPackedSwitch(HPackedSwitch* switch_inst
}
}
-void CodeGeneratorARM64::GenerateReadBarrier(HInstruction* instruction,
- Location out,
- Location ref,
- Location obj,
- uint32_t offset,
- Location index) {
+void InstructionCodeGeneratorARM64::GenerateReferenceLoadOneRegister(HInstruction* instruction,
+ Location out,
+ uint32_t offset,
+ Location maybe_temp) {
+ Primitive::Type type = Primitive::kPrimNot;
+ Register out_reg = RegisterFrom(out, type);
+ if (kEmitCompilerReadBarrier) {
+ Register temp_reg = RegisterFrom(maybe_temp, type);
+ if (kUseBakerReadBarrier) {
+ // Load with fast path based Baker's read barrier.
+ // /* HeapReference<Object> */ out = *(out + offset)
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
+ out,
+ out_reg,
+ offset,
+ temp_reg,
+ /* needs_null_check */ false,
+ /* use_load_acquire */ false);
+ } else {
+ // Load with slow path based read barrier.
+ // Save the value of `out` into `maybe_temp` before overwriting it
+ // in the following move operation, as we will need it for the
+ // read barrier below.
+ __ Mov(temp_reg, out_reg);
+ // /* HeapReference<Object> */ out = *(out + offset)
+ __ Ldr(out_reg, HeapOperand(out_reg, offset));
+ codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
+ }
+ } else {
+ // Plain load with no read barrier.
+ // /* HeapReference<Object> */ out = *(out + offset)
+ __ Ldr(out_reg, HeapOperand(out_reg, offset));
+ GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
+ }
+}
+
+void InstructionCodeGeneratorARM64::GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
+ Location out,
+ Location obj,
+ uint32_t offset,
+ Location maybe_temp) {
+ Primitive::Type type = Primitive::kPrimNot;
+ Register out_reg = RegisterFrom(out, type);
+ Register obj_reg = RegisterFrom(obj, type);
+ if (kEmitCompilerReadBarrier) {
+ if (kUseBakerReadBarrier) {
+ // Load with fast path based Baker's read barrier.
+ Register temp_reg = RegisterFrom(maybe_temp, type);
+ // /* HeapReference<Object> */ out = *(obj + offset)
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
+ out,
+ obj_reg,
+ offset,
+ temp_reg,
+ /* needs_null_check */ false,
+ /* use_load_acquire */ false);
+ } else {
+ // Load with slow path based read barrier.
+ // /* HeapReference<Object> */ out = *(obj + offset)
+ __ Ldr(out_reg, HeapOperand(obj_reg, offset));
+ codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
+ }
+ } else {
+ // Plain load with no read barrier.
+ // /* HeapReference<Object> */ out = *(obj + offset)
+ __ Ldr(out_reg, HeapOperand(obj_reg, offset));
+ GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
+ }
+}
+
+void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(HInstruction* instruction,
+ Location root,
+ vixl::Register obj,
+ uint32_t offset) {
+ Register root_reg = RegisterFrom(root, Primitive::kPrimNot);
+ if (kEmitCompilerReadBarrier) {
+ if (kUseBakerReadBarrier) {
+ // Fast path implementation of art::ReadBarrier::BarrierForRoot when
+      // Baker's read barriers are used:
+ //
+ // root = obj.field;
+ // if (Thread::Current()->GetIsGcMarking()) {
+ // root = ReadBarrier::Mark(root)
+ // }
+
+ // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+ __ Ldr(root_reg, MemOperand(obj, offset));
+ static_assert(
+ sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
+ "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
+ "have different sizes.");
+ static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
+ "art::mirror::CompressedReference<mirror::Object> and int32_t "
+ "have different sizes.");
+
+ // Slow path used to mark the GC root `root`.
+ SlowPathCodeARM64* slow_path =
+ new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, root, root);
+ codegen_->AddSlowPath(slow_path);
+
+ MacroAssembler* masm = GetVIXLAssembler();
+ UseScratchRegisterScope temps(masm);
+ Register temp = temps.AcquireW();
+ // temp = Thread::Current()->GetIsGcMarking()
+ __ Ldr(temp, MemOperand(tr, Thread::IsGcMarkingOffset<kArm64WordSize>().Int32Value()));
+ __ Cbnz(temp, slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+ } else {
+ // GC root loaded through a slow path for read barriers other
+ // than Baker's.
+ // /* GcRoot<mirror::Object>* */ root = obj + offset
+ __ Add(root_reg.X(), obj.X(), offset);
+ // /* mirror::Object* */ root = root->Read()
+ codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
+ }
+ } else {
+ // Plain GC root load with no read barrier.
+ // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+ __ Ldr(root_reg, MemOperand(obj, offset));
+ // Note that GC roots are not affected by heap poisoning, thus we
+ // do not have to unpoison `root_reg` here.
+ }
+}
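
The Baker fast path emitted by GenerateGcRootFieldLoad above can be sketched in plain C++. This is only an illustrative model: Object, Thread and MarkGcRoot below are hypothetical stand-ins for the runtime types and the ReadBarrier::Mark entry point, not ART code.

// Hypothetical stand-ins; they only model the shape of the emitted code.
struct Object {};
struct Thread {
  bool is_gc_marking = false;                // models Thread::IsGcMarkingOffset<>()
  static Thread* Current() { static Thread t; return &t; }
};
static Object* MarkGcRoot(Object* root) { return root; }  // models ReadBarrier::Mark

// Models: root = *(obj + offset); if (gc is marking) root = Mark(root).
static Object* LoadGcRootWithBakerBarrier(Object** root_address) {
  Object* root = *root_address;              // plain root load (Ldr)
  if (Thread::Current()->is_gc_marking) {    // Ldr + Cbnz on the marking flag
    root = MarkGcRoot(root);                 // ReadBarrierMarkSlowPathARM64
  }
  return root;
}
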
+
+void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ vixl::Register obj,
+ uint32_t offset,
+ Register temp,
+ bool needs_null_check,
+ bool use_load_acquire) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+
+ // /* HeapReference<Object> */ ref = *(obj + offset)
+ Location no_index = Location::NoLocation();
+ GenerateReferenceLoadWithBakerReadBarrier(
+ instruction, ref, obj, offset, no_index, temp, needs_null_check, use_load_acquire);
+}
+
+void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ vixl::Register obj,
+ uint32_t data_offset,
+ Location index,
+ Register temp,
+ bool needs_null_check) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+
+ // Array cells are never volatile variables, therefore array loads
+ // never use Load-Acquire instructions on ARM64.
+ const bool use_load_acquire = false;
+
+ // /* HeapReference<Object> */ ref =
+ // *(obj + data_offset + index * sizeof(HeapReference<Object>))
+ GenerateReferenceLoadWithBakerReadBarrier(
+ instruction, ref, obj, data_offset, index, temp, needs_null_check, use_load_acquire);
+}
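
The array variant computes the element address as obj + data_offset + index * sizeof(HeapReference<Object>). A minimal sketch of that arithmetic, assuming the 4-byte heap reference size asserted further down; the helper name is made up for illustration.

#include <cstdint>

// Hypothetical helper; models obj + data_offset + index * sizeof(HeapReference<Object>).
static std::uintptr_t RefArrayElementAddress(std::uintptr_t obj,
                                             std::uint32_t data_offset,
                                             std::uint64_t index) {
  constexpr unsigned kRefSizeShift = 2;  // sizeof(HeapReference<Object>) == 4 bytes
  return obj + data_offset + static_cast<std::uintptr_t>(index << kRefSizeShift);
}
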
+
+void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ vixl::Register obj,
+ uint32_t offset,
+ Location index,
+ Register temp,
+ bool needs_null_check,
+ bool use_load_acquire) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+ // If `index` is a valid location, then we are emitting an array
+ // load, so we shouldn't be using a Load Acquire instruction.
+ // In other words: `index.IsValid()` => `!use_load_acquire`.
+ DCHECK(!index.IsValid() || !use_load_acquire);
+
+ MacroAssembler* masm = GetVIXLAssembler();
+ UseScratchRegisterScope temps(masm);
+
+ // In slow path based read barriers, the read barrier call is
+ // inserted after the original load. However, in fast path based
+ // Baker's read barriers, we need to perform the load of
+ // mirror::Object::monitor_ *before* the original reference load.
+ // This load-load ordering is required by the read barrier.
+ // The fast path/slow path (for Baker's algorithm) should look like:
+ //
+ // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
+ // lfence; // Load fence or artificial data dependency to prevent load-load reordering
+ // HeapReference<Object> ref = *src; // Original reference load.
+ // bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+ // if (is_gray) {
+ // ref = ReadBarrier::Mark(ref); // Performed by runtime entrypoint slow path.
+ // }
+ //
+ // Note: the original implementation in ReadBarrier::Barrier is
+ // slightly more complex as it performs additional checks that we do
+ // not do here for performance reasons.
+
+ Primitive::Type type = Primitive::kPrimNot;
+ Register ref_reg = RegisterFrom(ref, type);
+ DCHECK(obj.IsW());
+ uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
+
+ // /* int32_t */ monitor = obj->monitor_
+ __ Ldr(temp, HeapOperand(obj, monitor_offset));
+ if (needs_null_check) {
+ MaybeRecordImplicitNullCheck(instruction);
+ }
+ // /* LockWord */ lock_word = LockWord(monitor)
+ static_assert(sizeof(LockWord) == sizeof(int32_t),
+ "art::LockWord and int32_t have different sizes.");
+ // /* uint32_t */ rb_state = lock_word.ReadBarrierState()
+ __ Lsr(temp, temp, LockWord::kReadBarrierStateShift);
+ __ And(temp, temp, Operand(LockWord::kReadBarrierStateMask));
+ static_assert(
+ LockWord::kReadBarrierStateMask == ReadBarrier::rb_ptr_mask_,
+ "art::LockWord::kReadBarrierStateMask is not equal to art::ReadBarrier::rb_ptr_mask_.");
+
+ // Introduce a dependency on the high bits of rb_state, which shall
+ // be all zeroes, to prevent load-load reordering, and without using
+ // a memory barrier (which would be more expensive).
+ // temp2 = rb_state & ~LockWord::kReadBarrierStateMask = 0
+ Register temp2 = temps.AcquireW();
+ __ Bic(temp2, temp, Operand(LockWord::kReadBarrierStateMask));
+ // obj is unchanged by this operation, but its value now depends on
+ // temp2, which depends on temp.
+ __ Add(obj, obj, Operand(temp2));
+ temps.Release(temp2);
+
+ // The actual reference load.
+ if (index.IsValid()) {
+ static_assert(
+ sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+ "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+ temp2 = temps.AcquireW();
+ // /* HeapReference<Object> */ ref =
+ // *(obj + offset + index * sizeof(HeapReference<Object>))
+ MemOperand source = HeapOperand(obj);
+ if (index.IsConstant()) {
+ uint32_t computed_offset =
+ offset + (Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type));
+ source = HeapOperand(obj, computed_offset);
+ } else {
+ __ Add(temp2, obj, offset);
+ source = HeapOperand(temp2, XRegisterFrom(index), LSL, Primitive::ComponentSizeShift(type));
+ }
+ Load(type, ref_reg, source);
+ temps.Release(temp2);
+ } else {
+ // /* HeapReference<Object> */ ref = *(obj + offset)
+ MemOperand field = HeapOperand(obj, offset);
+ if (use_load_acquire) {
+ LoadAcquire(instruction, ref_reg, field, /* needs_null_check */ false);
+ } else {
+ Load(type, ref_reg, field);
+ }
+ }
+
+ // Object* ref = ref_addr->AsMirrorPtr()
+ GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
+
+ // Slow path used to mark the object `ref` when it is gray.
+ SlowPathCodeARM64* slow_path =
+ new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, ref, ref);
+ AddSlowPath(slow_path);
+
+ // if (rb_state == ReadBarrier::gray_ptr_)
+ // ref = ReadBarrier::Mark(ref);
+ __ Cmp(temp, ReadBarrier::gray_ptr_);
+ __ B(eq, slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+}
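
The pseudo-code documented inside GenerateReferenceLoadWithBakerReadBarrier can be written out as a compilable sketch. Everything below is a simplified stand-in (placeholder bit positions, a free function for ReadBarrier::Mark), and the acquire fence merely models the cheaper Bic/Add address-dependency trick used in the generated ARM64 code.

#include <atomic>
#include <cstdint>

// Simplified stand-ins modelling the documented fast path; not ART code.
struct Obj { std::uint32_t monitor; Obj* field; };
constexpr std::uint32_t kRbStateShift = 28;  // placeholder for LockWord::kReadBarrierStateShift
constexpr std::uint32_t kRbStateMask = 0x3;  // placeholder for LockWord::kReadBarrierStateMask
constexpr std::uint32_t kGray = 1;           // placeholder for ReadBarrier::gray_ptr_
static Obj* Mark(Obj* ref) { return ref; }   // models the ReadBarrier::Mark entry point

static Obj* LoadRefFieldWithBakerBarrier(Obj* obj) {
  // uint32_t rb_state = LockWord(obj->monitor_).ReadBarrierState();
  std::uint32_t rb_state = (obj->monitor >> kRbStateShift) & kRbStateMask;
  // The generated code orders the monitor load before the reference load with
  // an artificial address dependency (Bic/Add); an acquire fence models that.
  std::atomic_thread_fence(std::memory_order_acquire);
  Obj* ref = obj->field;                     // original reference load
  if (rb_state == kGray) {
    ref = Mark(ref);                         // ReadBarrierMarkSlowPathARM64 / runtime mark
  }
  return ref;
}
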
+
+void CodeGeneratorARM64::GenerateReadBarrierSlow(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index) {
DCHECK(kEmitCompilerReadBarrier);
+ // Insert a slow path based read barrier *after* the reference load.
+ //
// If heap poisoning is enabled, the unpoisoning of the loaded
// reference will be carried out by the runtime within the slow
// path.
@@ -4649,57 +4944,41 @@ void CodeGeneratorARM64::GenerateReadBarrier(HInstruction* instruction,
ReadBarrierForHeapReferenceSlowPathARM64(instruction, out, ref, obj, offset, index);
AddSlowPath(slow_path);
- // TODO: When read barrier has a fast path, add it here.
- /* Currently the read barrier call is inserted after the original load.
- * However, if we have a fast path, we need to perform the load of obj.LockWord *before* the
- * original load. This load-load ordering is required by the read barrier.
- * The fast path/slow path (for Baker's algorithm) should look like:
- *
- * bool isGray = obj.LockWord & kReadBarrierMask;
- * lfence; // load fence or artificial data dependence to prevent load-load reordering
- * ref = obj.field; // this is the original load
- * if (isGray) {
- * ref = Mark(ref); // ideally the slow path just does Mark(ref)
- * }
- */
-
__ B(slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
}
-void CodeGeneratorARM64::MaybeGenerateReadBarrier(HInstruction* instruction,
- Location out,
- Location ref,
- Location obj,
- uint32_t offset,
- Location index) {
+void CodeGeneratorARM64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index) {
if (kEmitCompilerReadBarrier) {
+ // Baker's read barriers shall be handled by the fast path
+ // (CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier).
+ DCHECK(!kUseBakerReadBarrier);
// If heap poisoning is enabled, unpoisoning will be taken care of
// by the runtime within the slow path.
- GenerateReadBarrier(instruction, out, ref, obj, offset, index);
+ GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
} else if (kPoisonHeapReferences) {
GetAssembler()->UnpoisonHeapReference(WRegisterFrom(out));
}
}
-void CodeGeneratorARM64::GenerateReadBarrierForRoot(HInstruction* instruction,
- Location out,
- Location root) {
+void CodeGeneratorARM64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
+ Location out,
+ Location root) {
DCHECK(kEmitCompilerReadBarrier);
+ // Insert a slow path based read barrier *after* the GC root load.
+ //
// Note that GC roots are not affected by heap poisoning, so we do
// not need to do anything special for this here.
SlowPathCodeARM64* slow_path =
new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathARM64(instruction, out, root);
AddSlowPath(slow_path);
- // TODO: Implement a fast path for ReadBarrierForRoot, performing
- // the following operation (for Baker's algorithm):
- //
- // if (thread.tls32_.is_gc_marking) {
- // root = Mark(root);
- // }
-
__ B(slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
}
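
Taken together, the ARM64 changes select among three reference-load shapes based on the build-time read-barrier configuration. A minimal sketch of that decision; the flag values and the enum are invented for illustration.

// Placeholder values; the real flags are ART build-time constants.
constexpr bool kEmitCompilerReadBarrier = true;
constexpr bool kUseBakerReadBarrier = true;

enum class RefLoadShape {
  kBakerFastPath,    // GenerateFieldLoadWithBakerReadBarrier / GenerateArrayLoadWithBakerReadBarrier
  kSlowPathBarrier,  // plain load + GenerateReadBarrierSlow / MaybeGenerateReadBarrierSlow
  kPlainLoad         // plain load + MaybeUnpoisonHeapReference
};

constexpr RefLoadShape ChooseRefLoadShape() {
  return kEmitCompilerReadBarrier
             ? (kUseBakerReadBarrier ? RefLoadShape::kBakerFastPath
                                     : RefLoadShape::kSlowPathBarrier)
             : RefLoadShape::kPlainLoad;
}
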
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index f2ff89488e..a9d1bbde98 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -208,14 +208,53 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator {
private:
void GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path, vixl::Register class_reg);
- void GenerateMemoryBarrier(MemBarrierKind kind);
void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor);
void HandleBinaryOp(HBinaryOperation* instr);
+
void HandleFieldSet(HInstruction* instruction,
const FieldInfo& field_info,
bool value_can_be_null);
void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
void HandleCondition(HCondition* instruction);
+
+ // Generate a heap reference load using one register `out`:
+ //
+ // out <- *(out + offset)
+ //
+ // while honoring heap poisoning and/or read barriers (if any).
+ //
+ // Location `maybe_temp` is used when generating a read barrier and
+ // shall be a register in that case; it may be an invalid location
+ // otherwise.
+ void GenerateReferenceLoadOneRegister(HInstruction* instruction,
+ Location out,
+ uint32_t offset,
+ Location maybe_temp);
+ // Generate a heap reference load using two different registers
+ // `out` and `obj`:
+ //
+ // out <- *(obj + offset)
+ //
+ // while honoring heap poisoning and/or read barriers (if any).
+ //
+ // Location `maybe_temp` is used when generating a Baker's (fast
+ // path) read barrier and shall be a register in that case; it may
+ // be an invalid location otherwise.
+ void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
+ Location out,
+ Location obj,
+ uint32_t offset,
+ Location maybe_temp);
+ // Generate a GC root reference load:
+ //
+ // root <- *(obj + offset)
+ //
+ // while honoring read barriers (if any).
+ void GenerateGcRootFieldLoad(HInstruction* instruction,
+ Location root,
+ vixl::Register obj,
+ uint32_t offset);
+
void HandleShift(HBinaryOperation* instr);
void GenerateImplicitNullCheck(HNullCheck* instruction);
void GenerateExplicitNullCheck(HNullCheck* instruction);
@@ -337,12 +376,11 @@ class CodeGeneratorARM64 : public CodeGenerator {
// Emit a write barrier.
void MarkGCCard(vixl::Register object, vixl::Register value, bool value_can_be_null);
+ void GenerateMemoryBarrier(MemBarrierKind kind);
+
// Register allocation.
- void SetupBlockedRegisters(bool is_baseline) const OVERRIDE;
- // AllocateFreeRegister() is only used when allocating registers locally
- // during CompileBaseline().
- Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE;
+ void SetupBlockedRegisters() const OVERRIDE;
Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
@@ -389,9 +427,12 @@ class CodeGeneratorARM64 : public CodeGenerator {
void AddLocationAsTemp(Location location, LocationSummary* locations) OVERRIDE;
void Load(Primitive::Type type, vixl::CPURegister dst, const vixl::MemOperand& src);
- void Store(Primitive::Type type, vixl::CPURegister rt, const vixl::MemOperand& dst);
- void LoadAcquire(HInstruction* instruction, vixl::CPURegister dst, const vixl::MemOperand& src);
- void StoreRelease(Primitive::Type type, vixl::CPURegister rt, const vixl::MemOperand& dst);
+ void Store(Primitive::Type type, vixl::CPURegister src, const vixl::MemOperand& dst);
+ void LoadAcquire(HInstruction* instruction,
+ vixl::CPURegister dst,
+ const vixl::MemOperand& src,
+ bool needs_null_check);
+ void StoreRelease(Primitive::Type type, vixl::CPURegister src, const vixl::MemOperand& dst);
// Generate code to invoke a runtime entry point.
void InvokeRuntime(QuickEntrypointEnum entrypoint,
@@ -426,7 +467,27 @@ class CodeGeneratorARM64 : public CodeGenerator {
void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
- // Generate a read barrier for a heap reference within `instruction`.
+ // Fast path implementation of ReadBarrier::Barrier for a heap
+ // reference field load when Baker's read barriers are used.
+ void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ vixl::Register obj,
+ uint32_t offset,
+ vixl::Register temp,
+ bool needs_null_check,
+ bool use_load_acquire);
+ // Fast path implementation of ReadBarrier::Barrier for a heap
+ // reference array load when Baker's read barriers are used.
+ void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ vixl::Register obj,
+ uint32_t data_offset,
+ Location index,
+ vixl::Register temp,
+ bool needs_null_check);
+
+ // Generate a read barrier for a heap reference within `instruction`
+ // using a slow path.
//
// A read barrier for an object reference read from the heap is
// implemented as a call to the artReadBarrierSlow runtime entry
@@ -443,23 +504,25 @@ class CodeGeneratorARM64 : public CodeGenerator {
// When `index` is provided (i.e. for array accesses), the offset
// value passed to artReadBarrierSlow is adjusted to take `index`
// into account.
- void GenerateReadBarrier(HInstruction* instruction,
- Location out,
- Location ref,
- Location obj,
- uint32_t offset,
- Location index = Location::NoLocation());
-
- // If read barriers are enabled, generate a read barrier for a heap reference.
- // If heap poisoning is enabled, also unpoison the reference in `out`.
- void MaybeGenerateReadBarrier(HInstruction* instruction,
- Location out,
- Location ref,
- Location obj,
- uint32_t offset,
- Location index = Location::NoLocation());
-
- // Generate a read barrier for a GC root within `instruction`.
+ void GenerateReadBarrierSlow(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index = Location::NoLocation());
+
+ // If read barriers are enabled, generate a read barrier for a heap
+ // reference using a slow path. If heap poisoning is enabled, also
+ // unpoison the reference in `out`.
+ void MaybeGenerateReadBarrierSlow(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index = Location::NoLocation());
+
+ // Generate a read barrier for a GC root within `instruction` using
+ // a slow path.
//
// A read barrier for an object reference GC root is implemented as
// a call to the artReadBarrierForRootSlow runtime entry point,
@@ -469,9 +532,20 @@ class CodeGeneratorARM64 : public CodeGenerator {
//
// The `out` location contains the value returned by
// artReadBarrierForRootSlow.
- void GenerateReadBarrierForRoot(HInstruction* instruction, Location out, Location root);
+ void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root);
private:
+ // Factored implementation of GenerateFieldLoadWithBakerReadBarrier
+ // and GenerateArrayLoadWithBakerReadBarrier.
+ void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ vixl::Register obj,
+ uint32_t offset,
+ Location index,
+ vixl::Register temp,
+ bool needs_null_check,
+ bool use_load_acquire);
+
using Uint64ToLiteralMap = ArenaSafeMap<uint64_t, vixl::Literal<uint64_t>*>;
using MethodToLiteralMap = ArenaSafeMap<MethodReference,
vixl::Literal<uint64_t>*,
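
The header comments above distinguish the one-register and two-register load helpers by whether the base register survives the load. A small sketch with plain pointer arithmetic standing in for the emitted Ldr (poisoning and barriers omitted).

#include <cstdint>

struct Object;  // opaque stand-in for a heap reference type

// GenerateReferenceLoadOneRegister: out <- *(out + offset). The base register
// is clobbered by the result, which is why the slow-path read barrier needs
// the original base saved in `maybe_temp` beforehand.
static void LoadOneRegister(Object*& out_and_base, std::uint32_t offset) {
  auto base = reinterpret_cast<std::uint8_t*>(out_and_base);
  out_and_base = *reinterpret_cast<Object**>(base + offset);
}

// GenerateReferenceLoadTwoRegisters: out <- *(obj + offset). The base `obj`
// stays live, so the slow path can use it directly.
static void LoadTwoRegisters(Object*& out, Object* obj, std::uint32_t offset) {
  auto base = reinterpret_cast<std::uint8_t*>(obj);
  out = *reinterpret_cast<Object**>(base + offset);
}
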
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index e34767cecd..5bd136a3f0 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -1042,7 +1042,7 @@ void CodeGeneratorMIPS::MarkGCCard(Register object, Register value) {
__ Bind(&done);
}
-void CodeGeneratorMIPS::SetupBlockedRegisters(bool is_baseline) const {
+void CodeGeneratorMIPS::SetupBlockedRegisters() const {
// Don't allocate the dalvik style register pair passing.
blocked_register_pairs_[A1_A2] = true;
@@ -1072,16 +1072,6 @@ void CodeGeneratorMIPS::SetupBlockedRegisters(bool is_baseline) const {
blocked_fpu_registers_[i] = true;
}
- if (is_baseline) {
- for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
- blocked_core_registers_[kCoreCalleeSaves[i]] = true;
- }
-
- for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
- blocked_fpu_registers_[kFpuCalleeSaves[i]] = true;
- }
- }
-
UpdateBlockedPairRegisters();
}
@@ -1096,52 +1086,6 @@ void CodeGeneratorMIPS::UpdateBlockedPairRegisters() const {
}
}
-Location CodeGeneratorMIPS::AllocateFreeRegister(Primitive::Type type) const {
- switch (type) {
- case Primitive::kPrimLong: {
- size_t reg = FindFreeEntry(blocked_register_pairs_, kNumberOfRegisterPairs);
- MipsManagedRegister pair =
- MipsManagedRegister::FromRegisterPair(static_cast<RegisterPair>(reg));
- DCHECK(!blocked_core_registers_[pair.AsRegisterPairLow()]);
- DCHECK(!blocked_core_registers_[pair.AsRegisterPairHigh()]);
-
- blocked_core_registers_[pair.AsRegisterPairLow()] = true;
- blocked_core_registers_[pair.AsRegisterPairHigh()] = true;
- UpdateBlockedPairRegisters();
- return Location::RegisterPairLocation(pair.AsRegisterPairLow(), pair.AsRegisterPairHigh());
- }
-
- case Primitive::kPrimByte:
- case Primitive::kPrimBoolean:
- case Primitive::kPrimChar:
- case Primitive::kPrimShort:
- case Primitive::kPrimInt:
- case Primitive::kPrimNot: {
- int reg = FindFreeEntry(blocked_core_registers_, kNumberOfCoreRegisters);
- // Block all register pairs that contain `reg`.
- for (int i = 0; i < kNumberOfRegisterPairs; i++) {
- MipsManagedRegister current =
- MipsManagedRegister::FromRegisterPair(static_cast<RegisterPair>(i));
- if (current.AsRegisterPairLow() == reg || current.AsRegisterPairHigh() == reg) {
- blocked_register_pairs_[i] = true;
- }
- }
- return Location::RegisterLocation(reg);
- }
-
- case Primitive::kPrimFloat:
- case Primitive::kPrimDouble: {
- int reg = FindFreeEntry(blocked_fpu_registers_, kNumberOfFRegisters);
- return Location::FpuRegisterLocation(reg);
- }
-
- case Primitive::kPrimVoid:
- LOG(FATAL) << "Unreachable type " << type;
- }
-
- UNREACHABLE();
-}
-
size_t CodeGeneratorMIPS::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
__ StoreToOffset(kStoreWord, Register(reg_id), SP, stack_index);
return kMipsWordSize;
@@ -3835,9 +3779,9 @@ void LocationsBuilderMIPS::VisitInvokeVirtual(HInvokeVirtual* invoke) {
}
void LocationsBuilderMIPS::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
- // When we do not run baseline, explicit clinit checks triggered by static
- // invokes must have been pruned by art::PrepareForRegisterAllocation.
- DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+ // Explicit clinit checks triggered by static invokes must have been pruned by
+ // art::PrepareForRegisterAllocation.
+ DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
IntrinsicLocationsBuilderMIPS intrinsic(codegen_);
if (intrinsic.TryDispatch(invoke)) {
@@ -3973,9 +3917,9 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke
}
void InstructionCodeGeneratorMIPS::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
- // When we do not run baseline, explicit clinit checks triggered by static
- // invokes must have been pruned by art::PrepareForRegisterAllocation.
- DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+ // Explicit clinit checks triggered by static invokes must have been pruned by
+ // art::PrepareForRegisterAllocation.
+ DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
if (TryGenerateIntrinsicCode(invoke, codegen_)) {
return;
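
With baseline register allocation gone, the removed AllocateFreeRegister bodies are no longer needed; they were little more than a first-free scan over the blocked-register flags set up by SetupBlockedRegisters. A rough sketch of that scan, assuming this is what the shared FindFreeEntry helper does (the name and failure handling below are placeholders).

#include <cstddef>

// Assumed behaviour of the helper the removed code relied on; the real
// FindFreeEntry lives in the shared code generator and fails hard when no
// register is free.
static std::size_t FindFirstFreeRegister(const bool* blocked, std::size_t count) {
  for (std::size_t i = 0; i < count; ++i) {
    if (!blocked[i]) {
      return i;  // first register not reserved by SetupBlockedRegisters()
    }
  }
  return count;  // placeholder failure handling
}
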
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index c3d4851ee9..2cde0ed90b 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -290,10 +290,7 @@ class CodeGeneratorMIPS : public CodeGenerator {
// Register allocation.
- void SetupBlockedRegisters(bool is_baseline) const OVERRIDE;
- // AllocateFreeRegister() is only used when allocating registers locally
- // during CompileBaseline().
- Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE;
+ void SetupBlockedRegisters() const OVERRIDE;
Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 79cd56d698..05054867fe 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -979,7 +979,7 @@ void CodeGeneratorMIPS64::MarkGCCard(GpuRegister object, GpuRegister value) {
__ Bind(&done);
}
-void CodeGeneratorMIPS64::SetupBlockedRegisters(bool is_baseline ATTRIBUTE_UNUSED) const {
+void CodeGeneratorMIPS64::SetupBlockedRegisters() const {
// ZERO, K0, K1, GP, SP, RA are always reserved and can't be allocated.
blocked_core_registers_[ZERO] = true;
blocked_core_registers_[K0] = true;
@@ -1003,8 +1003,7 @@ void CodeGeneratorMIPS64::SetupBlockedRegisters(bool is_baseline ATTRIBUTE_UNUSE
// TODO: review; anything else?
- // TODO: make these two for's conditional on is_baseline once
- // all the issues with register saving/restoring are sorted out.
+ // TODO: remove once all the issues with register saving/restoring are sorted out.
for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
blocked_core_registers_[kCoreCalleeSaves[i]] = true;
}
@@ -1014,20 +1013,6 @@ void CodeGeneratorMIPS64::SetupBlockedRegisters(bool is_baseline ATTRIBUTE_UNUSE
}
}
-Location CodeGeneratorMIPS64::AllocateFreeRegister(Primitive::Type type) const {
- if (type == Primitive::kPrimVoid) {
- LOG(FATAL) << "Unreachable type " << type;
- }
-
- if (Primitive::IsFloatingPointType(type)) {
- size_t reg = FindFreeEntry(blocked_fpu_registers_, kNumberOfFpuRegisters);
- return Location::FpuRegisterLocation(reg);
- } else {
- size_t reg = FindFreeEntry(blocked_core_registers_, kNumberOfGpuRegisters);
- return Location::RegisterLocation(reg);
- }
-}
-
size_t CodeGeneratorMIPS64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
__ StoreToOffset(kStoreDoubleword, GpuRegister(reg_id), SP, stack_index);
return kMips64WordSize;
@@ -3031,9 +3016,9 @@ void LocationsBuilderMIPS64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
}
void LocationsBuilderMIPS64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
- // When we do not run baseline, explicit clinit checks triggered by static
- // invokes must have been pruned by art::PrepareForRegisterAllocation.
- DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+ // Explicit clinit checks triggered by static invokes must have been pruned by
+ // art::PrepareForRegisterAllocation.
+ DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
IntrinsicLocationsBuilderMIPS64 intrinsic(codegen_);
if (intrinsic.TryDispatch(invoke)) {
@@ -3182,9 +3167,9 @@ void CodeGeneratorMIPS64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo
}
void InstructionCodeGeneratorMIPS64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
- // When we do not run baseline, explicit clinit checks triggered by static
- // invokes must have been pruned by art::PrepareForRegisterAllocation.
- DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+ // Explicit clinit checks triggered by static invokes must have been pruned by
+ // art::PrepareForRegisterAllocation.
+ DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
if (TryGenerateIntrinsicCode(invoke, codegen_)) {
return;
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index 7182e8e987..140ff95f14 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -289,10 +289,7 @@ class CodeGeneratorMIPS64 : public CodeGenerator {
// Register allocation.
- void SetupBlockedRegisters(bool is_baseline) const OVERRIDE;
- // AllocateFreeRegister() is only used when allocating registers locally
- // during CompileBaseline().
- Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE;
+ void SetupBlockedRegisters() const OVERRIDE;
Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 6259acded3..f7ccdd8b8f 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -817,65 +817,13 @@ CodeGeneratorX86::CodeGeneratorX86(HGraph* graph,
AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
}
-Location CodeGeneratorX86::AllocateFreeRegister(Primitive::Type type) const {
- switch (type) {
- case Primitive::kPrimLong: {
- size_t reg = FindFreeEntry(blocked_register_pairs_, kNumberOfRegisterPairs);
- X86ManagedRegister pair =
- X86ManagedRegister::FromRegisterPair(static_cast<RegisterPair>(reg));
- DCHECK(!blocked_core_registers_[pair.AsRegisterPairLow()]);
- DCHECK(!blocked_core_registers_[pair.AsRegisterPairHigh()]);
- blocked_core_registers_[pair.AsRegisterPairLow()] = true;
- blocked_core_registers_[pair.AsRegisterPairHigh()] = true;
- UpdateBlockedPairRegisters();
- return Location::RegisterPairLocation(pair.AsRegisterPairLow(), pair.AsRegisterPairHigh());
- }
-
- case Primitive::kPrimByte:
- case Primitive::kPrimBoolean:
- case Primitive::kPrimChar:
- case Primitive::kPrimShort:
- case Primitive::kPrimInt:
- case Primitive::kPrimNot: {
- Register reg = static_cast<Register>(
- FindFreeEntry(blocked_core_registers_, kNumberOfCpuRegisters));
- // Block all register pairs that contain `reg`.
- for (int i = 0; i < kNumberOfRegisterPairs; i++) {
- X86ManagedRegister current =
- X86ManagedRegister::FromRegisterPair(static_cast<RegisterPair>(i));
- if (current.AsRegisterPairLow() == reg || current.AsRegisterPairHigh() == reg) {
- blocked_register_pairs_[i] = true;
- }
- }
- return Location::RegisterLocation(reg);
- }
-
- case Primitive::kPrimFloat:
- case Primitive::kPrimDouble: {
- return Location::FpuRegisterLocation(
- FindFreeEntry(blocked_fpu_registers_, kNumberOfXmmRegisters));
- }
-
- case Primitive::kPrimVoid:
- LOG(FATAL) << "Unreachable type " << type;
- }
-
- return Location::NoLocation();
-}
-
-void CodeGeneratorX86::SetupBlockedRegisters(bool is_baseline) const {
+void CodeGeneratorX86::SetupBlockedRegisters() const {
// Don't allocate the dalvik style register pair passing.
blocked_register_pairs_[ECX_EDX] = true;
// Stack register is always reserved.
blocked_core_registers_[ESP] = true;
- if (is_baseline) {
- blocked_core_registers_[EBP] = true;
- blocked_core_registers_[ESI] = true;
- blocked_core_registers_[EDI] = true;
- }
-
UpdateBlockedPairRegisters();
}
@@ -1981,9 +1929,9 @@ void InstructionCodeGeneratorX86::VisitInvokeUnresolved(HInvokeUnresolved* invok
}
void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
- // When we do not run baseline, explicit clinit checks triggered by static
- // invokes must have been pruned by art::PrepareForRegisterAllocation.
- DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+ // Explicit clinit checks triggered by static invokes must have been pruned by
+ // art::PrepareForRegisterAllocation.
+ DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
IntrinsicLocationsBuilderX86 intrinsic(codegen_);
if (intrinsic.TryDispatch(invoke)) {
@@ -1999,17 +1947,6 @@ void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invok
if (invoke->HasPcRelativeDexCache()) {
invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister());
}
-
- if (codegen_->IsBaseline()) {
- // Baseline does not have enough registers if the current method also
- // needs a register. We therefore do not require a register for it, and let
- // the code generation of the invoke handle it.
- LocationSummary* locations = invoke->GetLocations();
- Location location = locations->InAt(invoke->GetSpecialInputIndex());
- if (location.IsUnallocated() && location.GetPolicy() == Location::kRequiresRegister) {
- locations->SetInAt(invoke->GetSpecialInputIndex(), Location::NoLocation());
- }
- }
}
static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86* codegen) {
@@ -2022,9 +1959,9 @@ static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86* codegen)
}
void InstructionCodeGeneratorX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
- // When we do not run baseline, explicit clinit checks triggered by static
- // invokes must have been pruned by art::PrepareForRegisterAllocation.
- DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+ // Explicit clinit checks triggered by static invokes must have been pruned by
+ // art::PrepareForRegisterAllocation.
+ DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
if (TryGenerateIntrinsicCode(invoke, codegen_)) {
return;
@@ -4286,7 +4223,7 @@ void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
if (current_method.IsRegister()) {
method_reg = current_method.AsRegister<Register>();
} else {
- DCHECK(IsBaseline() || invoke->GetLocations()->Intrinsified());
+ DCHECK(invoke->GetLocations()->Intrinsified());
DCHECK(!current_method.IsValid());
method_reg = reg;
__ movl(reg, Address(ESP, kCurrentMethodStackOffset));
@@ -5076,11 +5013,6 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
}
void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) {
- // This location builder might end up asking to up to four registers, which is
- // not currently possible for baseline. The situation in which we need four
- // registers cannot be met by baseline though, because it has not run any
- // optimization.
-
Primitive::Type value_type = instruction->GetComponentType();
bool needs_write_barrier =
@@ -6077,7 +6009,7 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
case TypeCheckKind::kUnresolvedCheck:
case TypeCheckKind::kInterfaceCheck: {
// Note that we indeed only call on slow path, but we always go
- // into the slow path for the unresolved & interface check
+ // into the slow path for the unresolved and interface check
// cases.
//
// We cannot directly call the InstanceofNonTrivial runtime
@@ -6308,8 +6240,8 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
case TypeCheckKind::kUnresolvedCheck:
case TypeCheckKind::kInterfaceCheck:
- // We always go into the type check slow path for the unresolved &
- // interface check cases.
+ // We always go into the type check slow path for the unresolved
+ // and interface check cases.
//
// We cannot directly call the CheckCast runtime entry point
// without resorting to a type checking slow path here (i.e. by
@@ -6588,6 +6520,8 @@ void InstructionCodeGeneratorX86::GenerateGcRootFieldLoad(HInstruction* instruct
// Plain GC root load with no read barrier.
// /* GcRoot<mirror::Object> */ root = *(obj + offset)
__ movl(root_reg, Address(obj, offset));
+ // Note that GC roots are not affected by heap poisoning, thus we
+ // do not have to unpoison `root_reg` here.
}
}
@@ -6650,7 +6584,9 @@ void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i
// Note: the original implementation in ReadBarrier::Barrier is
// slightly more complex as:
// - it implements the load-load fence using a data dependency on
- // the high-bits of rb_state, which are expected to be all zeroes;
+ // the high-bits of rb_state, which are expected to be all zeroes
+ // (we use CodeGeneratorX86::GenerateMemoryBarrier instead here,
+ // which is a no-op thanks to the x86 memory model);
// - it performs additional checks that we do not do here for
// performance reasons.
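
The parenthetical added above relies on the x86 memory model: loads are not reordered with other loads under TSO, so the load-load ordering needed by the Baker fast path costs nothing. A standalone sketch of that asymmetry; the enum is an illustrative subset, not the ART MemBarrierKind definition.

#include <atomic>

enum class BarrierKind { kLoadAny, kAnyStore, kStoreLoad };  // illustrative subset only

// On x86/x86-64 only a store-load barrier needs a real fence (mfence); the
// load-load and load-store/store-store orderings come for free under TSO,
// which is why the load-load fence in the Baker fast path is a no-op here.
static void EmitBarrier(BarrierKind kind) {
  if (kind == BarrierKind::kStoreLoad) {
    std::atomic_thread_fence(std::memory_order_seq_cst);  // mfence
  }
  // kLoadAny / kAnyStore: nothing to emit.
}
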
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index c65c423eae..43e9543e41 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -359,9 +359,7 @@ class CodeGeneratorX86 : public CodeGenerator {
return GetLabelOf(block)->Position();
}
- void SetupBlockedRegisters(bool is_baseline) const OVERRIDE;
-
- Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE;
+ void SetupBlockedRegisters() const OVERRIDE;
Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
@@ -453,7 +451,7 @@ class CodeGeneratorX86 : public CodeGenerator {
// Fast path implementation of ReadBarrier::Barrier for a heap
// reference field load when Baker's read barriers are used.
void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
- Location out,
+ Location ref,
Register obj,
uint32_t offset,
Location temp,
@@ -461,7 +459,7 @@ class CodeGeneratorX86 : public CodeGenerator {
// Fast path implementation of ReadBarrier::Barrier for a heap
// reference array load when Baker's read barriers are used.
void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
- Location out,
+ Location ref,
Register obj,
uint32_t data_offset,
Location index,
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index e024ce2b6c..2ce2d91502 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -1002,47 +1002,12 @@ InstructionCodeGeneratorX86_64::InstructionCodeGeneratorX86_64(HGraph* graph,
assembler_(codegen->GetAssembler()),
codegen_(codegen) {}
-Location CodeGeneratorX86_64::AllocateFreeRegister(Primitive::Type type) const {
- switch (type) {
- case Primitive::kPrimLong:
- case Primitive::kPrimByte:
- case Primitive::kPrimBoolean:
- case Primitive::kPrimChar:
- case Primitive::kPrimShort:
- case Primitive::kPrimInt:
- case Primitive::kPrimNot: {
- size_t reg = FindFreeEntry(blocked_core_registers_, kNumberOfCpuRegisters);
- return Location::RegisterLocation(reg);
- }
-
- case Primitive::kPrimFloat:
- case Primitive::kPrimDouble: {
- size_t reg = FindFreeEntry(blocked_fpu_registers_, kNumberOfFloatRegisters);
- return Location::FpuRegisterLocation(reg);
- }
-
- case Primitive::kPrimVoid:
- LOG(FATAL) << "Unreachable type " << type;
- }
-
- return Location::NoLocation();
-}
-
-void CodeGeneratorX86_64::SetupBlockedRegisters(bool is_baseline) const {
+void CodeGeneratorX86_64::SetupBlockedRegisters() const {
// Stack register is always reserved.
blocked_core_registers_[RSP] = true;
// Block the register used as TMP.
blocked_core_registers_[TMP] = true;
-
- if (is_baseline) {
- for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
- blocked_core_registers_[kCoreCalleeSaves[i]] = true;
- }
- for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
- blocked_fpu_registers_[kFpuCalleeSaves[i]] = true;
- }
- }
}
static dwarf::Reg DWARFReg(Register reg) {
@@ -2161,9 +2126,9 @@ void InstructionCodeGeneratorX86_64::VisitInvokeUnresolved(HInvokeUnresolved* in
}
void LocationsBuilderX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
- // When we do not run baseline, explicit clinit checks triggered by static
- // invokes must have been pruned by art::PrepareForRegisterAllocation.
- DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+ // Explicit clinit checks triggered by static invokes must have been pruned by
+ // art::PrepareForRegisterAllocation.
+ DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
if (intrinsic.TryDispatch(invoke)) {
@@ -2183,9 +2148,9 @@ static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86_64* codeg
}
void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
- // When we do not run baseline, explicit clinit checks triggered by static
- // invokes must have been pruned by art::PrepareForRegisterAllocation.
- DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
+ // Explicit clinit checks triggered by static invokes must have been pruned by
+ // art::PrepareForRegisterAllocation.
+ DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
if (TryGenerateIntrinsicCode(invoke, codegen_)) {
return;
@@ -4698,13 +4663,13 @@ void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) {
bool needs_write_barrier =
CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
- bool may_need_runtime_call = instruction->NeedsTypeCheck();
+ bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
bool object_array_set_with_read_barrier =
kEmitCompilerReadBarrier && (value_type == Primitive::kPrimNot);
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
instruction,
- (may_need_runtime_call || object_array_set_with_read_barrier) ?
+ (may_need_runtime_call_for_type_check || object_array_set_with_read_barrier) ?
LocationSummary::kCallOnSlowPath :
LocationSummary::kNoCall);
@@ -4733,7 +4698,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
Location index = locations->InAt(1);
Location value = locations->InAt(2);
Primitive::Type value_type = instruction->GetComponentType();
- bool may_need_runtime_call = instruction->NeedsTypeCheck();
+ bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
bool needs_write_barrier =
CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
@@ -4785,7 +4750,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
__ movl(address, Immediate(0));
codegen_->MaybeRecordImplicitNullCheck(instruction);
DCHECK(!needs_write_barrier);
- DCHECK(!may_need_runtime_call);
+ DCHECK(!may_need_runtime_call_for_type_check);
break;
}
@@ -4794,7 +4759,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
NearLabel done, not_null, do_put;
SlowPathCode* slow_path = nullptr;
CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
- if (may_need_runtime_call) {
+ if (may_need_runtime_call_for_type_check) {
slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathX86_64(instruction);
codegen_->AddSlowPath(slow_path);
if (instruction->GetValueCanBeNull()) {
@@ -4872,7 +4837,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
} else {
__ movl(address, register_value);
}
- if (!may_need_runtime_call) {
+ if (!may_need_runtime_call_for_type_check) {
codegen_->MaybeRecordImplicitNullCheck(instruction);
}
@@ -5661,7 +5626,7 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
case TypeCheckKind::kUnresolvedCheck:
case TypeCheckKind::kInterfaceCheck: {
// Note that we indeed only call on slow path, but we always go
- // into the slow path for the unresolved & interface check
+ // into the slow path for the unresolved and interface check
// cases.
//
// We cannot directly call the InstanceofNonTrivial runtime
@@ -5892,8 +5857,8 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
case TypeCheckKind::kUnresolvedCheck:
case TypeCheckKind::kInterfaceCheck:
- // We always go into the type check slow path for the unresolved &
- // interface check cases.
+ // We always go into the type check slow path for the unresolved
+ // and interface check cases.
//
// We cannot directly call the CheckCast runtime entry point
// without resorting to a type checking slow path here (i.e. by
@@ -6155,6 +6120,8 @@ void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(HInstruction* instr
// Plain GC root load with no read barrier.
// /* GcRoot<mirror::Object> */ root = *(obj + offset)
__ movl(root_reg, Address(obj, offset));
+ // Note that GC roots are not affected by heap poisoning, thus we
+ // do not have to unpoison `root_reg` here.
}
}
@@ -6217,7 +6184,9 @@ void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction
// Note: the original implementation in ReadBarrier::Barrier is
// slightly more complex as:
// - it implements the load-load fence using a data dependency on
- // the high-bits of rb_state, which are expected to be all zeroes;
+ // the high-bits of rb_state, which are expected to be all zeroes
+ // (we use CodeGeneratorX86_64::GenerateMemoryBarrier instead
+ // here, which is a no-op thanks to the x86-64 memory model);
// - it performs additional checks that we do not do here for
// performance reasons.
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 505c9dcdad..82aabb04d3 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -347,8 +347,7 @@ class CodeGeneratorX86_64 : public CodeGenerator {
Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
- void SetupBlockedRegisters(bool is_baseline) const OVERRIDE;
- Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE;
+ void SetupBlockedRegisters() const OVERRIDE;
void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE;
void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE;
void Finalize(CodeAllocator* allocator) OVERRIDE;
@@ -401,7 +400,7 @@ class CodeGeneratorX86_64 : public CodeGenerator {
// Fast path implementation of ReadBarrier::Barrier for a heap
// reference field load when Baker's read barriers are used.
void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
- Location out,
+ Location ref,
CpuRegister obj,
uint32_t offset,
Location temp,
@@ -409,7 +408,7 @@ class CodeGeneratorX86_64 : public CodeGenerator {
// Fast path implementation of ReadBarrier::Barrier for a heap
// reference array load when Baker's read barriers are used.
void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
- Location out,
+ Location ref,
CpuRegister obj,
uint32_t data_offset,
Location index,
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index d970704368..19d63de499 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -40,6 +40,7 @@
#include "dex_file.h"
#include "dex_instruction.h"
#include "driver/compiler_options.h"
+#include "graph_checker.h"
#include "nodes.h"
#include "optimizing_unit_test.h"
#include "prepare_for_register_allocation.h"
@@ -70,8 +71,8 @@ class TestCodeGeneratorARM : public arm::CodeGeneratorARM {
AddAllocatedRegister(Location::RegisterLocation(arm::R7));
}
- void SetupBlockedRegisters(bool is_baseline) const OVERRIDE {
- arm::CodeGeneratorARM::SetupBlockedRegisters(is_baseline);
+ void SetupBlockedRegisters() const OVERRIDE {
+ arm::CodeGeneratorARM::SetupBlockedRegisters();
blocked_core_registers_[arm::R4] = true;
blocked_core_registers_[arm::R6] = false;
blocked_core_registers_[arm::R7] = false;
@@ -90,8 +91,8 @@ class TestCodeGeneratorX86 : public x86::CodeGeneratorX86 {
AddAllocatedRegister(Location::RegisterLocation(x86::EDI));
}
- void SetupBlockedRegisters(bool is_baseline) const OVERRIDE {
- x86::CodeGeneratorX86::SetupBlockedRegisters(is_baseline);
+ void SetupBlockedRegisters() const OVERRIDE {
+ x86::CodeGeneratorX86::SetupBlockedRegisters();
// ebx is a callee-save register in C, but caller-save for ART.
blocked_core_registers_[x86::EBX] = true;
blocked_register_pairs_[x86::EAX_EBX] = true;
@@ -200,259 +201,228 @@ static void Run(const InternalCodeAllocator& allocator,
}
template <typename Expected>
-static void RunCodeBaseline(InstructionSet target_isa,
- HGraph* graph,
- bool has_result,
- Expected expected) {
- InternalCodeAllocator allocator;
-
- CompilerOptions compiler_options;
- std::unique_ptr<const X86InstructionSetFeatures> features_x86(
- X86InstructionSetFeatures::FromCppDefines());
- TestCodeGeneratorX86 codegenX86(graph, *features_x86.get(), compiler_options);
- // We avoid doing a stack overflow check that requires the runtime being setup,
- // by making sure the compiler knows the methods we are running are leaf methods.
- codegenX86.CompileBaseline(&allocator, true);
- if (target_isa == kX86) {
- Run(allocator, codegenX86, has_result, expected);
- }
+static void RunCode(CodeGenerator* codegen,
+ HGraph* graph,
+ std::function<void(HGraph*)> hook_before_codegen,
+ bool has_result,
+ Expected expected) {
+ ASSERT_TRUE(graph->IsInSsaForm());
- std::unique_ptr<const ArmInstructionSetFeatures> features_arm(
- ArmInstructionSetFeatures::FromCppDefines());
- TestCodeGeneratorARM codegenARM(graph, *features_arm.get(), compiler_options);
- codegenARM.CompileBaseline(&allocator, true);
- if (target_isa == kArm || target_isa == kThumb2) {
- Run(allocator, codegenARM, has_result, expected);
- }
-
- std::unique_ptr<const X86_64InstructionSetFeatures> features_x86_64(
- X86_64InstructionSetFeatures::FromCppDefines());
- x86_64::CodeGeneratorX86_64 codegenX86_64(graph, *features_x86_64.get(), compiler_options);
- codegenX86_64.CompileBaseline(&allocator, true);
- if (target_isa == kX86_64) {
- Run(allocator, codegenX86_64, has_result, expected);
- }
-
- std::unique_ptr<const Arm64InstructionSetFeatures> features_arm64(
- Arm64InstructionSetFeatures::FromCppDefines());
- arm64::CodeGeneratorARM64 codegenARM64(graph, *features_arm64.get(), compiler_options);
- codegenARM64.CompileBaseline(&allocator, true);
- if (target_isa == kArm64) {
- Run(allocator, codegenARM64, has_result, expected);
- }
-
- std::unique_ptr<const MipsInstructionSetFeatures> features_mips(
- MipsInstructionSetFeatures::FromCppDefines());
- mips::CodeGeneratorMIPS codegenMIPS(graph, *features_mips.get(), compiler_options);
- codegenMIPS.CompileBaseline(&allocator, true);
- if (kRuntimeISA == kMips) {
- Run(allocator, codegenMIPS, has_result, expected);
- }
-
- std::unique_ptr<const Mips64InstructionSetFeatures> features_mips64(
- Mips64InstructionSetFeatures::FromCppDefines());
- mips64::CodeGeneratorMIPS64 codegenMIPS64(graph, *features_mips64.get(), compiler_options);
- codegenMIPS64.CompileBaseline(&allocator, true);
- if (target_isa == kMips64) {
- Run(allocator, codegenMIPS64, has_result, expected);
- }
-}
+ SSAChecker graph_checker(graph);
+ graph_checker.Run();
+ ASSERT_TRUE(graph_checker.IsValid());
-template <typename Expected>
-static void RunCodeOptimized(CodeGenerator* codegen,
- HGraph* graph,
- std::function<void(HGraph*)> hook_before_codegen,
- bool has_result,
- Expected expected) {
- // Tests may have already computed it.
- if (graph->GetReversePostOrder().empty()) {
- graph->BuildDominatorTree();
- }
SsaLivenessAnalysis liveness(graph, codegen);
- liveness.Analyze();
- RegisterAllocator register_allocator(graph->GetArena(), codegen, liveness);
- register_allocator.AllocateRegisters();
+ PrepareForRegisterAllocation(graph).Run();
+ liveness.Analyze();
+ RegisterAllocator(graph->GetArena(), codegen, liveness).AllocateRegisters();
hook_before_codegen(graph);
InternalCodeAllocator allocator;
- codegen->CompileOptimized(&allocator);
+ codegen->Compile(&allocator);
Run(allocator, *codegen, has_result, expected);
}
template <typename Expected>
-static void RunCodeOptimized(InstructionSet target_isa,
- HGraph* graph,
- std::function<void(HGraph*)> hook_before_codegen,
- bool has_result,
- Expected expected) {
+static void RunCode(InstructionSet target_isa,
+ HGraph* graph,
+ std::function<void(HGraph*)> hook_before_codegen,
+ bool has_result,
+ Expected expected) {
CompilerOptions compiler_options;
if (target_isa == kArm || target_isa == kThumb2) {
std::unique_ptr<const ArmInstructionSetFeatures> features_arm(
ArmInstructionSetFeatures::FromCppDefines());
TestCodeGeneratorARM codegenARM(graph, *features_arm.get(), compiler_options);
- RunCodeOptimized(&codegenARM, graph, hook_before_codegen, has_result, expected);
+ RunCode(&codegenARM, graph, hook_before_codegen, has_result, expected);
} else if (target_isa == kArm64) {
std::unique_ptr<const Arm64InstructionSetFeatures> features_arm64(
Arm64InstructionSetFeatures::FromCppDefines());
arm64::CodeGeneratorARM64 codegenARM64(graph, *features_arm64.get(), compiler_options);
- RunCodeOptimized(&codegenARM64, graph, hook_before_codegen, has_result, expected);
+ RunCode(&codegenARM64, graph, hook_before_codegen, has_result, expected);
} else if (target_isa == kX86) {
std::unique_ptr<const X86InstructionSetFeatures> features_x86(
X86InstructionSetFeatures::FromCppDefines());
x86::CodeGeneratorX86 codegenX86(graph, *features_x86.get(), compiler_options);
- RunCodeOptimized(&codegenX86, graph, hook_before_codegen, has_result, expected);
+ RunCode(&codegenX86, graph, hook_before_codegen, has_result, expected);
} else if (target_isa == kX86_64) {
std::unique_ptr<const X86_64InstructionSetFeatures> features_x86_64(
X86_64InstructionSetFeatures::FromCppDefines());
x86_64::CodeGeneratorX86_64 codegenX86_64(graph, *features_x86_64.get(), compiler_options);
- RunCodeOptimized(&codegenX86_64, graph, hook_before_codegen, has_result, expected);
+ RunCode(&codegenX86_64, graph, hook_before_codegen, has_result, expected);
} else if (target_isa == kMips) {
std::unique_ptr<const MipsInstructionSetFeatures> features_mips(
MipsInstructionSetFeatures::FromCppDefines());
mips::CodeGeneratorMIPS codegenMIPS(graph, *features_mips.get(), compiler_options);
- RunCodeOptimized(&codegenMIPS, graph, hook_before_codegen, has_result, expected);
+ RunCode(&codegenMIPS, graph, hook_before_codegen, has_result, expected);
} else if (target_isa == kMips64) {
std::unique_ptr<const Mips64InstructionSetFeatures> features_mips64(
Mips64InstructionSetFeatures::FromCppDefines());
mips64::CodeGeneratorMIPS64 codegenMIPS64(graph, *features_mips64.get(), compiler_options);
- RunCodeOptimized(&codegenMIPS64, graph, hook_before_codegen, has_result, expected);
+ RunCode(&codegenMIPS64, graph, hook_before_codegen, has_result, expected);
}
}
-static void TestCode(InstructionSet target_isa,
- const uint16_t* data,
+static ::std::vector<InstructionSet> GetTargetISAs() {
+ ::std::vector<InstructionSet> v;
+  // Add all ISAs that are executable on hardware or on a simulator.
+ const ::std::vector<InstructionSet> executable_isa_candidates = {
+ kArm,
+ kArm64,
+ kThumb2,
+ kX86,
+ kX86_64,
+ kMips,
+ kMips64
+ };
+
+ for (auto target_isa : executable_isa_candidates) {
+ if (CanExecute(target_isa)) {
+ v.push_back(target_isa);
+ }
+ }
+
+ return v;
+}
+
+static void TestCode(const uint16_t* data,
bool has_result = false,
int32_t expected = 0) {
- ArenaPool pool;
- ArenaAllocator arena(&pool);
- HGraph* graph = CreateGraph(&arena);
- HGraphBuilder builder(graph);
- const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
- bool graph_built = builder.BuildGraph(*item);
- ASSERT_TRUE(graph_built);
- // Remove suspend checks, they cannot be executed in this context.
- RemoveSuspendChecks(graph);
- RunCodeBaseline(target_isa, graph, has_result, expected);
-}
-
-static void TestCodeLong(InstructionSet target_isa,
- const uint16_t* data,
+ for (InstructionSet target_isa : GetTargetISAs()) {
+ ArenaPool pool;
+ ArenaAllocator arena(&pool);
+ HGraph* graph = CreateGraph(&arena);
+ HGraphBuilder builder(graph);
+ const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
+ bool graph_built = builder.BuildGraph(*item);
+ ASSERT_TRUE(graph_built);
+    // Remove suspend checks; they cannot be executed in this context.
+ RemoveSuspendChecks(graph);
+ TransformToSsa(graph);
+ RunCode(target_isa, graph, [](HGraph*) {}, has_result, expected);
+ }
+}
+
+static void TestCodeLong(const uint16_t* data,
bool has_result,
int64_t expected) {
- ArenaPool pool;
- ArenaAllocator arena(&pool);
- HGraph* graph = CreateGraph(&arena);
- HGraphBuilder builder(graph, Primitive::kPrimLong);
- const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
- bool graph_built = builder.BuildGraph(*item);
- ASSERT_TRUE(graph_built);
- // Remove suspend checks, they cannot be executed in this context.
- RemoveSuspendChecks(graph);
- RunCodeBaseline(target_isa, graph, has_result, expected);
+ for (InstructionSet target_isa : GetTargetISAs()) {
+ ArenaPool pool;
+ ArenaAllocator arena(&pool);
+ HGraph* graph = CreateGraph(&arena);
+ HGraphBuilder builder(graph, Primitive::kPrimLong);
+ const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
+ bool graph_built = builder.BuildGraph(*item);
+ ASSERT_TRUE(graph_built);
+    // Remove suspend checks; they cannot be executed in this context.
+ RemoveSuspendChecks(graph);
+ TransformToSsa(graph);
+ RunCode(target_isa, graph, [](HGraph*) {}, has_result, expected);
+ }
}
-class CodegenTest: public ::testing::TestWithParam<InstructionSet> {};
+class CodegenTest : public CommonCompilerTest {};
-TEST_P(CodegenTest, ReturnVoid) {
+TEST_F(CodegenTest, ReturnVoid) {
const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(Instruction::RETURN_VOID);
- TestCode(GetParam(), data);
+ TestCode(data);
}
-TEST_P(CodegenTest, CFG1) {
+TEST_F(CodegenTest, CFG1) {
const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(
Instruction::GOTO | 0x100,
Instruction::RETURN_VOID);
- TestCode(GetParam(), data);
+ TestCode(data);
}
-TEST_P(CodegenTest, CFG2) {
+TEST_F(CodegenTest, CFG2) {
const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(
Instruction::GOTO | 0x100,
Instruction::GOTO | 0x100,
Instruction::RETURN_VOID);
- TestCode(GetParam(), data);
+ TestCode(data);
}
-TEST_P(CodegenTest, CFG3) {
+TEST_F(CodegenTest, CFG3) {
const uint16_t data1[] = ZERO_REGISTER_CODE_ITEM(
Instruction::GOTO | 0x200,
Instruction::RETURN_VOID,
Instruction::GOTO | 0xFF00);
- TestCode(GetParam(), data1);
+ TestCode(data1);
const uint16_t data2[] = ZERO_REGISTER_CODE_ITEM(
Instruction::GOTO_16, 3,
Instruction::RETURN_VOID,
Instruction::GOTO_16, 0xFFFF);
- TestCode(GetParam(), data2);
+ TestCode(data2);
const uint16_t data3[] = ZERO_REGISTER_CODE_ITEM(
Instruction::GOTO_32, 4, 0,
Instruction::RETURN_VOID,
Instruction::GOTO_32, 0xFFFF, 0xFFFF);
- TestCode(GetParam(), data3);
+ TestCode(data3);
}
-TEST_P(CodegenTest, CFG4) {
+TEST_F(CodegenTest, CFG4) {
const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(
Instruction::RETURN_VOID,
Instruction::GOTO | 0x100,
Instruction::GOTO | 0xFE00);
- TestCode(GetParam(), data);
+ TestCode(data);
}
-TEST_P(CodegenTest, CFG5) {
+TEST_F(CodegenTest, CFG5) {
const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::IF_EQ, 3,
Instruction::GOTO | 0x100,
Instruction::RETURN_VOID);
- TestCode(GetParam(), data);
+ TestCode(data);
}
-TEST_P(CodegenTest, IntConstant) {
+TEST_F(CodegenTest, IntConstant) {
const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::RETURN_VOID);
- TestCode(GetParam(), data);
+ TestCode(data);
}
-TEST_P(CodegenTest, Return1) {
+TEST_F(CodegenTest, Return1) {
const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::RETURN | 0);
- TestCode(GetParam(), data, true, 0);
+ TestCode(data, true, 0);
}
-TEST_P(CodegenTest, Return2) {
+TEST_F(CodegenTest, Return2) {
const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::CONST_4 | 0 | 1 << 8,
Instruction::RETURN | 1 << 8);
- TestCode(GetParam(), data, true, 0);
+ TestCode(data, true, 0);
}
-TEST_P(CodegenTest, Return3) {
+TEST_F(CodegenTest, Return3) {
const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::CONST_4 | 1 << 8 | 1 << 12,
Instruction::RETURN | 1 << 8);
- TestCode(GetParam(), data, true, 1);
+ TestCode(data, true, 1);
}
-TEST_P(CodegenTest, ReturnIf1) {
+TEST_F(CodegenTest, ReturnIf1) {
const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::CONST_4 | 1 << 8 | 1 << 12,
@@ -460,10 +430,10 @@ TEST_P(CodegenTest, ReturnIf1) {
Instruction::RETURN | 0 << 8,
Instruction::RETURN | 1 << 8);
- TestCode(GetParam(), data, true, 1);
+ TestCode(data, true, 1);
}
-TEST_P(CodegenTest, ReturnIf2) {
+TEST_F(CodegenTest, ReturnIf2) {
const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
Instruction::CONST_4 | 1 << 8 | 1 << 12,
@@ -471,12 +441,12 @@ TEST_P(CodegenTest, ReturnIf2) {
Instruction::RETURN | 0 << 8,
Instruction::RETURN | 1 << 8);
- TestCode(GetParam(), data, true, 0);
+ TestCode(data, true, 0);
}
// Exercise bit-wise (one's complement) not-int instruction.
#define NOT_INT_TEST(TEST_NAME, INPUT, EXPECTED_OUTPUT) \
-TEST_P(CodegenTest, TEST_NAME) { \
+TEST_F(CodegenTest, TEST_NAME) { \
const int32_t input = INPUT; \
const uint16_t input_lo = Low16Bits(input); \
const uint16_t input_hi = High16Bits(input); \
@@ -485,7 +455,7 @@ TEST_P(CodegenTest, TEST_NAME) { \
Instruction::NOT_INT | 1 << 8 | 0 << 12 , \
Instruction::RETURN | 1 << 8); \
\
- TestCode(GetParam(), data, true, EXPECTED_OUTPUT); \
+ TestCode(data, true, EXPECTED_OUTPUT); \
}
NOT_INT_TEST(ReturnNotIntMinus2, -2, 1)
@@ -501,7 +471,7 @@ NOT_INT_TEST(ReturnNotIntINT32_MAX, 2147483647, -2147483648) // -(2^31)
// Exercise bit-wise (one's complement) not-long instruction.
#define NOT_LONG_TEST(TEST_NAME, INPUT, EXPECTED_OUTPUT) \
-TEST_P(CodegenTest, TEST_NAME) { \
+TEST_F(CodegenTest, TEST_NAME) { \
const int64_t input = INPUT; \
const uint16_t word0 = Low16Bits(Low32Bits(input)); /* LSW. */ \
const uint16_t word1 = High16Bits(Low32Bits(input)); \
@@ -512,7 +482,7 @@ TEST_P(CodegenTest, TEST_NAME) { \
Instruction::NOT_LONG | 2 << 8 | 0 << 12, \
Instruction::RETURN_WIDE | 2 << 8); \
\
- TestCodeLong(GetParam(), data, true, EXPECTED_OUTPUT); \
+ TestCodeLong(data, true, EXPECTED_OUTPUT); \
}
NOT_LONG_TEST(ReturnNotLongMinus2, INT64_C(-2), INT64_C(1))
@@ -551,7 +521,7 @@ NOT_LONG_TEST(ReturnNotLongINT64_MAX,
#undef NOT_LONG_TEST
-TEST_P(CodegenTest, IntToLongOfLongToInt) {
+TEST_F(CodegenTest, IntToLongOfLongToInt) {
const int64_t input = INT64_C(4294967296); // 2^32
const uint16_t word0 = Low16Bits(Low32Bits(input)); // LSW.
const uint16_t word1 = High16Bits(Low32Bits(input));
@@ -565,192 +535,146 @@ TEST_P(CodegenTest, IntToLongOfLongToInt) {
Instruction::INT_TO_LONG | 2 << 8 | 4 << 12,
Instruction::RETURN_WIDE | 2 << 8);
- TestCodeLong(GetParam(), data, true, 1);
+ TestCodeLong(data, true, 1);
}
-TEST_P(CodegenTest, ReturnAdd1) {
+TEST_F(CodegenTest, ReturnAdd1) {
const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
Instruction::CONST_4 | 3 << 12 | 0,
Instruction::CONST_4 | 4 << 12 | 1 << 8,
Instruction::ADD_INT, 1 << 8 | 0,
Instruction::RETURN);
- TestCode(GetParam(), data, true, 7);
+ TestCode(data, true, 7);
}
-TEST_P(CodegenTest, ReturnAdd2) {
+TEST_F(CodegenTest, ReturnAdd2) {
const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
Instruction::CONST_4 | 3 << 12 | 0,
Instruction::CONST_4 | 4 << 12 | 1 << 8,
Instruction::ADD_INT_2ADDR | 1 << 12,
Instruction::RETURN);
- TestCode(GetParam(), data, true, 7);
+ TestCode(data, true, 7);
}
-TEST_P(CodegenTest, ReturnAdd3) {
+TEST_F(CodegenTest, ReturnAdd3) {
const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 4 << 12 | 0 << 8,
Instruction::ADD_INT_LIT8, 3 << 8 | 0,
Instruction::RETURN);
- TestCode(GetParam(), data, true, 7);
+ TestCode(data, true, 7);
}
-TEST_P(CodegenTest, ReturnAdd4) {
+TEST_F(CodegenTest, ReturnAdd4) {
const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 4 << 12 | 0 << 8,
Instruction::ADD_INT_LIT16, 3,
Instruction::RETURN);
- TestCode(GetParam(), data, true, 7);
-}
-
-TEST_P(CodegenTest, NonMaterializedCondition) {
- ArenaPool pool;
- ArenaAllocator allocator(&pool);
-
- HGraph* graph = CreateGraph(&allocator);
- HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
- graph->AddBlock(entry);
- graph->SetEntryBlock(entry);
- entry->AddInstruction(new (&allocator) HGoto());
-
- HBasicBlock* first_block = new (&allocator) HBasicBlock(graph);
- graph->AddBlock(first_block);
- entry->AddSuccessor(first_block);
- HIntConstant* constant0 = graph->GetIntConstant(0);
- HIntConstant* constant1 = graph->GetIntConstant(1);
- HEqual* equal = new (&allocator) HEqual(constant0, constant0);
- first_block->AddInstruction(equal);
- first_block->AddInstruction(new (&allocator) HIf(equal));
-
- HBasicBlock* then = new (&allocator) HBasicBlock(graph);
- HBasicBlock* else_ = new (&allocator) HBasicBlock(graph);
- HBasicBlock* exit = new (&allocator) HBasicBlock(graph);
-
- graph->AddBlock(then);
- graph->AddBlock(else_);
- graph->AddBlock(exit);
- first_block->AddSuccessor(then);
- first_block->AddSuccessor(else_);
- then->AddSuccessor(exit);
- else_->AddSuccessor(exit);
-
- exit->AddInstruction(new (&allocator) HExit());
- then->AddInstruction(new (&allocator) HReturn(constant0));
- else_->AddInstruction(new (&allocator) HReturn(constant1));
-
- ASSERT_TRUE(equal->NeedsMaterialization());
- graph->BuildDominatorTree();
- PrepareForRegisterAllocation(graph).Run();
- ASSERT_FALSE(equal->NeedsMaterialization());
-
- auto hook_before_codegen = [](HGraph* graph_in) {
- HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessors()[0];
- HParallelMove* move = new (graph_in->GetArena()) HParallelMove(graph_in->GetArena());
- block->InsertInstructionBefore(move, block->GetLastInstruction());
- };
-
- RunCodeOptimized(GetParam(), graph, hook_before_codegen, true, 0);
+ TestCode(data, true, 7);
}
-TEST_P(CodegenTest, ReturnMulInt) {
+TEST_F(CodegenTest, ReturnMulInt) {
const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
Instruction::CONST_4 | 3 << 12 | 0,
Instruction::CONST_4 | 4 << 12 | 1 << 8,
Instruction::MUL_INT, 1 << 8 | 0,
Instruction::RETURN);
- TestCode(GetParam(), data, true, 12);
+ TestCode(data, true, 12);
}
-TEST_P(CodegenTest, ReturnMulInt2addr) {
+TEST_F(CodegenTest, ReturnMulInt2addr) {
const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
Instruction::CONST_4 | 3 << 12 | 0,
Instruction::CONST_4 | 4 << 12 | 1 << 8,
Instruction::MUL_INT_2ADDR | 1 << 12,
Instruction::RETURN);
- TestCode(GetParam(), data, true, 12);
+ TestCode(data, true, 12);
}
-TEST_P(CodegenTest, ReturnMulLong) {
+TEST_F(CodegenTest, ReturnMulLong) {
const uint16_t data[] = FOUR_REGISTERS_CODE_ITEM(
- Instruction::CONST_4 | 3 << 12 | 0,
- Instruction::CONST_4 | 0 << 12 | 1 << 8,
- Instruction::CONST_4 | 4 << 12 | 2 << 8,
- Instruction::CONST_4 | 0 << 12 | 3 << 8,
+ Instruction::CONST_WIDE | 0 << 8, 3, 0, 0, 0,
+ Instruction::CONST_WIDE | 2 << 8, 4, 0, 0, 0,
Instruction::MUL_LONG, 2 << 8 | 0,
Instruction::RETURN_WIDE);
- TestCodeLong(GetParam(), data, true, 12);
+ TestCodeLong(data, true, 12);
}
-TEST_P(CodegenTest, ReturnMulLong2addr) {
+TEST_F(CodegenTest, ReturnMulLong2addr) {
const uint16_t data[] = FOUR_REGISTERS_CODE_ITEM(
- Instruction::CONST_4 | 3 << 12 | 0 << 8,
- Instruction::CONST_4 | 0 << 12 | 1 << 8,
- Instruction::CONST_4 | 4 << 12 | 2 << 8,
- Instruction::CONST_4 | 0 << 12 | 3 << 8,
+ Instruction::CONST_WIDE | 0 << 8, 3, 0, 0, 0,
+ Instruction::CONST_WIDE | 2 << 8, 4, 0, 0, 0,
Instruction::MUL_LONG_2ADDR | 2 << 12,
Instruction::RETURN_WIDE);
- TestCodeLong(GetParam(), data, true, 12);
+ TestCodeLong(data, true, 12);
}
-TEST_P(CodegenTest, ReturnMulIntLit8) {
+TEST_F(CodegenTest, ReturnMulIntLit8) {
const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 4 << 12 | 0 << 8,
Instruction::MUL_INT_LIT8, 3 << 8 | 0,
Instruction::RETURN);
- TestCode(GetParam(), data, true, 12);
+ TestCode(data, true, 12);
}
-TEST_P(CodegenTest, ReturnMulIntLit16) {
+TEST_F(CodegenTest, ReturnMulIntLit16) {
const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 4 << 12 | 0 << 8,
Instruction::MUL_INT_LIT16, 3,
Instruction::RETURN);
- TestCode(GetParam(), data, true, 12);
+ TestCode(data, true, 12);
}
-TEST_P(CodegenTest, MaterializedCondition1) {
- // Check that condition are materialized correctly. A materialized condition
- // should yield `1` if it evaluated to true, and `0` otherwise.
- // We force the materialization of comparisons for different combinations of
- // inputs and check the results.
-
- int lhs[] = {1, 2, -1, 2, 0xabc};
- int rhs[] = {2, 1, 2, -1, 0xabc};
-
- for (size_t i = 0; i < arraysize(lhs); i++) {
+TEST_F(CodegenTest, NonMaterializedCondition) {
+ for (InstructionSet target_isa : GetTargetISAs()) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
- HGraph* graph = CreateGraph(&allocator);
- HBasicBlock* entry_block = new (&allocator) HBasicBlock(graph);
- graph->AddBlock(entry_block);
- graph->SetEntryBlock(entry_block);
- entry_block->AddInstruction(new (&allocator) HGoto());
- HBasicBlock* code_block = new (&allocator) HBasicBlock(graph);
- graph->AddBlock(code_block);
+ HGraph* graph = CreateGraph(&allocator);
+ HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
+ graph->AddBlock(entry);
+ graph->SetEntryBlock(entry);
+ entry->AddInstruction(new (&allocator) HGoto());
+
+ HBasicBlock* first_block = new (&allocator) HBasicBlock(graph);
+ graph->AddBlock(first_block);
+ entry->AddSuccessor(first_block);
+ HIntConstant* constant0 = graph->GetIntConstant(0);
+ HIntConstant* constant1 = graph->GetIntConstant(1);
+ HEqual* equal = new (&allocator) HEqual(constant0, constant0);
+ first_block->AddInstruction(equal);
+ first_block->AddInstruction(new (&allocator) HIf(equal));
+
+ HBasicBlock* then_block = new (&allocator) HBasicBlock(graph);
+ HBasicBlock* else_block = new (&allocator) HBasicBlock(graph);
HBasicBlock* exit_block = new (&allocator) HBasicBlock(graph);
+ graph->SetExitBlock(exit_block);
+
+ graph->AddBlock(then_block);
+ graph->AddBlock(else_block);
graph->AddBlock(exit_block);
- exit_block->AddInstruction(new (&allocator) HExit());
+ first_block->AddSuccessor(then_block);
+ first_block->AddSuccessor(else_block);
+ then_block->AddSuccessor(exit_block);
+ else_block->AddSuccessor(exit_block);
- entry_block->AddSuccessor(code_block);
- code_block->AddSuccessor(exit_block);
- graph->SetExitBlock(exit_block);
+ exit_block->AddInstruction(new (&allocator) HExit());
+ then_block->AddInstruction(new (&allocator) HReturn(constant0));
+ else_block->AddInstruction(new (&allocator) HReturn(constant1));
- HIntConstant* cst_lhs = graph->GetIntConstant(lhs[i]);
- HIntConstant* cst_rhs = graph->GetIntConstant(rhs[i]);
- HLessThan cmp_lt(cst_lhs, cst_rhs);
- code_block->AddInstruction(&cmp_lt);
- HReturn ret(&cmp_lt);
- code_block->AddInstruction(&ret);
+ ASSERT_TRUE(equal->NeedsMaterialization());
+ TransformToSsa(graph);
+ PrepareForRegisterAllocation(graph).Run();
+ ASSERT_FALSE(equal->NeedsMaterialization());
auto hook_before_codegen = [](HGraph* graph_in) {
HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessors()[0];
@@ -758,93 +682,143 @@ TEST_P(CodegenTest, MaterializedCondition1) {
block->InsertInstructionBefore(move, block->GetLastInstruction());
};
- RunCodeOptimized(GetParam(), graph, hook_before_codegen, true, lhs[i] < rhs[i]);
+ RunCode(target_isa, graph, hook_before_codegen, true, 0);
}
}
-TEST_P(CodegenTest, MaterializedCondition2) {
- // Check that HIf correctly interprets a materialized condition.
- // We force the materialization of comparisons for different combinations of
- // inputs. An HIf takes the materialized combination as input and returns a
- // value that we verify.
-
- int lhs[] = {1, 2, -1, 2, 0xabc};
- int rhs[] = {2, 1, 2, -1, 0xabc};
-
-
- for (size_t i = 0; i < arraysize(lhs); i++) {
- ArenaPool pool;
- ArenaAllocator allocator(&pool);
- HGraph* graph = CreateGraph(&allocator);
-
- HBasicBlock* entry_block = new (&allocator) HBasicBlock(graph);
- graph->AddBlock(entry_block);
- graph->SetEntryBlock(entry_block);
- entry_block->AddInstruction(new (&allocator) HGoto());
-
- HBasicBlock* if_block = new (&allocator) HBasicBlock(graph);
- graph->AddBlock(if_block);
- HBasicBlock* if_true_block = new (&allocator) HBasicBlock(graph);
- graph->AddBlock(if_true_block);
- HBasicBlock* if_false_block = new (&allocator) HBasicBlock(graph);
- graph->AddBlock(if_false_block);
- HBasicBlock* exit_block = new (&allocator) HBasicBlock(graph);
- graph->AddBlock(exit_block);
- exit_block->AddInstruction(new (&allocator) HExit());
-
- graph->SetEntryBlock(entry_block);
- entry_block->AddSuccessor(if_block);
- if_block->AddSuccessor(if_true_block);
- if_block->AddSuccessor(if_false_block);
- if_true_block->AddSuccessor(exit_block);
- if_false_block->AddSuccessor(exit_block);
- graph->SetExitBlock(exit_block);
-
- HIntConstant* cst_lhs = graph->GetIntConstant(lhs[i]);
- HIntConstant* cst_rhs = graph->GetIntConstant(rhs[i]);
- HLessThan cmp_lt(cst_lhs, cst_rhs);
- if_block->AddInstruction(&cmp_lt);
- // We insert a temporary to separate the HIf from the HLessThan and force
- // the materialization of the condition.
- HTemporary force_materialization(0);
- if_block->AddInstruction(&force_materialization);
- HIf if_lt(&cmp_lt);
- if_block->AddInstruction(&if_lt);
-
- HIntConstant* cst_lt = graph->GetIntConstant(1);
- HReturn ret_lt(cst_lt);
- if_true_block->AddInstruction(&ret_lt);
- HIntConstant* cst_ge = graph->GetIntConstant(0);
- HReturn ret_ge(cst_ge);
- if_false_block->AddInstruction(&ret_ge);
-
- auto hook_before_codegen = [](HGraph* graph_in) {
- HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessors()[0];
- HParallelMove* move = new (graph_in->GetArena()) HParallelMove(graph_in->GetArena());
- block->InsertInstructionBefore(move, block->GetLastInstruction());
- };
+TEST_F(CodegenTest, MaterializedCondition1) {
+ for (InstructionSet target_isa : GetTargetISAs()) {
+    // Check that conditions are materialized correctly. A materialized condition
+    // should yield `1` if it evaluated to true, and `0` otherwise.
+    // We force the materialization of comparisons for different combinations of
+    // inputs and check the results.
+
+ int lhs[] = {1, 2, -1, 2, 0xabc};
+ int rhs[] = {2, 1, 2, -1, 0xabc};
+
+ for (size_t i = 0; i < arraysize(lhs); i++) {
+ ArenaPool pool;
+ ArenaAllocator allocator(&pool);
+ HGraph* graph = CreateGraph(&allocator);
+
+ HBasicBlock* entry_block = new (&allocator) HBasicBlock(graph);
+ graph->AddBlock(entry_block);
+ graph->SetEntryBlock(entry_block);
+ entry_block->AddInstruction(new (&allocator) HGoto());
+ HBasicBlock* code_block = new (&allocator) HBasicBlock(graph);
+ graph->AddBlock(code_block);
+ HBasicBlock* exit_block = new (&allocator) HBasicBlock(graph);
+ graph->AddBlock(exit_block);
+ exit_block->AddInstruction(new (&allocator) HExit());
+
+ entry_block->AddSuccessor(code_block);
+ code_block->AddSuccessor(exit_block);
+ graph->SetExitBlock(exit_block);
+
+ HIntConstant* cst_lhs = graph->GetIntConstant(lhs[i]);
+ HIntConstant* cst_rhs = graph->GetIntConstant(rhs[i]);
+ HLessThan cmp_lt(cst_lhs, cst_rhs);
+ code_block->AddInstruction(&cmp_lt);
+ HReturn ret(&cmp_lt);
+ code_block->AddInstruction(&ret);
+
+ TransformToSsa(graph);
+ auto hook_before_codegen = [](HGraph* graph_in) {
+ HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessors()[0];
+ HParallelMove* move = new (graph_in->GetArena()) HParallelMove(graph_in->GetArena());
+ block->InsertInstructionBefore(move, block->GetLastInstruction());
+ };
+ RunCode(target_isa, graph, hook_before_codegen, true, lhs[i] < rhs[i]);
+ }
+ }
+}
- RunCodeOptimized(GetParam(), graph, hook_before_codegen, true, lhs[i] < rhs[i]);
+TEST_F(CodegenTest, MaterializedCondition2) {
+ for (InstructionSet target_isa : GetTargetISAs()) {
+ // Check that HIf correctly interprets a materialized condition.
+ // We force the materialization of comparisons for different combinations of
+ // inputs. An HIf takes the materialized combination as input and returns a
+ // value that we verify.
+
+ int lhs[] = {1, 2, -1, 2, 0xabc};
+ int rhs[] = {2, 1, 2, -1, 0xabc};
+
+
+ for (size_t i = 0; i < arraysize(lhs); i++) {
+ ArenaPool pool;
+ ArenaAllocator allocator(&pool);
+ HGraph* graph = CreateGraph(&allocator);
+
+ HBasicBlock* entry_block = new (&allocator) HBasicBlock(graph);
+ graph->AddBlock(entry_block);
+ graph->SetEntryBlock(entry_block);
+ entry_block->AddInstruction(new (&allocator) HGoto());
+
+ HBasicBlock* if_block = new (&allocator) HBasicBlock(graph);
+ graph->AddBlock(if_block);
+ HBasicBlock* if_true_block = new (&allocator) HBasicBlock(graph);
+ graph->AddBlock(if_true_block);
+ HBasicBlock* if_false_block = new (&allocator) HBasicBlock(graph);
+ graph->AddBlock(if_false_block);
+ HBasicBlock* exit_block = new (&allocator) HBasicBlock(graph);
+ graph->AddBlock(exit_block);
+ exit_block->AddInstruction(new (&allocator) HExit());
+
+ graph->SetEntryBlock(entry_block);
+ entry_block->AddSuccessor(if_block);
+ if_block->AddSuccessor(if_true_block);
+ if_block->AddSuccessor(if_false_block);
+ if_true_block->AddSuccessor(exit_block);
+ if_false_block->AddSuccessor(exit_block);
+ graph->SetExitBlock(exit_block);
+
+ HIntConstant* cst_lhs = graph->GetIntConstant(lhs[i]);
+ HIntConstant* cst_rhs = graph->GetIntConstant(rhs[i]);
+ HLessThan cmp_lt(cst_lhs, cst_rhs);
+ if_block->AddInstruction(&cmp_lt);
+ // We insert a temporary to separate the HIf from the HLessThan and force
+ // the materialization of the condition.
+ HTemporary force_materialization(0);
+ if_block->AddInstruction(&force_materialization);
+ HIf if_lt(&cmp_lt);
+ if_block->AddInstruction(&if_lt);
+
+ HIntConstant* cst_lt = graph->GetIntConstant(1);
+ HReturn ret_lt(cst_lt);
+ if_true_block->AddInstruction(&ret_lt);
+ HIntConstant* cst_ge = graph->GetIntConstant(0);
+ HReturn ret_ge(cst_ge);
+ if_false_block->AddInstruction(&ret_ge);
+
+ TransformToSsa(graph);
+ auto hook_before_codegen = [](HGraph* graph_in) {
+ HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessors()[0];
+ HParallelMove* move = new (graph_in->GetArena()) HParallelMove(graph_in->GetArena());
+ block->InsertInstructionBefore(move, block->GetLastInstruction());
+ };
+ RunCode(target_isa, graph, hook_before_codegen, true, lhs[i] < rhs[i]);
+ }
}
}
-TEST_P(CodegenTest, ReturnDivIntLit8) {
+TEST_F(CodegenTest, ReturnDivIntLit8) {
const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 4 << 12 | 0 << 8,
Instruction::DIV_INT_LIT8, 3 << 8 | 0,
Instruction::RETURN);
- TestCode(GetParam(), data, true, 1);
+ TestCode(data, true, 1);
}
-TEST_P(CodegenTest, ReturnDivInt2Addr) {
+TEST_F(CodegenTest, ReturnDivInt2Addr) {
const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
Instruction::CONST_4 | 4 << 12 | 0,
Instruction::CONST_4 | 2 << 12 | 1 << 8,
Instruction::DIV_INT_2ADDR | 1 << 12,
Instruction::RETURN);
- TestCode(GetParam(), data, true, 2);
+ TestCode(data, true, 2);
}
// Helper method.
@@ -933,80 +907,55 @@ static void TestComparison(IfCondition condition,
block->AddInstruction(comparison);
block->AddInstruction(new (&allocator) HReturn(comparison));
- auto hook_before_codegen = [](HGraph*) {
- };
- RunCodeOptimized(target_isa, graph, hook_before_codegen, true, expected_result);
-}
-
-TEST_P(CodegenTest, ComparisonsInt) {
- const InstructionSet target_isa = GetParam();
- for (int64_t i = -1; i <= 1; i++) {
- for (int64_t j = -1; j <= 1; j++) {
- TestComparison(kCondEQ, i, j, Primitive::kPrimInt, target_isa);
- TestComparison(kCondNE, i, j, Primitive::kPrimInt, target_isa);
- TestComparison(kCondLT, i, j, Primitive::kPrimInt, target_isa);
- TestComparison(kCondLE, i, j, Primitive::kPrimInt, target_isa);
- TestComparison(kCondGT, i, j, Primitive::kPrimInt, target_isa);
- TestComparison(kCondGE, i, j, Primitive::kPrimInt, target_isa);
- TestComparison(kCondB, i, j, Primitive::kPrimInt, target_isa);
- TestComparison(kCondBE, i, j, Primitive::kPrimInt, target_isa);
- TestComparison(kCondA, i, j, Primitive::kPrimInt, target_isa);
- TestComparison(kCondAE, i, j, Primitive::kPrimInt, target_isa);
+ TransformToSsa(graph);
+ RunCode(target_isa, graph, [](HGraph*) {}, true, expected_result);
+}
+
+TEST_F(CodegenTest, ComparisonsInt) {
+ for (InstructionSet target_isa : GetTargetISAs()) {
+ for (int64_t i = -1; i <= 1; i++) {
+ for (int64_t j = -1; j <= 1; j++) {
+ TestComparison(kCondEQ, i, j, Primitive::kPrimInt, target_isa);
+ TestComparison(kCondNE, i, j, Primitive::kPrimInt, target_isa);
+ TestComparison(kCondLT, i, j, Primitive::kPrimInt, target_isa);
+ TestComparison(kCondLE, i, j, Primitive::kPrimInt, target_isa);
+ TestComparison(kCondGT, i, j, Primitive::kPrimInt, target_isa);
+ TestComparison(kCondGE, i, j, Primitive::kPrimInt, target_isa);
+ TestComparison(kCondB, i, j, Primitive::kPrimInt, target_isa);
+ TestComparison(kCondBE, i, j, Primitive::kPrimInt, target_isa);
+ TestComparison(kCondA, i, j, Primitive::kPrimInt, target_isa);
+ TestComparison(kCondAE, i, j, Primitive::kPrimInt, target_isa);
+ }
}
}
}
-TEST_P(CodegenTest, ComparisonsLong) {
+TEST_F(CodegenTest, ComparisonsLong) {
// TODO: make MIPS work for long
if (kRuntimeISA == kMips || kRuntimeISA == kMips64) {
return;
}
- const InstructionSet target_isa = GetParam();
- if (target_isa == kMips || target_isa == kMips64) {
- return;
- }
-
- for (int64_t i = -1; i <= 1; i++) {
- for (int64_t j = -1; j <= 1; j++) {
- TestComparison(kCondEQ, i, j, Primitive::kPrimLong, target_isa);
- TestComparison(kCondNE, i, j, Primitive::kPrimLong, target_isa);
- TestComparison(kCondLT, i, j, Primitive::kPrimLong, target_isa);
- TestComparison(kCondLE, i, j, Primitive::kPrimLong, target_isa);
- TestComparison(kCondGT, i, j, Primitive::kPrimLong, target_isa);
- TestComparison(kCondGE, i, j, Primitive::kPrimLong, target_isa);
- TestComparison(kCondB, i, j, Primitive::kPrimLong, target_isa);
- TestComparison(kCondBE, i, j, Primitive::kPrimLong, target_isa);
- TestComparison(kCondA, i, j, Primitive::kPrimLong, target_isa);
- TestComparison(kCondAE, i, j, Primitive::kPrimLong, target_isa);
+ for (InstructionSet target_isa : GetTargetISAs()) {
+ if (target_isa == kMips || target_isa == kMips64) {
+ continue;
}
- }
-}
-static ::std::vector<InstructionSet> GetTargetISAs() {
- ::std::vector<InstructionSet> v;
- // Add all ISAs that are executable on hardware or on simulator.
- const ::std::vector<InstructionSet> executable_isa_candidates = {
- kArm,
- kArm64,
- kThumb2,
- kX86,
- kX86_64,
- kMips,
- kMips64
- };
-
- for (auto target_isa : executable_isa_candidates) {
- if (CanExecute(target_isa)) {
- v.push_back(target_isa);
+ for (int64_t i = -1; i <= 1; i++) {
+ for (int64_t j = -1; j <= 1; j++) {
+ TestComparison(kCondEQ, i, j, Primitive::kPrimLong, target_isa);
+ TestComparison(kCondNE, i, j, Primitive::kPrimLong, target_isa);
+ TestComparison(kCondLT, i, j, Primitive::kPrimLong, target_isa);
+ TestComparison(kCondLE, i, j, Primitive::kPrimLong, target_isa);
+ TestComparison(kCondGT, i, j, Primitive::kPrimLong, target_isa);
+ TestComparison(kCondGE, i, j, Primitive::kPrimLong, target_isa);
+ TestComparison(kCondB, i, j, Primitive::kPrimLong, target_isa);
+ TestComparison(kCondBE, i, j, Primitive::kPrimLong, target_isa);
+ TestComparison(kCondA, i, j, Primitive::kPrimLong, target_isa);
+ TestComparison(kCondAE, i, j, Primitive::kPrimLong, target_isa);
+ }
}
}
-
- return v;
}
-INSTANTIATE_TEST_CASE_P(MultipleTargets,
- CodegenTest,
- ::testing::ValuesIn(GetTargetISAs()));
-
} // namespace art
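
As a reading aid for the MaterializedCondition tests above, here is a small standalone sketch (plain C++, not ART code) of the distinction they exercise: a materialized condition is turned into an explicit 0/1 value that later instructions consume, while a non-materialized one only steers a branch.

    #include <cassert>

    // Materialized: the comparison result itself is the value returned (0 or 1),
    // which is what MaterializedCondition1 checks for each lhs/rhs pair.
    static int materialized(int lhs, int rhs) {
      int cond = (lhs < rhs) ? 1 : 0;  // condition lives on as a 0/1 value
      return cond;
    }

    // Non-materialized: the comparison only feeds the branch; no 0/1 value is kept,
    // mirroring what NonMaterializedCondition expects after PrepareForRegisterAllocation.
    static int non_materialized(int lhs, int rhs) {
      if (lhs < rhs) {
        return 1;
      }
      return 0;
    }

    int main() {
      assert(materialized(1, 2) == non_materialized(1, 2));
      assert(materialized(2, 1) == non_materialized(2, 1));
      return 0;
    }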
diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc
index 86a695b152..e170e37bdd 100644
--- a/compiler/optimizing/dead_code_elimination.cc
+++ b/compiler/optimizing/dead_code_elimination.cc
@@ -89,15 +89,18 @@ void HDeadCodeElimination::MaybeRecordDeadBlock(HBasicBlock* block) {
}
void HDeadCodeElimination::RemoveDeadBlocks() {
+ if (graph_->HasIrreducibleLoops()) {
+ // Do not eliminate dead blocks if the graph has irreducible loops. We could
+ // support it, but that would require changes in our loop representation to handle
+ // multiple entry points. We decided it was not worth the complexity.
+ return;
+ }
// Classify blocks as reachable/unreachable.
ArenaAllocator* allocator = graph_->GetArena();
ArenaBitVector live_blocks(allocator, graph_->GetBlocks().size(), false);
MarkReachableBlocks(graph_, &live_blocks);
bool removed_one_or_more_blocks = false;
- // If the graph has irreducible loops we need to reset all graph analysis we have done
- // before: the irreducible loop can be turned into a reducible one.
- // For simplicity, we do the full computation regardless of the type of the loops.
bool rerun_dominance_and_loop_analysis = false;
// Remove all dead blocks. Iterate in post order because removal needs the
@@ -105,9 +108,6 @@ void HDeadCodeElimination::RemoveDeadBlocks() {
// inside out.
for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
HBasicBlock* block = it.Current();
- if (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible()) {
- rerun_dominance_and_loop_analysis = true;
- }
int id = block->GetBlockId();
if (!live_blocks.IsBitSet(id)) {
MaybeRecordDeadBlock(block);
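
An irreducible loop, the case the new early return above bails out on, is a cycle that can be entered at more than one point, so it has no single header. Dex bytecode can produce such control flow; a rough C++ analogue using goto (purely illustrative, not ART code) looks like this:

    // The blocks labelled `first` and `second` form a cycle that can be entered either at
    // `first` (fall-through when p is false) or at `second` (the jump when p is true),
    // so the cycle has two entry points and no unique loop header.
    static int irreducible(int p, int q) {
      int x = 0;
      if (p) goto second;
    first:
      x += 1;
    second:
      x += 2;
      if (q-- > 0) goto first;  // back edge into the middle of the cycle
      return x;
    }

    int main() { return irreducible(1, 3) > 0 ? 0 : 1; }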
diff --git a/compiler/optimizing/dominator_test.cc b/compiler/optimizing/dominator_test.cc
index 91e4a997fd..feb8b2092a 100644
--- a/compiler/optimizing/dominator_test.cc
+++ b/compiler/optimizing/dominator_test.cc
@@ -133,8 +133,9 @@ TEST(OptimizerTest, CFG4) {
const uint32_t dominators[] = {
kInvalidBlockId,
- 0,
- kInvalidBlockId
+ 3,
+ kInvalidBlockId,
+ 0
};
TestCode(data1, dominators, sizeof(dominators) / sizeof(int));
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index 9439ba0c8d..31136772c7 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -484,6 +484,18 @@ void SSAChecker::CheckLoop(HBasicBlock* loop_header) {
loop_information->GetPreHeader()->GetSuccessors().size()));
}
+ if (loop_information->GetSuspendCheck() == nullptr) {
+ AddError(StringPrintf(
+ "Loop with header %d does not have a suspend check.",
+ loop_header->GetBlockId()));
+ }
+
+ if (loop_information->GetSuspendCheck() != loop_header->GetFirstInstructionDisregardMoves()) {
+ AddError(StringPrintf(
+ "Loop header %d does not have the loop suspend check as the first instruction.",
+ loop_header->GetBlockId()));
+ }
+
// Ensure the loop header has only one incoming branch and the remaining
// predecessors are back edges.
size_t num_preds = loop_header->GetPredecessors().size();
@@ -589,6 +601,14 @@ void SSAChecker::VisitInstruction(HInstruction* instruction) {
}
}
+ if (instruction->NeedsEnvironment() && !instruction->HasEnvironment()) {
+ AddError(StringPrintf("Instruction %s:%d in block %d requires an environment "
+ "but does not have one.",
+ instruction->DebugName(),
+ instruction->GetId(),
+ current_block_->GetBlockId()));
+ }
+
// Ensure an instruction having an environment is dominated by the
// instructions contained in the environment.
for (HEnvironment* environment = instruction->GetEnvironment();
diff --git a/compiler/optimizing/graph_test.cc b/compiler/optimizing/graph_test.cc
index d4b9b71952..d5305646a8 100644
--- a/compiler/optimizing/graph_test.cc
+++ b/compiler/optimizing/graph_test.cc
@@ -164,7 +164,7 @@ TEST(GraphTest, IfSuccessorMultipleBackEdges1) {
// Ensure there is only one back edge.
ASSERT_EQ(if_block->GetPredecessors().size(), 2u);
- ASSERT_EQ(if_block->GetPredecessors()[0], entry_block);
+ ASSERT_EQ(if_block->GetPredecessors()[0], entry_block->GetSingleSuccessor());
ASSERT_NE(if_block->GetPredecessors()[1], if_block);
// Ensure the new block is the back edge.
@@ -199,7 +199,7 @@ TEST(GraphTest, IfSuccessorMultipleBackEdges2) {
// Ensure there is only one back edge.
ASSERT_EQ(if_block->GetPredecessors().size(), 2u);
- ASSERT_EQ(if_block->GetPredecessors()[0], entry_block);
+ ASSERT_EQ(if_block->GetPredecessors()[0], entry_block->GetSingleSuccessor());
ASSERT_NE(if_block->GetPredecessors()[1], if_block);
// Ensure the new block is the back edge.
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 293282edbb..2e79df1b84 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -356,12 +356,12 @@ bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction,
compare, invoke_instruction->GetDexPc());
// TODO: Extend reference type propagation to understand the guard.
if (cursor != nullptr) {
- bb_cursor->InsertInstructionAfter(load_class, cursor);
+ bb_cursor->InsertInstructionAfter(field_get, cursor);
} else {
- bb_cursor->InsertInstructionBefore(load_class, bb_cursor->GetFirstInstruction());
+ bb_cursor->InsertInstructionBefore(field_get, bb_cursor->GetFirstInstruction());
}
- bb_cursor->InsertInstructionAfter(field_get, load_class);
- bb_cursor->InsertInstructionAfter(compare, field_get);
+ bb_cursor->InsertInstructionAfter(load_class, field_get);
+ bb_cursor->InsertInstructionAfter(compare, load_class);
bb_cursor->InsertInstructionAfter(deoptimize, compare);
deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
@@ -419,7 +419,10 @@ bool HInliner::TryInline(HInvoke* invoke_instruction, ArtMethod* method, bool do
size_t inline_max_code_units = compiler_driver_->GetCompilerOptions().GetInlineMaxCodeUnits();
if (code_item->insns_size_in_code_units_ > inline_max_code_units) {
VLOG(compiler) << "Method " << PrettyMethod(method)
- << " is too big to inline";
+ << " is too big to inline: "
+ << code_item->insns_size_in_code_units_
+ << " > "
+ << inline_max_code_units;
return false;
}
@@ -639,9 +642,12 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method,
for (; !it.Done(); it.Advance()) {
HBasicBlock* block = it.Current();
- if (block->IsLoopHeader()) {
+
+ if (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible()) {
+      // Don't inline methods with irreducible loops; they could prevent some
+      // optimizations from running.
VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
- << " could not be inlined because it contains a loop";
+ << " could not be inlined because it contains an irreducible loop";
return false;
}
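
The reordering in TryInlineMonomorphicCall above only changes which instruction is anchored at the cursor first; the guard it builds is still, conceptually, "load the receiver's class, compare it against the expected class, deoptimize on mismatch". A plain C++ sketch of that shape (hypothetical types, not the ART HIR):

    #include <stdexcept>

    struct Class {};
    struct Object { const Class* klass; };

    // Stand-in for the runtime's deoptimization path.
    static void Deoptimize() { throw std::runtime_error("deoptimize"); }

    // Shape of the guard: field_get (receiver->klass), load_class (expected),
    // compare, then deoptimize if the monomorphic assumption no longer holds.
    static int GuardedInlinedCall(Object* receiver, const Class* expected_class) {
      const Class* seen_class = receiver->klass;   // HInstanceFieldGet
      if (seen_class != expected_class) {          // compare + HDeoptimize
        Deoptimize();
      }
      return 42;                                   // inlined body of the target method
    }

    int main() {
      Class c;
      Object o{&c};
      return GuardedInlinedCall(&o, &c) == 42 ? 0 : 1;
    }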
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
index c6da9a3f5e..5caf077858 100644
--- a/compiler/optimizing/intrinsics.cc
+++ b/compiler/optimizing/intrinsics.cc
@@ -176,6 +176,16 @@ static Intrinsics GetIntrinsic(InlineMethod method) {
}
// Misc data processing.
+ case kIntrinsicBitCount:
+ switch (GetType(method.d.data, true)) {
+ case Primitive::kPrimInt:
+ return Intrinsics::kIntegerBitCount;
+ case Primitive::kPrimLong:
+ return Intrinsics::kLongBitCount;
+ default:
+ LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
+ UNREACHABLE();
+ }
case kIntrinsicNumberOfLeadingZeros:
switch (GetType(method.d.data, true)) {
case Primitive::kPrimInt:
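
The new kIntrinsicBitCount mapping routes Integer.bitCount and Long.bitCount to the backend implementations further below. As a quick reference for what those methods compute, a standalone check using the standard library (not the ART POPCOUNT macro):

    #include <bitset>
    #include <cassert>
    #include <cstdint>

    int main() {
      // Integer.bitCount(x): number of set bits in the 32-bit two's-complement value.
      assert(std::bitset<32>(0x0000000bu).count() == 3);               // 0b1011 -> 3
      assert(std::bitset<32>(static_cast<uint32_t>(-1)).count() == 32);
      // Long.bitCount(x): same, over 64 bits.
      assert(std::bitset<64>(UINT64_C(0xF0F0F0F0F0F0F0F0)).count() == 32);
      return 0;
    }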
diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h
index 9f50d1814e..3bf3f7ffae 100644
--- a/compiler/optimizing/intrinsics.h
+++ b/compiler/optimizing/intrinsics.h
@@ -85,9 +85,9 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
InvokeDexCallingConventionVisitor* calling_convention_visitor) {
if (kIsDebugBuild && invoke->IsInvokeStaticOrDirect()) {
HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirect();
- // When we do not run baseline, explicit clinit checks triggered by static
- // invokes must have been pruned by art::PrepareForRegisterAllocation.
- DCHECK(codegen->IsBaseline() || !invoke_static_or_direct->IsStaticWithExplicitClinitCheck());
+ // Explicit clinit checks triggered by static invokes must have been
+ // pruned by art::PrepareForRegisterAllocation.
+ DCHECK(!invoke_static_or_direct->IsStaticWithExplicitClinitCheck());
}
if (invoke->GetNumberOfArguments() == 0) {
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index b1fbf28204..e72f927e44 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -1577,10 +1577,12 @@ void IntrinsicLocationsBuilderARM::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSE
void IntrinsicCodeGeneratorARM::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
}
+UNIMPLEMENTED_INTRINSIC(IntegerBitCount)
UNIMPLEMENTED_INTRINSIC(IntegerReverse)
UNIMPLEMENTED_INTRINSIC(IntegerReverseBytes)
UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft)
UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
+UNIMPLEMENTED_INTRINSIC(LongBitCount)
UNIMPLEMENTED_INTRINSIC(LongReverse)
UNIMPLEMENTED_INTRINSIC(LongReverseBytes)
UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 81cab86c83..8cf2d4f393 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -752,21 +752,33 @@ static void GenUnsafeGet(HInvoke* invoke,
Register trg = RegisterFrom(trg_loc, type);
bool use_acquire_release = codegen->GetInstructionSetFeatures().PreferAcquireRelease();
- MemOperand mem_op(base.X(), offset);
- if (is_volatile) {
- if (use_acquire_release) {
- codegen->LoadAcquire(invoke, trg, mem_op);
- } else {
- codegen->Load(type, trg, mem_op);
+ if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // UnsafeGetObject/UnsafeGetObjectVolatile with Baker's read barrier case.
+ UseScratchRegisterScope temps(masm);
+ Register temp = temps.AcquireW();
+ codegen->GenerateArrayLoadWithBakerReadBarrier(
+ invoke, trg_loc, base, 0U, offset_loc, temp, /* needs_null_check */ false);
+ if (is_volatile && !use_acquire_release) {
__ Dmb(InnerShareable, BarrierReads);
}
} else {
- codegen->Load(type, trg, mem_op);
- }
+ // Other cases.
+ MemOperand mem_op(base.X(), offset);
+ if (is_volatile) {
+ if (use_acquire_release) {
+ codegen->LoadAcquire(invoke, trg, mem_op, /* needs_null_check */ true);
+ } else {
+ codegen->Load(type, trg, mem_op);
+ __ Dmb(InnerShareable, BarrierReads);
+ }
+ } else {
+ codegen->Load(type, trg, mem_op);
+ }
- if (type == Primitive::kPrimNot) {
- DCHECK(trg.IsW());
- codegen->MaybeGenerateReadBarrier(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc);
+ if (type == Primitive::kPrimNot) {
+ DCHECK(trg.IsW());
+ codegen->MaybeGenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc);
+ }
}
}
@@ -1026,10 +1038,15 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat
vixl::Label loop_head, exit_loop;
if (use_acquire_release) {
__ Bind(&loop_head);
- __ Ldaxr(tmp_value, MemOperand(tmp_ptr));
- // TODO: Do we need a read barrier here when `type == Primitive::kPrimNot`?
+ // TODO: When `type == Primitive::kPrimNot`, add a read barrier for
+ // the reference stored in the object before attempting the CAS,
+ // similar to the one in the art::Unsafe_compareAndSwapObject JNI
+ // implementation.
+ //
// Note that this code is not (yet) used when read barriers are
// enabled (see IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject).
+ DCHECK(!(type == Primitive::kPrimNot && kEmitCompilerReadBarrier));
+ __ Ldaxr(tmp_value, MemOperand(tmp_ptr));
__ Cmp(tmp_value, expected);
__ B(&exit_loop, ne);
__ Stlxr(tmp_32, value, MemOperand(tmp_ptr));
@@ -1447,8 +1464,10 @@ void IntrinsicLocationsBuilderARM64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNU
void IntrinsicCodeGeneratorARM64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
}
+UNIMPLEMENTED_INTRINSIC(IntegerBitCount)
UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft)
UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
+UNIMPLEMENTED_INTRINSIC(LongBitCount)
UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
UNIMPLEMENTED_INTRINSIC(LongRotateRight)
UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
diff --git a/compiler/optimizing/intrinsics_list.h b/compiler/optimizing/intrinsics_list.h
index 2e87546282..ea380347da 100644
--- a/compiler/optimizing/intrinsics_list.h
+++ b/compiler/optimizing/intrinsics_list.h
@@ -28,12 +28,14 @@
V(FloatIntBitsToFloat, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
V(IntegerReverse, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
V(IntegerReverseBytes, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+ V(IntegerBitCount, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
V(IntegerNumberOfLeadingZeros, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
V(IntegerNumberOfTrailingZeros, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
V(IntegerRotateRight, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
V(IntegerRotateLeft, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
V(LongReverse, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
V(LongReverseBytes, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+ V(LongBitCount, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
V(LongNumberOfLeadingZeros, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
V(LongNumberOfTrailingZeros, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
V(LongRotateRight, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
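
Adding the two V(...) lines above is enough for the new intrinsics to appear everywhere the list macro is expanded. A toy X-macro (trimmed arguments, not the real list) shows the mechanism:

    // Toy list with the same shape as the additions above.
    #define TOY_INTRINSICS_LIST(V) \
      V(IntegerBitCount)           \
      V(LongBitCount)

    // One expansion produces enum values...
    #define TOY_AS_ENUM(Name) k##Name,
    enum class ToyIntrinsics { kNone, TOY_INTRINSICS_LIST(TOY_AS_ENUM) };

    // ...another produces a name table, so each new V(...) line updates both in lockstep.
    #define TOY_AS_STRING(Name) #Name,
    static const char* kToyIntrinsicNames[] = { "None", TOY_INTRINSICS_LIST(TOY_AS_STRING) };

    int main() {
      (void)kToyIntrinsicNames;
      return static_cast<int>(ToyIntrinsics::kLongBitCount) == 2 ? 0 : 1;
    }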
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index bc126a2716..81112b1a34 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -935,6 +935,9 @@ void IntrinsicLocationsBuilderMIPS::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUS
void IntrinsicCodeGeneratorMIPS::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
}
+UNIMPLEMENTED_INTRINSIC(IntegerBitCount)
+UNIMPLEMENTED_INTRINSIC(LongBitCount)
+
UNIMPLEMENTED_INTRINSIC(MathAbsDouble)
UNIMPLEMENTED_INTRINSIC(MathAbsFloat)
UNIMPLEMENTED_INTRINSIC(MathAbsInt)
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index 8b45ea7c4f..ac969e39fa 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -1724,6 +1724,9 @@ void IntrinsicLocationsBuilderMIPS64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UN
void IntrinsicCodeGeneratorMIPS64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
}
+UNIMPLEMENTED_INTRINSIC(IntegerBitCount)
+UNIMPLEMENTED_INTRINSIC(LongBitCount)
+
UNIMPLEMENTED_INTRINSIC(MathRoundDouble)
UNIMPLEMENTED_INTRINSIC(MathRoundFloat)
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 677f2e9c81..e48bed59d7 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -2303,6 +2303,81 @@ void IntrinsicCodeGeneratorX86::VisitLongReverse(HInvoke* invoke) {
SwapBits(reg_high, temp, 4, 0x0f0f0f0f, assembler);
}
+static void CreateBitCountLocations(
+ ArenaAllocator* arena, CodeGeneratorX86* codegen, HInvoke* invoke, bool is_long) {
+ if (!codegen->GetInstructionSetFeatures().HasPopCnt()) {
+ // Do nothing if there is no popcnt support. This results in generating
+ // a call for the intrinsic rather than direct code.
+ return;
+ }
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ if (is_long) {
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
+ } else {
+ locations->SetInAt(0, Location::Any());
+ }
+ locations->SetOut(Location::RequiresRegister());
+}
+
+static void GenBitCount(X86Assembler* assembler, HInvoke* invoke, bool is_long) {
+ LocationSummary* locations = invoke->GetLocations();
+ Location src = locations->InAt(0);
+ Register out = locations->Out().AsRegister<Register>();
+
+ if (invoke->InputAt(0)->IsConstant()) {
+ // Evaluate this at compile time.
+ int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
+ value = is_long
+ ? POPCOUNT(static_cast<uint64_t>(value))
+ : POPCOUNT(static_cast<uint32_t>(value));
+ if (value == 0) {
+ __ xorl(out, out);
+ } else {
+ __ movl(out, Immediate(value));
+ }
+ return;
+ }
+
+ // Handle the non-constant cases.
+ if (!is_long) {
+ if (src.IsRegister()) {
+ __ popcntl(out, src.AsRegister<Register>());
+ } else {
+ DCHECK(src.IsStackSlot());
+ __ popcntl(out, Address(ESP, src.GetStackIndex()));
+ }
+ return;
+ }
+
+ // The 64-bit case needs to worry about both parts of the register.
+ DCHECK(src.IsRegisterPair());
+ Register src_lo = src.AsRegisterPairLow<Register>();
+ Register src_hi = src.AsRegisterPairHigh<Register>();
+ Register temp = locations->GetTemp(0).AsRegister<Register>();
+ __ popcntl(temp, src_lo);
+ __ popcntl(out, src_hi);
+ __ addl(out, temp);
+}
+
+void IntrinsicLocationsBuilderX86::VisitIntegerBitCount(HInvoke* invoke) {
+ CreateBitCountLocations(arena_, codegen_, invoke, /* is_long */ false);
+}
+
+void IntrinsicCodeGeneratorX86::VisitIntegerBitCount(HInvoke* invoke) {
+ GenBitCount(GetAssembler(), invoke, /* is_long */ false);
+}
+
+void IntrinsicLocationsBuilderX86::VisitLongBitCount(HInvoke* invoke) {
+ CreateBitCountLocations(arena_, codegen_, invoke, /* is_long */ true);
+}
+
+void IntrinsicCodeGeneratorX86::VisitLongBitCount(HInvoke* invoke) {
+ GenBitCount(GetAssembler(), invoke, /* is_long */ true);
+}
+
static void CreateLeadingZeroLocations(ArenaAllocator* arena, HInvoke* invoke, bool is_long) {
LocationSummary* locations = new (arena) LocationSummary(invoke,
LocationSummary::kNoCall,
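
On 32-bit x86 a long lives in a register pair, which is why GenBitCount above issues two popcntl instructions and an add. A standalone check of that decomposition (standard C++, not the generated code):

    #include <bitset>
    #include <cassert>
    #include <cstdint>

    // bitCount(64-bit value) == bitCount(low 32 bits) + bitCount(high 32 bits),
    // matching popcntl(src_lo) + popcntl(src_hi) in the 32-bit intrinsic.
    static int BitCount64ViaHalves(uint64_t value) {
      uint32_t lo = static_cast<uint32_t>(value);
      uint32_t hi = static_cast<uint32_t>(value >> 32);
      return static_cast<int>(std::bitset<32>(lo).count() + std::bitset<32>(hi).count());
    }

    int main() {
      assert(BitCount64ViaHalves(UINT64_C(0x0000000300000001)) == 3);   // 2 + 1
      assert(BitCount64ViaHalves(UINT64_C(0xFFFFFFFFFFFFFFFF)) == 64);
      return 0;
    }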
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 690cf3d413..23a628f243 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -2368,6 +2368,70 @@ void IntrinsicCodeGeneratorX86_64::VisitLongReverse(HInvoke* invoke) {
SwapBits64(reg, temp1, temp2, 4, INT64_C(0x0f0f0f0f0f0f0f0f), assembler);
}
+static void CreateBitCountLocations(
+ ArenaAllocator* arena, CodeGeneratorX86_64* codegen, HInvoke* invoke) {
+ if (!codegen->GetInstructionSetFeatures().HasPopCnt()) {
+ // Do nothing if there is no popcnt support. This results in generating
+ // a call for the intrinsic rather than direct code.
+ return;
+ }
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::Any());
+ locations->SetOut(Location::RequiresRegister());
+}
+
+static void GenBitCount(X86_64Assembler* assembler, HInvoke* invoke, bool is_long) {
+ LocationSummary* locations = invoke->GetLocations();
+ Location src = locations->InAt(0);
+ CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+
+ if (invoke->InputAt(0)->IsConstant()) {
+ // Evaluate this at compile time.
+ int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
+ value = is_long
+ ? POPCOUNT(static_cast<uint64_t>(value))
+ : POPCOUNT(static_cast<uint32_t>(value));
+ if (value == 0) {
+ __ xorl(out, out);
+ } else {
+ __ movl(out, Immediate(value));
+ }
+ return;
+ }
+
+ if (src.IsRegister()) {
+ if (is_long) {
+ __ popcntq(out, src.AsRegister<CpuRegister>());
+ } else {
+ __ popcntl(out, src.AsRegister<CpuRegister>());
+ }
+ } else if (is_long) {
+ DCHECK(src.IsDoubleStackSlot());
+ __ popcntq(out, Address(CpuRegister(RSP), src.GetStackIndex()));
+ } else {
+ DCHECK(src.IsStackSlot());
+ __ popcntl(out, Address(CpuRegister(RSP), src.GetStackIndex()));
+ }
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitIntegerBitCount(HInvoke* invoke) {
+ CreateBitCountLocations(arena_, codegen_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitIntegerBitCount(HInvoke* invoke) {
+ GenBitCount(GetAssembler(), invoke, /* is_long */ false);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitLongBitCount(HInvoke* invoke) {
+ CreateBitCountLocations(arena_, codegen_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitLongBitCount(HInvoke* invoke) {
+ GenBitCount(GetAssembler(), invoke, /* is_long */ true);
+}
+
static void CreateLeadingZeroLocations(ArenaAllocator* arena, HInvoke* invoke) {
LocationSummary* locations = new (arena) LocationSummary(invoke,
LocationSummary::kNoCall,
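
Both GenBitCount implementations fold a constant operand before emitting any popcnt: the count is computed during compilation and the result is materialized with a single mov (or an xor for zero). A sketch of that constant path, using a standard-library stand-in for the ART POPCOUNT macro:

    #include <bitset>
    #include <cstdint>
    #include <iostream>

    // Mimics the constant branch of GenBitCount: when the operand is a compile-time
    // constant, nothing is left to do at run time but load the precomputed count.
    static int FoldBitCount(int64_t value, bool is_long) {
      return is_long
          ? static_cast<int>(std::bitset<64>(static_cast<uint64_t>(value)).count())
          : static_cast<int>(std::bitset<32>(static_cast<uint32_t>(value)).count());
    }

    int main() {
      std::cout << FoldBitCount(-1, /* is_long */ false) << "\n";  // 32, emitted as a mov
      std::cout << FoldBitCount(0, /* is_long */ true) << "\n";    // 0, emitted as xor out, out
      return 0;
    }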
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 854d92a409..adf8734214 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -167,11 +167,7 @@ void HGraph::ClearDominanceInformation() {
void HGraph::ClearLoopInformation() {
SetHasIrreducibleLoops(false);
for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) {
- HBasicBlock* current = it.Current();
- if (current->IsLoopHeader()) {
- current->RemoveInstruction(current->GetLoopInformation()->GetSuspendCheck());
- }
- current->SetLoopInformation(nullptr);
+ it.Current()->SetLoopInformation(nullptr);
}
}
@@ -180,6 +176,14 @@ void HBasicBlock::ClearDominanceInformation() {
dominator_ = nullptr;
}
+HInstruction* HBasicBlock::GetFirstInstructionDisregardMoves() const {
+ HInstruction* instruction = GetFirstInstruction();
+ while (instruction->IsParallelMove()) {
+ instruction = instruction->GetNext();
+ }
+ return instruction;
+}
+
void HGraph::ComputeDominanceInformation() {
DCHECK(reverse_post_order_.empty());
reverse_post_order_.reserve(blocks_.size());
@@ -284,9 +288,10 @@ void HGraph::SimplifyLoop(HBasicBlock* header) {
// Make sure the loop has only one pre header. This simplifies SSA building by having
// to just look at the pre header to know which locals are initialized at entry of the
- // loop.
+  // loop. Also, don't allow the entry block to be a pre header: this simplifies inlining
+  // of this graph.
size_t number_of_incomings = header->GetPredecessors().size() - info->NumberOfBackEdges();
- if (number_of_incomings != 1) {
+ if (number_of_incomings != 1 || (GetEntryBlock()->GetSingleSuccessor() == header)) {
HBasicBlock* pre_header = new (arena_) HBasicBlock(this, header->GetDexPc());
AddBlock(pre_header);
pre_header->AddInstruction(new (arena_) HGoto(header->GetDexPc()));
@@ -457,6 +462,10 @@ void HGraph::SimplifyCFG() {
}
if (block->IsLoopHeader()) {
SimplifyLoop(block);
+ } else if (!block->IsEntryBlock() && block->GetFirstInstruction()->IsSuspendCheck()) {
+      // We are being called by the dead code elimination pass, and what used to be
+ // a loop got dismantled. Just remove the suspend check.
+ block->RemoveInstruction(block->GetFirstInstruction());
}
}
}
@@ -1829,6 +1838,7 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {
DCHECK(GetBlocks()[0]->IsEntryBlock());
DCHECK(GetBlocks()[2]->IsExitBlock());
DCHECK(!body->IsExitBlock());
+ DCHECK(!body->IsInLoop());
HInstruction* last = body->GetLastInstruction();
invoke->GetBlock()->instructions_.AddAfter(invoke, body->GetInstructions());
@@ -1887,7 +1897,7 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {
// Update the meta information surrounding blocks:
// (1) the graph they are now in,
// (2) the reverse post order of that graph,
- // (3) the potential loop information they are now in,
+ // (3) their potential loop information, inner and outer,
// (4) try block membership.
// Note that we do not need to update catch phi inputs because they
// correspond to the register file of the outer method which the inlinee
@@ -1916,15 +1926,24 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {
for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) {
HBasicBlock* current = it.Current();
if (current != exit_block_ && current != entry_block_ && current != first) {
- DCHECK(!current->IsInLoop());
DCHECK(current->GetTryCatchInformation() == nullptr);
DCHECK(current->GetGraph() == this);
current->SetGraph(outer_graph);
outer_graph->AddBlock(current);
outer_graph->reverse_post_order_[++index_of_at] = current;
- if (loop_info != nullptr) {
+ if (!current->IsInLoop()) {
current->SetLoopInformation(loop_info);
- for (HLoopInformationOutwardIterator loop_it(*at); !loop_it.Done(); loop_it.Advance()) {
+ } else if (current->IsLoopHeader()) {
+ // Clear the information of which blocks are contained in that loop. Since the
+ // information is stored as a bit vector based on block ids, we have to update
+ // it, as those block ids were specific to the callee graph and we are now adding
+ // these blocks to the caller graph.
+ current->GetLoopInformation()->ClearAllBlocks();
+ }
+ if (current->IsInLoop()) {
+ for (HLoopInformationOutwardIterator loop_it(*current);
+ !loop_it.Done();
+ loop_it.Advance()) {
loop_it.Current()->Add(current);
}
}
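A minimal standalone sketch (not ART's HLoopInformation) of why the bit vector has to be cleared: loop membership is keyed by block id, and inlined blocks receive fresh ids in the caller graph, so the callee-relative bits would otherwise mark unrelated caller blocks as loop members.

#include <cstddef>
#include <iostream>
#include <vector>

// Sketch only: one bit per block id, as the loop's 'blocks_' bit vector does.
struct LoopBlocks {
  std::vector<bool> bits;
  void Add(size_t block_id) {
    if (block_id >= bits.size()) bits.resize(block_id + 1, false);
    bits[block_id] = true;
  }
  void ClearAllBlocks() { bits.assign(bits.size(), false); }
};

int main() {
  LoopBlocks loop;
  loop.Add(3);              // block id 3 in the callee graph
  loop.ClearAllBlocks();    // ids are about to change: drop callee-relative bits
  loop.Add(17);             // same block, renumbered to 17 in the caller graph
  std::cout << loop.bits[17] << "\n";  // 1
}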
@@ -1937,7 +1956,9 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {
outer_graph->AddBlock(to);
outer_graph->reverse_post_order_[++index_of_at] = to;
if (loop_info != nullptr) {
- to->SetLoopInformation(loop_info);
+ if (!to->IsInLoop()) {
+ to->SetLoopInformation(loop_info);
+ }
for (HLoopInformationOutwardIterator loop_it(*at); !loop_it.Done(); loop_it.Advance()) {
loop_it.Current()->Add(to);
}
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 859d570b29..5246fd1f05 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -689,6 +689,10 @@ class HLoopInformation : public ArenaObject<kArenaAllocLoopInfo> {
void Add(HBasicBlock* block);
void Remove(HBasicBlock* block);
+ void ClearAllBlocks() {
+ blocks_.ClearAllBits();
+ }
+
private:
// Internal recursive implementation of `Populate`.
void PopulateRecursive(HBasicBlock* block);
@@ -860,6 +864,8 @@ class HBasicBlock : public ArenaObject<kArenaAllocBasicBlock> {
HInstruction* GetLastPhi() const { return phis_.last_instruction_; }
const HInstructionList& GetPhis() const { return phis_; }
+ HInstruction* GetFirstInstructionDisregardMoves() const;
+
void AddSuccessor(HBasicBlock* block) {
successors_.push_back(block);
block->predecessors_.push_back(this);
@@ -3687,19 +3693,13 @@ class HInvokeStaticOrDirect : public HInvoke {
DCHECK(!IsStaticWithExplicitClinitCheck());
}
- HNewInstance* GetThisArgumentOfStringInit() const {
- DCHECK(IsStringInit());
- size_t index = InputCount() - 1;
- DCHECK(InputAt(index)->IsNewInstance());
- return InputAt(index)->AsNewInstance();
- }
-
- void RemoveThisArgumentOfStringInit() {
+ HInstruction* GetAndRemoveThisArgumentOfStringInit() {
DCHECK(IsStringInit());
size_t index = InputCount() - 1;
- DCHECK(InputAt(index)->IsNewInstance());
+ HInstruction* input = InputAt(index);
RemoveAsUserOfInput(index);
inputs_.pop_back();
+ return input;
}
// Is this a call to a static method whose declaring class has an
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index bb840eabdd..fffd00535c 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -127,7 +127,7 @@ class PassObserver : public ValueObject {
timing_logger_enabled_(compiler_driver->GetDumpPasses()),
timing_logger_(timing_logger_enabled_ ? GetMethodName() : "", true, true),
disasm_info_(graph->GetArena()),
- visualizer_enabled_(!compiler_driver->GetDumpCfgFileName().empty()),
+ visualizer_enabled_(!compiler_driver->GetCompilerOptions().GetDumpCfgFileName().empty()),
visualizer_(visualizer_output, graph, *codegen),
graph_in_bad_state_(false) {
if (timing_logger_enabled_ || visualizer_enabled_) {
@@ -305,30 +305,19 @@ class OptimizingCompiler FINAL : public Compiler {
SHARED_REQUIRES(Locks::mutator_lock_);
private:
- // Whether we should run any optimization or register allocation. If false, will
- // just run the code generation after the graph was built.
- const bool run_optimizations_;
-
// Create a 'CompiledMethod' for an optimized graph.
- CompiledMethod* EmitOptimized(ArenaAllocator* arena,
- CodeVectorAllocator* code_allocator,
- CodeGenerator* codegen,
- CompilerDriver* driver) const;
-
- // Create a 'CompiledMethod' for a non-optimized graph.
- CompiledMethod* EmitBaseline(ArenaAllocator* arena,
- CodeVectorAllocator* code_allocator,
- CodeGenerator* codegen,
- CompilerDriver* driver) const;
+ CompiledMethod* Emit(ArenaAllocator* arena,
+ CodeVectorAllocator* code_allocator,
+ CodeGenerator* codegen,
+ CompilerDriver* driver) const;
// Try compiling a method and return the code generator used for
// compiling it.
// This method:
// 1) Builds the graph. Returns null if it failed to build it.
- // 2) If `run_optimizations_` is set:
- // 2.1) Transform the graph to SSA. Returns null if it failed.
- // 2.2) Run optimizations on the graph, including register allocator.
- // 3) Generate code with the `code_allocator` provided.
+ // 2) Transforms the graph to SSA. Returns null if it failed.
+ // 3) Runs optimizations on the graph, including register allocator.
+ // 4) Generates code with the `code_allocator` provided.
CodeGenerator* TryCompile(ArenaAllocator* arena,
CodeVectorAllocator* code_allocator,
const DexFile::CodeItem* code_item,
@@ -350,21 +339,19 @@ class OptimizingCompiler FINAL : public Compiler {
static const int kMaximumCompilationTimeBeforeWarning = 100; /* ms */
OptimizingCompiler::OptimizingCompiler(CompilerDriver* driver)
- : Compiler(driver, kMaximumCompilationTimeBeforeWarning),
- run_optimizations_(
- driver->GetCompilerOptions().GetCompilerFilter() != CompilerOptions::kTime) {}
+ : Compiler(driver, kMaximumCompilationTimeBeforeWarning) {}
void OptimizingCompiler::Init() {
// Enable C1visualizer output. Must be done in Init() because the compiler
// driver is not fully initialized when passed to the compiler's constructor.
CompilerDriver* driver = GetCompilerDriver();
- const std::string cfg_file_name = driver->GetDumpCfgFileName();
+ const std::string cfg_file_name = driver->GetCompilerOptions().GetDumpCfgFileName();
if (!cfg_file_name.empty()) {
CHECK_EQ(driver->GetThreadCount(), 1U)
<< "Graph visualizer requires the compiler to run single-threaded. "
<< "Invoke the compiler with '-j1'.";
std::ios_base::openmode cfg_file_mode =
- driver->GetDumpCfgAppend() ? std::ofstream::app : std::ofstream::out;
+ driver->GetCompilerOptions().GetDumpCfgAppend() ? std::ofstream::app : std::ofstream::out;
visualizer_output_.reset(new std::ofstream(cfg_file_name, cfg_file_mode));
}
if (driver->GetDumpStats()) {
@@ -577,17 +564,6 @@ static void RunOptimizations(HGraph* graph,
AllocateRegisters(graph, codegen, pass_observer);
}
-// The stack map we generate must be 4-byte aligned on ARM. Since existing
-// maps are generated alongside these stack maps, we must also align them.
-static ArrayRef<const uint8_t> AlignVectorSize(ArenaVector<uint8_t>& vector) {
- size_t size = vector.size();
- size_t aligned_size = RoundUp(size, 4);
- for (; size < aligned_size; ++size) {
- vector.push_back(0);
- }
- return ArrayRef<const uint8_t>(vector);
-}
-
static ArenaVector<LinkerPatch> EmitAndSortLinkerPatches(CodeGenerator* codegen) {
ArenaVector<LinkerPatch> linker_patches(codegen->GetGraph()->GetArena()->Adapter());
codegen->EmitLinkerPatches(&linker_patches);
@@ -601,10 +577,10 @@ static ArenaVector<LinkerPatch> EmitAndSortLinkerPatches(CodeGenerator* codegen)
return linker_patches;
}
-CompiledMethod* OptimizingCompiler::EmitOptimized(ArenaAllocator* arena,
- CodeVectorAllocator* code_allocator,
- CodeGenerator* codegen,
- CompilerDriver* compiler_driver) const {
+CompiledMethod* OptimizingCompiler::Emit(ArenaAllocator* arena,
+ CodeVectorAllocator* code_allocator,
+ CodeGenerator* codegen,
+ CompilerDriver* compiler_driver) const {
ArenaVector<LinkerPatch> linker_patches = EmitAndSortLinkerPatches(codegen);
ArenaVector<uint8_t> stack_map(arena->Adapter(kArenaAllocStackMaps));
stack_map.resize(codegen->ComputeStackMapsSize());
@@ -630,39 +606,6 @@ CompiledMethod* OptimizingCompiler::EmitOptimized(ArenaAllocator* arena,
return compiled_method;
}
-CompiledMethod* OptimizingCompiler::EmitBaseline(
- ArenaAllocator* arena,
- CodeVectorAllocator* code_allocator,
- CodeGenerator* codegen,
- CompilerDriver* compiler_driver) const {
- ArenaVector<LinkerPatch> linker_patches = EmitAndSortLinkerPatches(codegen);
-
- ArenaVector<uint8_t> mapping_table(arena->Adapter(kArenaAllocBaselineMaps));
- codegen->BuildMappingTable(&mapping_table);
- ArenaVector<uint8_t> vmap_table(arena->Adapter(kArenaAllocBaselineMaps));
- codegen->BuildVMapTable(&vmap_table);
- ArenaVector<uint8_t> gc_map(arena->Adapter(kArenaAllocBaselineMaps));
- codegen->BuildNativeGCMap(&gc_map, *compiler_driver);
-
- CompiledMethod* compiled_method = CompiledMethod::SwapAllocCompiledMethod(
- compiler_driver,
- codegen->GetInstructionSet(),
- ArrayRef<const uint8_t>(code_allocator->GetMemory()),
- // Follow Quick's behavior and set the frame size to zero if it is
- // considered "empty" (see the definition of
- // art::CodeGenerator::HasEmptyFrame).
- codegen->HasEmptyFrame() ? 0 : codegen->GetFrameSize(),
- codegen->GetCoreSpillMask(),
- codegen->GetFpuSpillMask(),
- ArrayRef<const SrcMapElem>(),
- AlignVectorSize(mapping_table),
- AlignVectorSize(vmap_table),
- AlignVectorSize(gc_map),
- ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()),
- ArrayRef<const LinkerPatch>(linker_patches));
- return compiled_method;
-}
-
CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena,
CodeVectorAllocator* code_allocator,
const DexFile::CodeItem* code_item,
@@ -775,41 +718,37 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena,
VLOG(compiler) << "Optimizing " << pass_observer.GetMethodName();
- if (run_optimizations_) {
- ScopedObjectAccess soa(Thread::Current());
- StackHandleScopeCollection handles(soa.Self());
- ScopedThreadSuspension sts(soa.Self(), kNative);
-
- {
- PassScope scope(SsaBuilder::kSsaBuilderPassName, &pass_observer);
- GraphAnalysisResult result = graph->TryBuildingSsa(&handles);
- if (result != kAnalysisSuccess) {
- switch (result) {
- case kAnalysisFailThrowCatchLoop:
- MaybeRecordStat(MethodCompilationStat::kNotCompiledThrowCatchLoop);
- break;
- case kAnalysisFailAmbiguousArrayOp:
- MaybeRecordStat(MethodCompilationStat::kNotCompiledAmbiguousArrayOp);
- break;
- case kAnalysisSuccess:
- UNREACHABLE();
- }
- pass_observer.SetGraphInBadState();
- return nullptr;
+ ScopedObjectAccess soa(Thread::Current());
+ StackHandleScopeCollection handles(soa.Self());
+ ScopedThreadSuspension sts(soa.Self(), kNative);
+
+ {
+ PassScope scope(SsaBuilder::kSsaBuilderPassName, &pass_observer);
+ GraphAnalysisResult result = graph->TryBuildingSsa(&handles);
+ if (result != kAnalysisSuccess) {
+ switch (result) {
+ case kAnalysisFailThrowCatchLoop:
+ MaybeRecordStat(MethodCompilationStat::kNotCompiledThrowCatchLoop);
+ break;
+ case kAnalysisFailAmbiguousArrayOp:
+ MaybeRecordStat(MethodCompilationStat::kNotCompiledAmbiguousArrayOp);
+ break;
+ case kAnalysisSuccess:
+ UNREACHABLE();
}
+ pass_observer.SetGraphInBadState();
+ return nullptr;
}
-
- RunOptimizations(graph,
- codegen.get(),
- compiler_driver,
- compilation_stats_.get(),
- dex_compilation_unit,
- &pass_observer,
- &handles);
- codegen->CompileOptimized(code_allocator);
- } else {
- codegen->CompileBaseline(code_allocator);
}
+
+ RunOptimizations(graph,
+ codegen.get(),
+ compiler_driver,
+ compilation_stats_.get(),
+ dex_compilation_unit,
+ &pass_observer,
+ &handles);
+ codegen->Compile(code_allocator);
pass_observer.DumpDisassembly();
if (kArenaAllocatorCountAllocations) {
@@ -861,11 +800,7 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
dex_cache));
if (codegen.get() != nullptr) {
MaybeRecordStat(MethodCompilationStat::kCompiled);
- if (run_optimizations_) {
- method = EmitOptimized(&arena, &code_allocator, codegen.get(), compiler_driver);
- } else {
- method = EmitBaseline(&arena, &code_allocator, codegen.get(), compiler_driver);
- }
+ method = Emit(&arena, &code_allocator, codegen.get(), compiler_driver);
}
} else {
if (compiler_driver->GetCompilerOptions().VerifyAtRuntime()) {
@@ -928,8 +863,6 @@ bool OptimizingCompiler::JitCompile(Thread* self,
{
// Go to native so that we don't block GC during compilation.
ScopedThreadSuspension sts(self, kNative);
-
- DCHECK(run_optimizations_);
codegen.reset(
TryCompile(&arena,
&code_allocator,
diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc
index 9d136f3ae6..be470ccb7d 100644
--- a/compiler/optimizing/parallel_move_resolver.cc
+++ b/compiler/optimizing/parallel_move_resolver.cc
@@ -504,7 +504,7 @@ void ParallelMoveResolverNoSwap::PerformMove(size_t index) {
void ParallelMoveResolverNoSwap::UpdateMoveSource(Location from, Location to) {
// This function is used to reduce the dependencies in the graph after
// (from -> to) has been performed. Since we ensure there is no move with the same
- // destination, (to -> X) can not be blocked while (from -> X) might still be
+ // destination, (to -> X) cannot be blocked while (from -> X) might still be
// blocked. Consider for example the moves (0 -> 1) (1 -> 2) (1 -> 3). After
// (1 -> 2) has been performed, the moves left are (0 -> 1) and (1 -> 3). There is
// a dependency between the two. If we update the source location from 1 to 2, we
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index 2bae4bc5c8..d77639d608 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -72,8 +72,7 @@ RegisterAllocator::RegisterAllocator(ArenaAllocator* allocator,
float_spill_slots_.reserve(kDefaultNumberOfSpillSlots);
double_spill_slots_.reserve(kDefaultNumberOfSpillSlots);
- static constexpr bool kIsBaseline = false;
- codegen->SetupBlockedRegisters(kIsBaseline);
+ codegen->SetupBlockedRegisters();
physical_core_register_intervals_.resize(codegen->GetNumberOfCoreRegisters(), nullptr);
physical_fp_register_intervals_.resize(codegen->GetNumberOfFloatingPointRegisters(), nullptr);
// Always reserve for the current method and the graph's max out registers.
@@ -1735,6 +1734,12 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) {
}
}
+static bool IsMaterializableEntryBlockInstructionOfGraphWithIrreducibleLoop(
+ HInstruction* instruction) {
+ return instruction->GetBlock()->GetGraph()->HasIrreducibleLoops() &&
+ (instruction->IsConstant() || instruction->IsCurrentMethod());
+}
+
void RegisterAllocator::ConnectSplitSiblings(LiveInterval* interval,
HBasicBlock* from,
HBasicBlock* to) const {
@@ -1751,7 +1756,19 @@ void RegisterAllocator::ConnectSplitSiblings(LiveInterval* interval,
// Interval was not split.
return;
}
- DCHECK(destination != nullptr && source != nullptr);
+
+ LiveInterval* parent = interval->GetParent();
+ HInstruction* defined_by = parent->GetDefinedBy();
+ if (destination == nullptr) {
+ // Our live_in fixed point calculation has found that the instruction is live
+ // in the `to` block because it will eventually enter an irreducible loop. Our
+ // live interval computation however does not compute a fixed point, and
+ // therefore will not have a location for that instruction for `to`.
+ // Because the instruction is a constant or the ArtMethod, we don't need to
+ // do anything: it will be materialized in the irreducible loop.
+ DCHECK(IsMaterializableEntryBlockInstructionOfGraphWithIrreducibleLoop(defined_by));
+ return;
+ }
if (!destination->HasRegister()) {
// Values are eagerly spilled. Spill slot already contains appropriate value.
@@ -1762,13 +1779,13 @@ void RegisterAllocator::ConnectSplitSiblings(LiveInterval* interval,
// we need to put the moves at the entry of `to`.
if (from->GetNormalSuccessors().size() == 1) {
InsertParallelMoveAtExitOf(from,
- interval->GetParent()->GetDefinedBy(),
+ defined_by,
source->ToLocation(),
destination->ToLocation());
} else {
DCHECK_EQ(to->GetPredecessors().size(), 1u);
InsertParallelMoveAtEntryOf(to,
- interval->GetParent()->GetDefinedBy(),
+ defined_by,
source->ToLocation(),
destination->ToLocation());
}
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index 7494e336b1..165d09d1a5 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -422,6 +422,34 @@ bool SsaBuilder::FixAmbiguousArrayOps() {
return true;
}
+void SsaBuilder::RemoveRedundantUninitializedStrings() {
+ if (GetGraph()->IsDebuggable()) {
+ // Do not perform the optimization for consistency with the interpreter
+ // which always allocates an object for new-instance of String.
+ return;
+ }
+
+ for (HNewInstance* new_instance : uninitialized_strings_) {
+ DCHECK(new_instance->IsStringAlloc());
+
+ // Replace NewInstance of String with NullConstant if not used prior to
+ // calling StringFactory. In case of deoptimization, the interpreter is
+ // expected to skip null check on the `this` argument of the StringFactory call.
+ if (!new_instance->HasNonEnvironmentUses()) {
+ new_instance->ReplaceWith(GetGraph()->GetNullConstant());
+ new_instance->GetBlock()->RemoveInstruction(new_instance);
+
+ // Remove LoadClass if not needed any more.
+ HLoadClass* load_class = new_instance->InputAt(0)->AsLoadClass();
+ DCHECK(load_class != nullptr);
+ DCHECK(!load_class->NeedsAccessCheck()) << "String class is always accessible";
+ if (!load_class->HasUses()) {
+ load_class->GetBlock()->RemoveInstruction(load_class);
+ }
+ }
+ }
+}
+
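A hedged, standalone model of the check above (plain structs, not ART's HIR): the pre-allocated String can be swapped for null only when nothing other than deoptimization environments reads it before the StringFactory call produces the real, initialized object.

#include <iostream>

// Sketch only: 'non_environment_uses' stands in for HasNonEnvironmentUses().
struct UninitializedString {
  int non_environment_uses = 0;
};

bool CanReplaceWithNull(const UninitializedString& s) {
  return s.non_environment_uses == 0;
}

int main() {
  UninitializedString unused;        // new-instance only feeds the factory call
  UninitializedString leaked;
  leaked.non_environment_uses = 1;   // e.g. stored or passed before initialization
  std::cout << CanReplaceWithNull(unused) << " " << CanReplaceWithNull(leaked) << "\n";  // 1 0
}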
GraphAnalysisResult SsaBuilder::BuildSsa() {
// 1) Visit in reverse post order. We need to have all predecessors of a block
// visited (with the exception of loops) in order to create the right environment
@@ -487,7 +515,15 @@ GraphAnalysisResult SsaBuilder::BuildSsa() {
// input types.
dead_phi_elimimation.EliminateDeadPhis();
- // 11) Clear locals.
+ // 11) Step 1) replaced uses of NewInstances of String with the results of
+ // their corresponding StringFactory calls. Unless the String objects are used
+ // before they are initialized, they can be replaced with NullConstant.
+ // Note that this optimization is valid only if unsimplified code does not use
+ // the uninitialized value because we assume execution can be deoptimized at
+ // any safepoint. We must therefore perform it before any other optimizations.
+ RemoveRedundantUninitializedStrings();
+
+ // 12) Clear locals.
for (HInstructionIterator it(GetGraph()->GetEntryBlock()->GetInstructions());
!it.Done();
it.Advance()) {
@@ -891,12 +927,21 @@ void SsaBuilder::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
if (invoke->IsStringInit()) {
// This is a StringFactory call which acts as a String constructor. Its
// result replaces the empty String pre-allocated by NewInstance.
- HNewInstance* new_instance = invoke->GetThisArgumentOfStringInit();
- invoke->RemoveThisArgumentOfStringInit();
+ HInstruction* arg_this = invoke->GetAndRemoveThisArgumentOfStringInit();
+
+ // Replacing the NewInstance might render it redundant. Keep a list of these
+ // to be visited once it is clear whether it has remaining uses.
+ if (arg_this->IsNewInstance()) {
+ uninitialized_strings_.push_back(arg_this->AsNewInstance());
+ } else {
+ DCHECK(arg_this->IsPhi());
+ // NewInstance is not the direct input of the StringFactory call. It might
+ // be redundant but optimizing this case is not worth the effort.
+ }
- // Walk over all vregs and replace any occurrence of `new_instance` with `invoke`.
+ // Walk over all vregs and replace any occurrence of `arg_this` with `invoke`.
for (size_t vreg = 0, e = current_locals_->size(); vreg < e; ++vreg) {
- if ((*current_locals_)[vreg] == new_instance) {
+ if ((*current_locals_)[vreg] == arg_this) {
(*current_locals_)[vreg] = invoke;
}
}
diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h
index 28eef6a40c..ccef8ea380 100644
--- a/compiler/optimizing/ssa_builder.h
+++ b/compiler/optimizing/ssa_builder.h
@@ -57,6 +57,7 @@ class SsaBuilder : public HGraphVisitor {
loop_headers_(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)),
ambiguous_agets_(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)),
ambiguous_asets_(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)),
+ uninitialized_strings_(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)),
locals_for_(graph->GetBlocks().size(),
ArenaVector<HInstruction*>(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)),
graph->GetArena()->Adapter(kArenaAllocSsaBuilder)) {
@@ -105,6 +106,8 @@ class SsaBuilder : public HGraphVisitor {
HPhi* GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, Primitive::Type type);
HArrayGet* GetFloatOrDoubleEquivalentOfArrayGet(HArrayGet* aget);
+ void RemoveRedundantUninitializedStrings();
+
StackHandleScopeCollection* const handles_;
// True if types of ambiguous ArrayGets have been resolved.
@@ -119,6 +122,7 @@ class SsaBuilder : public HGraphVisitor {
ArenaVector<HArrayGet*> ambiguous_agets_;
ArenaVector<HArraySet*> ambiguous_asets_;
+ ArenaVector<HNewInstance*> uninitialized_strings_;
// HEnvironment for each block.
ArenaVector<ArenaVector<HInstruction*>> locals_for_;
diff --git a/compiler/profile_assistant.cc b/compiler/profile_assistant.cc
index 81f2a5692d..85335efcc4 100644
--- a/compiler/profile_assistant.cc
+++ b/compiler/profile_assistant.cc
@@ -16,54 +16,154 @@
#include "profile_assistant.h"
+#include "base/unix_file/fd_file.h"
+#include "os.h"
+
namespace art {
// Minimum number of new methods that profiles must contain to enable recompilation.
static constexpr const uint32_t kMinNewMethodsForCompilation = 10;
-bool ProfileAssistant::ProcessProfiles(
- const std::vector<std::string>& profile_files,
- const std::vector<std::string>& reference_profile_files,
- /*out*/ ProfileCompilationInfo** profile_compilation_info) {
+bool ProfileAssistant::ProcessProfilesInternal(
+ const std::vector<ScopedFlock>& profile_files,
+ const std::vector<ScopedFlock>& reference_profile_files,
+ /*out*/ ProfileCompilationInfo** profile_compilation_info) {
DCHECK(!profile_files.empty());
- DCHECK(reference_profile_files.empty() ||
+ DCHECK(!reference_profile_files.empty() ||
(profile_files.size() == reference_profile_files.size()));
std::vector<ProfileCompilationInfo> new_info(profile_files.size());
bool should_compile = false;
// Read the main profile files.
- for (size_t i = 0; i < profile_files.size(); i++) {
- if (!new_info[i].Load(profile_files[i])) {
- LOG(WARNING) << "Could not load profile file: " << profile_files[i];
+ for (size_t i = 0; i < new_info.size(); i++) {
+ if (!new_info[i].Load(profile_files[i].GetFile()->Fd())) {
+ LOG(WARNING) << "Could not load profile file at index " << i;
return false;
}
// Do we have enough new profiled methods that will make the compilation worthwhile?
should_compile |= (new_info[i].GetNumberOfMethods() > kMinNewMethodsForCompilation);
}
+
if (!should_compile) {
- *profile_compilation_info = nullptr;
return true;
}
std::unique_ptr<ProfileCompilationInfo> result(new ProfileCompilationInfo());
+ // Merge information.
for (size_t i = 0; i < new_info.size(); i++) {
+ if (!reference_profile_files.empty()) {
+ if (!new_info[i].Load(reference_profile_files[i].GetFile()->Fd())) {
+ LOG(WARNING) << "Could not load reference profile file at index " << i;
+ return false;
+ }
+ }
// Merge all data into a single object.
- result->Load(new_info[i]);
- // If we have any reference profile information merge their information with
- // the current profiles and save them back to disk.
+ if (!result->Load(new_info[i])) {
+ LOG(WARNING) << "Could not merge profile data at index " << i;
+ return false;
+ }
+ }
+ // We were successful in merging all profile information. Update the files.
+ for (size_t i = 0; i < new_info.size(); i++) {
if (!reference_profile_files.empty()) {
- if (!new_info[i].Load(reference_profile_files[i])) {
- LOG(WARNING) << "Could not load reference profile file: " << reference_profile_files[i];
+ if (!reference_profile_files[i].GetFile()->ClearContent()) {
+ PLOG(WARNING) << "Could not clear reference profile file at index " << i;
+ return false;
+ }
+ if (!new_info[i].Save(reference_profile_files[i].GetFile()->Fd())) {
+ LOG(WARNING) << "Could not save reference profile file at index " << i;
return false;
}
- if (!new_info[i].Save(reference_profile_files[i])) {
- LOG(WARNING) << "Could not save reference profile file: " << reference_profile_files[i];
+ if (!profile_files[i].GetFile()->ClearContent()) {
+ PLOG(WARNING) << "Could not clear profile file at index " << i;
return false;
}
}
}
+
*profile_compilation_info = result.release();
return true;
}
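For a concrete feel of the advice threshold: assuming kMinNewMethodsForCompilation stays at 10, a single input profile crossing that count is enough to advise recompilation. A minimal sketch:

#include <cstdint>
#include <iostream>
#include <vector>

// Sketch only: method counts per loaded profile, threshold as defined above.
bool ShouldCompile(const std::vector<uint32_t>& methods_per_profile) {
  bool should_compile = false;
  for (uint32_t count : methods_per_profile) {
    should_compile |= (count > 10u);  // kMinNewMethodsForCompilation
  }
  return should_compile;
}

int main() {
  std::cout << ShouldCompile({4, 12, 2}) << "\n";  // 1: the second profile crosses the threshold
  std::cout << ShouldCompile({4, 7, 2}) << "\n";   // 0: advise skipping compilation
}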
+class ScopedCollectionFlock {
+ public:
+ explicit ScopedCollectionFlock(size_t size) : flocks_(size) {}
+
+ // Will block until all the locks are acquired.
+ bool Init(const std::vector<std::string>& filenames, /* out */ std::string* error) {
+ for (size_t i = 0; i < filenames.size(); i++) {
+ if (!flocks_[i].Init(filenames[i].c_str(), O_RDWR, /* block */ true, error)) {
+ *error += " (index=" + std::to_string(i) + ")";
+ return false;
+ }
+ }
+ return true;
+ }
+
+ // Will block until all the locks are acquired.
+ bool Init(const std::vector<uint32_t>& fds, /* out */ std::string* error) {
+ for (size_t i = 0; i < fds.size(); i++) {
+ // We do not own the descriptor, so disable auto-close and don't check usage.
+ File file(fds[i], false);
+ file.DisableAutoClose();
+ if (!flocks_[i].Init(&file, error)) {
+ *error += " (index=" + std::to_string(i) + ")";
+ return false;
+ }
+ }
+ return true;
+ }
+
+ const std::vector<ScopedFlock>& Get() const { return flocks_; }
+
+ private:
+ std::vector<ScopedFlock> flocks_;
+};
+
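The collection lock above wraps ART's ScopedFlock; a rough POSIX-only sketch of the same pattern (not the ART API) is to take a blocking exclusive advisory lock on every file before reading or rewriting any of them, and to release them all together:

#include <fcntl.h>
#include <sys/file.h>
#include <unistd.h>
#include <string>
#include <vector>

// Sketch only: exclusive, blocking flock() per file; all locks dropped on destruction.
class CollectionLock {
 public:
  bool LockAll(const std::vector<std::string>& paths) {
    for (const std::string& path : paths) {
      int fd = open(path.c_str(), O_RDWR);
      if (fd < 0 || flock(fd, LOCK_EX) != 0) {  // blocks until the lock is acquired
        if (fd >= 0) close(fd);
        return false;
      }
      fds_.push_back(fd);
    }
    return true;
  }
  ~CollectionLock() {
    for (int fd : fds_) {
      flock(fd, LOCK_UN);
      close(fd);
    }
  }
 private:
  std::vector<int> fds_;
};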
+bool ProfileAssistant::ProcessProfiles(
+ const std::vector<uint32_t>& profile_files_fd,
+ const std::vector<uint32_t>& reference_profile_files_fd,
+ /*out*/ ProfileCompilationInfo** profile_compilation_info) {
+ *profile_compilation_info = nullptr;
+
+ std::string error;
+ ScopedCollectionFlock profile_files_flocks(profile_files_fd.size());
+ if (!profile_files_flocks.Init(profile_files_fd, &error)) {
+ LOG(WARNING) << "Could not lock profile files: " << error;
+ return false;
+ }
+ ScopedCollectionFlock reference_profile_files_flocks(reference_profile_files_fd.size());
+ if (!reference_profile_files_flocks.Init(reference_profile_files_fd, &error)) {
+ LOG(WARNING) << "Could not lock reference profile files: " << error;
+ return false;
+ }
+
+ return ProcessProfilesInternal(profile_files_flocks.Get(),
+ reference_profile_files_flocks.Get(),
+ profile_compilation_info);
+}
+
+bool ProfileAssistant::ProcessProfiles(
+ const std::vector<std::string>& profile_files,
+ const std::vector<std::string>& reference_profile_files,
+ /*out*/ ProfileCompilationInfo** profile_compilation_info) {
+ *profile_compilation_info = nullptr;
+
+ std::string error;
+ ScopedCollectionFlock profile_files_flocks(profile_files.size());
+ if (!profile_files_flocks.Init(profile_files, &error)) {
+ LOG(WARNING) << "Could not lock profile files: " << error;
+ return false;
+ }
+ ScopedCollectionFlock reference_profile_files_flocks(reference_profile_files.size());
+ if (!reference_profile_files_flocks.Init(reference_profile_files, &error)) {
+ LOG(WARNING) << "Could not lock reference profile files: " << error;
+ return false;
+ }
+
+ return ProcessProfilesInternal(profile_files_flocks.Get(),
+ reference_profile_files_flocks.Get(),
+ profile_compilation_info);
+}
+
} // namespace art
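A hedged usage sketch of the path-based overload (file paths are made up): both overloads funnel into ProcessProfilesInternal, return false on lock/load/merge failures, and hand back a merged info object only when recompilation is worthwhile.

#include <memory>
#include <string>
#include <vector>

#include "profile_assistant.h"

namespace art {

// Sketch only: decide whether to recompile based on hypothetical profile paths.
bool ShouldRecompileApp(std::unique_ptr<ProfileCompilationInfo>* out) {
  std::vector<std::string> profiles = {"/data/misc/profiles/cur/0/app/primary.prof"};
  std::vector<std::string> references = {"/data/misc/profiles/ref/app/primary.prof"};
  ProfileCompilationInfo* info = nullptr;
  if (!ProfileAssistant::ProcessProfiles(profiles, references, &info)) {
    return false;  // locking, loading or merging failed
  }
  out->reset(info);      // may be null: not enough new methods
  return info != nullptr;
}

}  // namespace art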
diff --git a/compiler/profile_assistant.h b/compiler/profile_assistant.h
index 088c8bd1c7..ad5e2163cf 100644
--- a/compiler/profile_assistant.h
+++ b/compiler/profile_assistant.h
@@ -20,6 +20,7 @@
#include <string>
#include <vector>
+#include "base/scoped_flock.h"
#include "jit/offline_profiling_info.cc"
namespace art {
@@ -52,7 +53,17 @@ class ProfileAssistant {
const std::vector<std::string>& reference_profile_files,
/*out*/ ProfileCompilationInfo** profile_compilation_info);
+ static bool ProcessProfiles(
+ const std::vector<uint32_t>& profile_files_fd,
+ const std::vector<uint32_t>& reference_profile_files_fd,
+ /*out*/ ProfileCompilationInfo** profile_compilation_info);
+
private:
+ static bool ProcessProfilesInternal(
+ const std::vector<ScopedFlock>& profile_files,
+ const std::vector<ScopedFlock>& reference_profile_files,
+ /*out*/ ProfileCompilationInfo** profile_compilation_info);
+
DISALLOW_COPY_AND_ASSIGN(ProfileAssistant);
};
diff --git a/compiler/profile_assistant_test.cc b/compiler/profile_assistant_test.cc
new file mode 100644
index 0000000000..58b7513377
--- /dev/null
+++ b/compiler/profile_assistant_test.cc
@@ -0,0 +1,279 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "base/unix_file/fd_file.h"
+#include "common_runtime_test.h"
+#include "compiler/profile_assistant.h"
+#include "jit/offline_profiling_info.h"
+
+namespace art {
+
+class ProfileAssistantTest : public CommonRuntimeTest {
+ protected:
+ void SetupProfile(const std::string& id,
+ uint32_t checksum,
+ uint16_t number_of_methods,
+ const ScratchFile& profile,
+ ProfileCompilationInfo* info,
+ uint16_t start_method_index = 0) {
+ std::string dex_location1 = "location1" + id;
+ uint32_t dex_location_checksum1 = checksum;
+ std::string dex_location2 = "location2" + id;
+ uint32_t dex_location_checksum2 = 10 * checksum;
+ for (uint16_t i = start_method_index; i < start_method_index + number_of_methods; i++) {
+ ASSERT_TRUE(info->AddData(dex_location1, dex_location_checksum1, i));
+ ASSERT_TRUE(info->AddData(dex_location2, dex_location_checksum2, i));
+ }
+ ASSERT_TRUE(info->Save(GetFd(profile)));
+ ASSERT_EQ(0, profile.GetFile()->Flush());
+ ASSERT_TRUE(profile.GetFile()->ResetOffset());
+ }
+
+ uint32_t GetFd(const ScratchFile& file) const {
+ return static_cast<uint32_t>(file.GetFd());
+ }
+};
+
+TEST_F(ProfileAssistantTest, AdviseCompilationEmptyReferences) {
+ ScratchFile profile1;
+ ScratchFile profile2;
+ ScratchFile reference_profile1;
+ ScratchFile reference_profile2;
+
+ std::vector<uint32_t> profile_fds({
+ GetFd(profile1),
+ GetFd(profile2)});
+ std::vector<uint32_t> reference_profile_fds({
+ GetFd(reference_profile1),
+ GetFd(reference_profile2)});
+
+ const uint16_t kNumberOfMethodsToEnableCompilation = 100;
+ ProfileCompilationInfo info1;
+ SetupProfile("p1", 1, kNumberOfMethodsToEnableCompilation, profile1, &info1);
+ ProfileCompilationInfo info2;
+ SetupProfile("p2", 2, kNumberOfMethodsToEnableCompilation, profile2, &info2);
+
+ // We should advise compilation.
+ ProfileCompilationInfo* result;
+ ASSERT_TRUE(ProfileAssistant::ProcessProfiles(profile_fds, reference_profile_fds, &result));
+ ASSERT_TRUE(result != nullptr);
+
+ // The resulting compilation info must be equal to the merge of the inputs.
+ ProfileCompilationInfo expected;
+ ASSERT_TRUE(expected.Load(info1));
+ ASSERT_TRUE(expected.Load(info2));
+ ASSERT_TRUE(expected.Equals(*result));
+
+ // The information from profiles must be transferred to the reference profiles.
+ ProfileCompilationInfo file_info1;
+ ASSERT_TRUE(reference_profile1.GetFile()->ResetOffset());
+ ASSERT_TRUE(file_info1.Load(GetFd(reference_profile1)));
+ ASSERT_TRUE(file_info1.Equals(info1));
+
+ ProfileCompilationInfo file_info2;
+ ASSERT_TRUE(reference_profile2.GetFile()->ResetOffset());
+ ASSERT_TRUE(file_info2.Load(GetFd(reference_profile2)));
+ ASSERT_TRUE(file_info2.Equals(info2));
+
+ // Initial profiles must be cleared.
+ ASSERT_EQ(0, profile1.GetFile()->GetLength());
+ ASSERT_EQ(0, profile2.GetFile()->GetLength());
+}
+
+TEST_F(ProfileAssistantTest, AdviseCompilationNonEmptyReferences) {
+ ScratchFile profile1;
+ ScratchFile profile2;
+ ScratchFile reference_profile1;
+ ScratchFile reference_profile2;
+
+ std::vector<uint32_t> profile_fds({
+ GetFd(profile1),
+ GetFd(profile2)});
+ std::vector<uint32_t> reference_profile_fds({
+ GetFd(reference_profile1),
+ GetFd(reference_profile2)});
+
+ // The new profile info will contain the methods with indices 0-99.
+ const uint16_t kNumberOfMethodsToEnableCompilation = 100;
+ ProfileCompilationInfo info1;
+ SetupProfile("p1", 1, kNumberOfMethodsToEnableCompilation, profile1, &info1);
+ ProfileCompilationInfo info2;
+ SetupProfile("p2", 2, kNumberOfMethodsToEnableCompilation, profile2, &info2);
+
+
+ // The reference profile info will contain the methods with indices 50-149.
+ const uint16_t kNumberOfMethodsAlreadyCompiled = 100;
+ ProfileCompilationInfo reference_info1;
+ SetupProfile("p1", 1, kNumberOfMethodsAlreadyCompiled, reference_profile1,
+ &reference_info1, kNumberOfMethodsToEnableCompilation / 2);
+ ProfileCompilationInfo reference_info2;
+ SetupProfile("p2", 2, kNumberOfMethodsAlreadyCompiled, reference_profile2,
+ &reference_info2, kNumberOfMethodsToEnableCompilation / 2);
+
+ // We should advise compilation.
+ ProfileCompilationInfo* result;
+ ASSERT_TRUE(ProfileAssistant::ProcessProfiles(profile_fds, reference_profile_fds, &result));
+ ASSERT_TRUE(result != nullptr);
+
+ // The resulting compilation info must be equal to the merge of the inputs
+ ProfileCompilationInfo expected;
+ ASSERT_TRUE(expected.Load(info1));
+ ASSERT_TRUE(expected.Load(info2));
+ ASSERT_TRUE(expected.Load(reference_info1));
+ ASSERT_TRUE(expected.Load(reference_info2));
+ ASSERT_TRUE(expected.Equals(*result));
+
+ // The information from profiles must be transferred to the reference profiles.
+ ProfileCompilationInfo file_info1;
+ ProfileCompilationInfo merge1;
+ ASSERT_TRUE(merge1.Load(info1));
+ ASSERT_TRUE(merge1.Load(reference_info1));
+ ASSERT_TRUE(reference_profile1.GetFile()->ResetOffset());
+ ASSERT_TRUE(file_info1.Load(GetFd(reference_profile1)));
+ ASSERT_TRUE(file_info1.Equals(merge1));
+
+ ProfileCompilationInfo file_info2;
+ ProfileCompilationInfo merge2;
+ ASSERT_TRUE(merge2.Load(info2));
+ ASSERT_TRUE(merge2.Load(reference_info2));
+ ASSERT_TRUE(reference_profile2.GetFile()->ResetOffset());
+ ASSERT_TRUE(file_info2.Load(GetFd(reference_profile2)));
+ ASSERT_TRUE(file_info2.Equals(merge2));
+
+ // Initial profiles must be cleared.
+ ASSERT_EQ(0, profile1.GetFile()->GetLength());
+ ASSERT_EQ(0, profile2.GetFile()->GetLength());
+}
+
+TEST_F(ProfileAssistantTest, DoNotAdviseCompilation) {
+ ScratchFile profile1;
+ ScratchFile profile2;
+ ScratchFile reference_profile1;
+ ScratchFile reference_profile2;
+
+ std::vector<uint32_t> profile_fds({
+ GetFd(profile1),
+ GetFd(profile2)});
+ std::vector<uint32_t> reference_profile_fds({
+ GetFd(reference_profile1),
+ GetFd(reference_profile2)});
+
+ const uint16_t kNumberOfMethodsToSkipCompilation = 1;
+ ProfileCompilationInfo info1;
+ SetupProfile("p1", 1, kNumberOfMethodsToSkipCompilation, profile1, &info1);
+ ProfileCompilationInfo info2;
+ SetupProfile("p2", 2, kNumberOfMethodsToSkipCompilation, profile2, &info2);
+
+ // We should not advise compilation.
+ ProfileCompilationInfo* result = nullptr;
+ ASSERT_TRUE(ProfileAssistant::ProcessProfiles(profile_fds, reference_profile_fds, &result));
+ ASSERT_TRUE(result == nullptr);
+
+ // The information from profiles must remain the same.
+ ProfileCompilationInfo file_info1;
+ ASSERT_TRUE(profile1.GetFile()->ResetOffset());
+ ASSERT_TRUE(file_info1.Load(GetFd(profile1)));
+ ASSERT_TRUE(file_info1.Equals(info1));
+
+ ProfileCompilationInfo file_info2;
+ ASSERT_TRUE(profile2.GetFile()->ResetOffset());
+ ASSERT_TRUE(file_info2.Load(GetFd(profile2)));
+ ASSERT_TRUE(file_info2.Equals(info2));
+
+ // Reference profile files must remain empty.
+ ASSERT_EQ(0, reference_profile1.GetFile()->GetLength());
+ ASSERT_EQ(0, reference_profile2.GetFile()->GetLength());
+}
+
+TEST_F(ProfileAssistantTest, FailProcessingBecauseOfProfiles) {
+ ScratchFile profile1;
+ ScratchFile profile2;
+ ScratchFile reference_profile1;
+ ScratchFile reference_profile2;
+
+ std::vector<uint32_t> profile_fds({
+ GetFd(profile1),
+ GetFd(profile2)});
+ std::vector<uint32_t> reference_profile_fds({
+ GetFd(reference_profile1),
+ GetFd(reference_profile2)});
+
+ const uint16_t kNumberOfMethodsToEnableCompilation = 100;
+ // Assign different hashes for the same dex file. This will make merging of information fail.
+ ProfileCompilationInfo info1;
+ SetupProfile("p1", 1, kNumberOfMethodsToEnableCompilation, profile1, &info1);
+ ProfileCompilationInfo info2;
+ SetupProfile("p1", 2, kNumberOfMethodsToEnableCompilation, profile2, &info2);
+
+ // We should fail processing.
+ ProfileCompilationInfo* result = nullptr;
+ ASSERT_FALSE(ProfileAssistant::ProcessProfiles(profile_fds, reference_profile_fds, &result));
+ ASSERT_TRUE(result == nullptr);
+
+ // The information from profiles must still remain the same.
+ ProfileCompilationInfo file_info1;
+ ASSERT_TRUE(profile1.GetFile()->ResetOffset());
+ ASSERT_TRUE(file_info1.Load(GetFd(profile1)));
+ ASSERT_TRUE(file_info1.Equals(info1));
+
+ ProfileCompilationInfo file_info2;
+ ASSERT_TRUE(profile2.GetFile()->ResetOffset());
+ ASSERT_TRUE(file_info2.Load(GetFd(profile2)));
+ ASSERT_TRUE(file_info2.Equals(info2));
+
+ // Reference profile files must still remain empty.
+ ASSERT_EQ(0, reference_profile1.GetFile()->GetLength());
+ ASSERT_EQ(0, reference_profile2.GetFile()->GetLength());
+}
+
+TEST_F(ProfileAssistantTest, FailProcessingBecauseOfReferenceProfiles) {
+ ScratchFile profile1;
+ ScratchFile reference_profile;
+
+ std::vector<uint32_t> profile_fds({
+ GetFd(profile1)});
+ std::vector<uint32_t> reference_profile_fds({
+ GetFd(reference_profile)});
+
+ const uint16_t kNumberOfMethodsToEnableCompilation = 100;
+ // Assign different hashes for the same dex file. This will make merging of information fail.
+ ProfileCompilationInfo info1;
+ SetupProfile("p1", 1, kNumberOfMethodsToEnableCompilation, profile1, &info1);
+ ProfileCompilationInfo reference_info;
+ SetupProfile("p1", 2, kNumberOfMethodsToEnableCompilation, reference_profile, &reference_info);
+
+ // We should fail processing.
+ ProfileCompilationInfo* result = nullptr;
+ ASSERT_TRUE(profile1.GetFile()->ResetOffset());
+ ASSERT_TRUE(reference_profile.GetFile()->ResetOffset());
+ ASSERT_FALSE(ProfileAssistant::ProcessProfiles(profile_fds, reference_profile_fds, &result));
+ ASSERT_TRUE(result == nullptr);
+
+ // The information from profiles must still remain the same.
+ ProfileCompilationInfo file_info1;
+ ASSERT_TRUE(profile1.GetFile()->ResetOffset());
+ ASSERT_TRUE(file_info1.Load(GetFd(profile1)));
+ ASSERT_TRUE(file_info1.Equals(info1));
+
+ ProfileCompilationInfo file_info2;
+ ASSERT_TRUE(reference_profile.GetFile()->ResetOffset());
+ ASSERT_TRUE(file_info2.Load(GetFd(reference_profile)));
+ ASSERT_TRUE(file_info2.Equals(reference_info));
+}
+
+} // namespace art
diff --git a/compiler/utils/test_dex_file_builder.h b/compiler/utils/test_dex_file_builder.h
index b6a228c13c..e57a540669 100644
--- a/compiler/utils/test_dex_file_builder.h
+++ b/compiler/utils/test_dex_file_builder.h
@@ -21,6 +21,7 @@
#include <set>
#include <map>
#include <vector>
+#include <zlib.h>
#include "base/bit_utils.h"
#include "base/logging.h"
@@ -161,7 +162,6 @@ class TestDexFileBuilder {
uint32_t total_size = data_section_offset + data_section_size;
dex_file_data_.resize(total_size);
- std::memcpy(&dex_file_data_[0], header_data.data, sizeof(DexFile::Header));
for (const auto& entry : strings_) {
CHECK_LT(entry.first.size(), 128u);
@@ -210,7 +210,12 @@ class TestDexFileBuilder {
Write32(raw_offset + 4u, GetStringIdx(entry.first.name));
}
- // Leave checksum and signature as zeros.
+ // Leave signature as zeros.
+
+ header->file_size_ = dex_file_data_.size();
+ size_t skip = sizeof(header->magic_) + sizeof(header->checksum_);
+ header->checksum_ = adler32(0u, dex_file_data_.data() + skip, dex_file_data_.size() - skip);
+ std::memcpy(&dex_file_data_[0], header_data.data, sizeof(DexFile::Header));
std::string error_msg;
std::unique_ptr<const DexFile> dex_file(DexFile::Open(
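The checksum now written by the builder follows the dex format rule: an Adler-32 over everything after the 8-byte magic and the 4-byte checksum field itself, which is why those 12 bytes are skipped and the header is only memcpy'd into the buffer once the checksum is known. A minimal standalone sketch using zlib:

#include <zlib.h>

#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  std::vector<uint8_t> dex_data(112, 0);         // stand-in for the built dex bytes
  const size_t kSkip = 8 + 4;                    // sizeof(magic_) + sizeof(checksum_)
  uint32_t checksum =
      adler32(0u, dex_data.data() + kSkip, dex_data.size() - kSkip);
  std::printf("checksum = 0x%08x\n", checksum);  // would be stored into header->checksum_
  return 0;
}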
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index d6caa3c338..7138a46890 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -186,6 +186,22 @@ void X86Assembler::bsrl(Register dst, const Address& src) {
EmitOperand(dst, src);
}
+void X86Assembler::popcntl(Register dst, Register src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0xF3);
+ EmitUint8(0x0F);
+ EmitUint8(0xB8);
+ EmitRegisterOperand(dst, src);
+}
+
+void X86Assembler::popcntl(Register dst, const Address& src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0xF3);
+ EmitUint8(0x0F);
+ EmitUint8(0xB8);
+ EmitOperand(dst, src);
+}
+
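For reference, the bytes these new methods emit: popcnt r32, r32 is the mandatory F3 prefix, the 0F B8 opcode, then a ModRM byte with mod=11, reg=dst, rm=src. A small standalone sketch (not the ART assembler):

#include <cstdint>
#include <cstdio>
#include <vector>

// Sketch only: register-to-register encoding, registers given as 0-7 (EAX=0, ECX=1, ...).
std::vector<uint8_t> EncodePopcntl(uint8_t dst, uint8_t src) {
  uint8_t modrm = static_cast<uint8_t>(0xC0 | (dst << 3) | src);
  return {0xF3, 0x0F, 0xB8, modrm};
}

int main() {
  // popcntl %ecx, %eax  ->  F3 0F B8 C1
  for (uint8_t b : EncodePopcntl(0, 1)) std::printf("%02X ", b);
  std::printf("\n");
}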
void X86Assembler::movzxb(Register dst, ByteRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x0F);
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index 655af9c184..759a41e80e 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -330,11 +330,15 @@ class X86Assembler FINAL : public Assembler {
void movntl(const Address& dst, Register src);
void bswapl(Register dst);
+
void bsfl(Register dst, Register src);
void bsfl(Register dst, const Address& src);
void bsrl(Register dst, Register src);
void bsrl(Register dst, const Address& src);
+ void popcntl(Register dst, Register src);
+ void popcntl(Register dst, const Address& src);
+
void rorl(Register reg, const Immediate& imm);
void rorl(Register operand, Register shifter);
void roll(Register reg, const Immediate& imm);
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index a9b991c7a0..0fd098227a 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -260,6 +260,19 @@ TEST_F(AssemblerX86Test, BsrlAddress) {
DriverStr(expected, "bsrl_address");
}
+TEST_F(AssemblerX86Test, Popcntl) {
+ DriverStr(RepeatRR(&x86::X86Assembler::popcntl, "popcntl %{reg2}, %{reg1}"), "popcntl");
+}
+
+TEST_F(AssemblerX86Test, PopcntlAddress) {
+ GetAssembler()->popcntl(x86::Register(x86::EDI), x86::Address(
+ x86::Register(x86::EDI), x86::Register(x86::EBX), x86::TIMES_4, 12));
+ const char* expected =
+ "popcntl 0xc(%EDI,%EBX,4), %EDI\n";
+
+ DriverStr(expected, "popcntl_address");
+}
+
// Rorl only allows CL as the shift count.
std::string rorl_fn(AssemblerX86Test::Base* assembler_test, x86::X86Assembler* assembler) {
std::ostringstream str;
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index db072678ef..10f5a005e1 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -2247,6 +2247,42 @@ void X86_64Assembler::bsrq(CpuRegister dst, const Address& src) {
EmitOperand(dst.LowBits(), src);
}
+void X86_64Assembler::popcntl(CpuRegister dst, CpuRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0xF3);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0xB8);
+ EmitRegisterOperand(dst.LowBits(), src.LowBits());
+}
+
+void X86_64Assembler::popcntl(CpuRegister dst, const Address& src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0xF3);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0xB8);
+ EmitOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::popcntq(CpuRegister dst, CpuRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0xF3);
+ EmitRex64(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0xB8);
+ EmitRegisterOperand(dst.LowBits(), src.LowBits());
+}
+
+void X86_64Assembler::popcntq(CpuRegister dst, const Address& src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0xF3);
+ EmitRex64(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0xB8);
+ EmitOperand(dst.LowBits(), src);
+}
+
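The 64-bit forms differ only in the REX prefix, emitted between the mandatory F3 and the 0F B8 opcode: REX is 0100WRXB, with W=1 selecting the 64-bit operand size and R/B extending the ModRM reg/rm fields for R8-R15. A small standalone sketch (not the ART assembler):

#include <cstdint>
#include <cstdio>
#include <vector>

// Sketch only: register numbers are 0-15 (RAX=0 ... RDI=7, R8=8 ... R15=15).
std::vector<uint8_t> EncodePopcntq(uint8_t dst, uint8_t src) {
  uint8_t rex = static_cast<uint8_t>(0x48 | ((dst >> 3) << 2) | (src >> 3));   // W=1, R, B
  uint8_t modrm = static_cast<uint8_t>(0xC0 | ((dst & 7) << 3) | (src & 7));
  return {0xF3, rex, 0x0F, 0xB8, modrm};
}

int main() {
  // popcntq %rdi, %r10  ->  F3 4C 0F B8 D7
  for (uint8_t b : EncodePopcntq(10, 7)) std::printf("%02X ", b);
  std::printf("\n");
}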
void X86_64Assembler::repne_scasw() {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x66);
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 01d28e305d..6f0847eb61 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -647,6 +647,11 @@ class X86_64Assembler FINAL : public Assembler {
void bsrq(CpuRegister dst, CpuRegister src);
void bsrq(CpuRegister dst, const Address& src);
+ void popcntl(CpuRegister dst, CpuRegister src);
+ void popcntl(CpuRegister dst, const Address& src);
+ void popcntq(CpuRegister dst, CpuRegister src);
+ void popcntq(CpuRegister dst, const Address& src);
+
void rorl(CpuRegister reg, const Immediate& imm);
void rorl(CpuRegister operand, CpuRegister shifter);
void roll(CpuRegister reg, const Immediate& imm);
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index 00bb5ca36b..8a87fca96a 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -1333,6 +1333,44 @@ TEST_F(AssemblerX86_64Test, BsrqAddress) {
DriverStr(expected, "bsrq_address");
}
+TEST_F(AssemblerX86_64Test, Popcntl) {
+ DriverStr(Repeatrr(&x86_64::X86_64Assembler::popcntl, "popcntl %{reg2}, %{reg1}"), "popcntl");
+}
+
+TEST_F(AssemblerX86_64Test, PopcntlAddress) {
+ GetAssembler()->popcntl(x86_64::CpuRegister(x86_64::R10), x86_64::Address(
+ x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12));
+ GetAssembler()->popcntl(x86_64::CpuRegister(x86_64::RDI), x86_64::Address(
+ x86_64::CpuRegister(x86_64::R10), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12));
+ GetAssembler()->popcntl(x86_64::CpuRegister(x86_64::RDI), x86_64::Address(
+ x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12));
+ const char* expected =
+ "popcntl 0xc(%RDI,%RBX,4), %R10d\n"
+ "popcntl 0xc(%R10,%RBX,4), %edi\n"
+ "popcntl 0xc(%RDI,%R9,4), %edi\n";
+
+ DriverStr(expected, "popcntl_address");
+}
+
+TEST_F(AssemblerX86_64Test, Popcntq) {
+ DriverStr(RepeatRR(&x86_64::X86_64Assembler::popcntq, "popcntq %{reg2}, %{reg1}"), "popcntq");
+}
+
+TEST_F(AssemblerX86_64Test, PopcntqAddress) {
+ GetAssembler()->popcntq(x86_64::CpuRegister(x86_64::R10), x86_64::Address(
+ x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12));
+ GetAssembler()->popcntq(x86_64::CpuRegister(x86_64::RDI), x86_64::Address(
+ x86_64::CpuRegister(x86_64::R10), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12));
+ GetAssembler()->popcntq(x86_64::CpuRegister(x86_64::RDI), x86_64::Address(
+ x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12));
+ const char* expected =
+ "popcntq 0xc(%RDI,%RBX,4), %R10\n"
+ "popcntq 0xc(%R10,%RBX,4), %RDI\n"
+ "popcntq 0xc(%RDI,%R9,4), %RDI\n";
+
+ DriverStr(expected, "popcntq_address");
+}
+
/////////////////
// Near labels //
/////////////////