Diffstat (limited to 'compiler')
-rw-r--r--  compiler/Android.mk | 3
-rw-r--r--  compiler/common_compiler_test.cc | 4
-rw-r--r--  compiler/dex/quick/quick_cfi_test.cc | 2
-rw-r--r--  compiler/dex/quick/quick_compiler.cc | 32
-rw-r--r--  compiler/dex/quick/quick_compiler.h | 3
-rw-r--r--  compiler/dex/quick/x86/quick_assemble_x86_test.cc | 2
-rw-r--r--  compiler/driver/compiled_method_storage_test.cc | 2
-rw-r--r--  compiler/driver/compiler_driver.cc | 27
-rw-r--r--  compiler/driver/compiler_driver.h | 10
-rw-r--r--  compiler/driver/compiler_options.cc | 2
-rw-r--r--  compiler/driver/compiler_options.h | 2
-rw-r--r--  compiler/dwarf/method_debug_info.h | 4
-rw-r--r--  compiler/elf_builder.h | 12
-rw-r--r--  compiler/elf_writer_debug.cc | 293
-rw-r--r--  compiler/elf_writer_debug.h | 15
-rw-r--r--  compiler/elf_writer_quick.cc | 2
-rw-r--r--  compiler/image_writer.cc | 303
-rw-r--r--  compiler/image_writer.h | 98
-rw-r--r--  compiler/jit/jit_compiler.cc | 41
-rw-r--r--  compiler/jit/jit_compiler.h | 4
-rw-r--r--  compiler/linker/relative_patcher_test.h | 2
-rw-r--r--  compiler/oat_test.cc | 2
-rw-r--r--  compiler/optimizing/bounds_check_elimination.cc | 65
-rw-r--r--  compiler/optimizing/code_generator.cc | 12
-rw-r--r--  compiler/optimizing/code_generator.h | 120
-rw-r--r--  compiler/optimizing/code_generator_arm.cc | 807
-rw-r--r--  compiler/optimizing/code_generator_arm.h | 110
-rw-r--r--  compiler/optimizing/code_generator_arm64.cc | 30
-rw-r--r--  compiler/optimizing/code_generator_arm64.h | 2
-rw-r--r--  compiler/optimizing/code_generator_mips.cc | 391
-rw-r--r--  compiler/optimizing/code_generator_mips.h | 2
-rw-r--r--  compiler/optimizing/code_generator_mips64.cc | 216
-rw-r--r--  compiler/optimizing/code_generator_mips64.h | 2
-rw-r--r--  compiler/optimizing/code_generator_x86.cc | 22
-rw-r--r--  compiler/optimizing/code_generator_x86.h | 2
-rw-r--r--  compiler/optimizing/code_generator_x86_64.cc | 30
-rw-r--r--  compiler/optimizing/code_generator_x86_64.h | 2
-rw-r--r--  compiler/optimizing/instruction_simplifier.cc | 26
-rw-r--r--  compiler/optimizing/intrinsics_arm.cc | 96
-rw-r--r--  compiler/optimizing/intrinsics_arm64.cc | 6
-rw-r--r--  compiler/optimizing/intrinsics_mips.cc | 12
-rw-r--r--  compiler/optimizing/intrinsics_mips.h | 5
-rw-r--r--  compiler/optimizing/nodes.cc | 28
-rw-r--r--  compiler/optimizing/nodes.h | 26
-rw-r--r--  compiler/optimizing/nodes_arm64.h | 1
-rw-r--r--  compiler/optimizing/optimizing_compiler.cc | 38
-rw-r--r--  compiler/optimizing/parallel_move_resolver.cc | 5
-rw-r--r--  compiler/optimizing/prepare_for_register_allocation.cc | 2
-rw-r--r--  compiler/optimizing/register_allocator.cc | 6
-rw-r--r--  compiler/optimizing/stack_map_stream.cc | 2
-rw-r--r--  compiler/optimizing/stack_map_test.cc | 16
-rw-r--r--  compiler/profile_assistant.cc | 69
-rw-r--r--  compiler/profile_assistant.h | 61
-rw-r--r--  compiler/utils/arm/assembler_arm.h | 2
-rw-r--r--  compiler/utils/arm/assembler_thumb2.cc | 32
-rw-r--r--  compiler/utils/assembler_thumb_test.cc | 70
-rw-r--r--  compiler/utils/assembler_thumb_test_expected.cc.inc | 197
-rw-r--r--  compiler/utils/mips/assembler_mips.cc | 46
-rw-r--r--  compiler/utils/mips/assembler_mips.h | 8
-rw-r--r--  compiler/utils/mips/assembler_mips_test.cc | 24
-rw-r--r--  compiler/utils/mips64/assembler_mips64.cc | 16
-rw-r--r--  compiler/utils/mips64/assembler_mips64.h | 4
-rw-r--r--  compiler/utils/mips64/assembler_mips64_test.cc | 16
-rw-r--r--  compiler/utils/swap_space.cc | 45
-rw-r--r--  compiler/utils/swap_space.h | 82
65 files changed, 2586 insertions(+), 1033 deletions(-)
diff --git a/compiler/Android.mk b/compiler/Android.mk
index f0bf4997c6..458973684e 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -108,7 +108,8 @@ LIBART_COMPILER_SRC_FILES := \
elf_writer_debug.cc \
elf_writer_quick.cc \
image_writer.cc \
- oat_writer.cc
+ oat_writer.cc \
+ profile_assistant.cc
LIBART_COMPILER_SRC_FILES_arm := \
dex/quick/arm/assemble_arm.cc \
diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc
index 278c49017e..b5fd1e074f 100644
--- a/compiler/common_compiler_test.cc
+++ b/compiler/common_compiler_test.cc
@@ -208,8 +208,8 @@ void CommonCompilerTest::CreateCompilerDriver(Compiler::Kind kind, InstructionSe
false,
timer_.get(),
-1,
- /* profile_file */ "",
- /* dex_to_oat_map */ nullptr));
+ /* dex_to_oat_map */ nullptr,
+ /* profile_compilation_info */ nullptr));
// We typically don't generate an image in unit tests, disable this optimization by default.
compiler_driver_->SetSupportBootImageFixup(false);
}
diff --git a/compiler/dex/quick/quick_cfi_test.cc b/compiler/dex/quick/quick_cfi_test.cc
index bcf20c7efa..12568a4ad4 100644
--- a/compiler/dex/quick/quick_cfi_test.cc
+++ b/compiler/dex/quick/quick_cfi_test.cc
@@ -92,7 +92,7 @@ class QuickCFITest : public CFITest {
false,
0,
-1,
- "",
+ nullptr,
nullptr);
ClassLinker* linker = nullptr;
CompilationUnit cu(&pool, isa, &driver, linker);
diff --git a/compiler/dex/quick/quick_compiler.cc b/compiler/dex/quick/quick_compiler.cc
index 3260a7a050..ebc9a2c9ea 100644
--- a/compiler/dex/quick/quick_compiler.cc
+++ b/compiler/dex/quick/quick_compiler.cc
@@ -520,7 +520,12 @@ static bool CanCompileShorty(const char* shorty, InstructionSet instruction_set)
// In the rare cases we compile experimental opcodes, the runtime has an option to enable it,
// which will force scanning for any unsupported opcodes.
static bool SkipScanningUnsupportedOpcodes(InstructionSet instruction_set) {
- if (UNLIKELY(kUnsupportedOpcodesSize[instruction_set] == 0U)) {
+ Runtime* runtime = Runtime::Current();
+ if (UNLIKELY(runtime->AreExperimentalFlagsEnabled(ExperimentalFlags::kDefaultMethods))) {
+ // Always need to scan opcodes if we have default methods since invoke-super for interface
+ // methods is never going to be supported in the quick compiler.
+ return false;
+ } else if (UNLIKELY(kUnsupportedOpcodesSize[instruction_set] == 0U)) {
// All opcodes are supported no matter what. Usually not the case
// since experimental opcodes are not implemented in the quick compiler.
return true;
@@ -538,8 +543,28 @@ static bool SkipScanningUnsupportedOpcodes(InstructionSet instruction_set) {
}
}
+bool QuickCompiler::CanCompileInstruction(const MIR* mir,
+ const DexFile& dex_file) const {
+ switch (mir->dalvikInsn.opcode) {
+    // The quick compiler will not support the new instruction semantics for invoke-super into an
+    // interface method.
+ case Instruction::INVOKE_SUPER: // Fall-through
+ case Instruction::INVOKE_SUPER_RANGE: {
+ DCHECK(mir->dalvikInsn.IsInvoke());
+ uint32_t invoke_method_idx = mir->dalvikInsn.vB;
+ const DexFile::MethodId& method_id = dex_file.GetMethodId(invoke_method_idx);
+ const DexFile::ClassDef* class_def = dex_file.FindClassDef(method_id.class_idx_);
+      // Only compilable if the declaring class is not an interface, i.e. (java_access_flags & kAccInterface) == 0.
+ return class_def != nullptr && ((class_def->GetJavaAccessFlags() & kAccInterface) == 0);
+ }
+ default:
+ return true;
+ }
+}
+
// Skip methods that we do not currently support.
-bool QuickCompiler::CanCompileMethod(uint32_t method_idx, const DexFile& dex_file,
+bool QuickCompiler::CanCompileMethod(uint32_t method_idx,
+ const DexFile& dex_file,
CompilationUnit* cu) const {
// This is a limitation in mir_graph. See MirGraph::SetNumSSARegs.
if (cu->mir_graph->GetNumOfCodeAndTempVRs() > kMaxAllowedDalvikRegisters) {
@@ -580,6 +605,9 @@ bool QuickCompiler::CanCompileMethod(uint32_t method_idx, const DexFile& dex_fil
<< MIRGraph::extended_mir_op_names_[opcode - kMirOpFirst];
}
return false;
+ } else if (!CanCompileInstruction(mir, dex_file)) {
+ VLOG(compiler) << "Cannot compile dalvik opcode : " << mir->dalvikInsn.opcode;
+ return false;
}
// Check if it invokes a prototype that we cannot support.
if (std::find(kInvokeOpcodes, kInvokeOpcodes + arraysize(kInvokeOpcodes), opcode)
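The hunks above make the quick compiler reject invoke-super and invoke-super/range whenever the
referenced class is an interface, since the new interface invoke-super semantics will never be
implemented there. A minimal sketch of that decision, using simplified stand-ins for the dex
structures (the real DexFile and MethodId accessors differ):

    #include <cstdint>

    // Illustrative stand-ins only; not the real ART types.
    constexpr uint32_t kAccInterface = 0x0200;  // Dex access flag marking interfaces.

    struct ClassDef {
      uint32_t access_flags;  // Simplified: the real ClassDef exposes GetJavaAccessFlags().
    };

    // Mirrors CanCompileInstruction for INVOKE_SUPER[_RANGE]: the referenced
    // class must resolve and must not be an interface.
    bool CanCompileInvokeSuper(const ClassDef* class_def) {
      return class_def != nullptr && (class_def->access_flags & kAccInterface) == 0;
    }
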
diff --git a/compiler/dex/quick/quick_compiler.h b/compiler/dex/quick/quick_compiler.h
index d512b256cd..55f45f1ab0 100644
--- a/compiler/dex/quick/quick_compiler.h
+++ b/compiler/dex/quick/quick_compiler.h
@@ -18,6 +18,7 @@
#define ART_COMPILER_DEX_QUICK_QUICK_COMPILER_H_
#include "compiler.h"
+#include "dex/mir_graph.h"
namespace art {
@@ -74,6 +75,8 @@ class QuickCompiler : public Compiler {
explicit QuickCompiler(CompilerDriver* driver);
private:
+ bool CanCompileInstruction(const MIR* mir, const DexFile& dex_file) const;
+
std::unique_ptr<PassManager> pre_opt_pass_manager_;
std::unique_ptr<PassManager> post_opt_pass_manager_;
DISALLOW_COPY_AND_ASSIGN(QuickCompiler);
diff --git a/compiler/dex/quick/x86/quick_assemble_x86_test.cc b/compiler/dex/quick/x86/quick_assemble_x86_test.cc
index 9deabc02e9..b39fe4da4f 100644
--- a/compiler/dex/quick/x86/quick_assemble_x86_test.cc
+++ b/compiler/dex/quick/x86/quick_assemble_x86_test.cc
@@ -73,7 +73,7 @@ class QuickAssembleX86TestBase : public testing::Test {
false,
0,
-1,
- "",
+ nullptr,
nullptr));
cu_.reset(new CompilationUnit(pool_.get(), isa_, compiler_driver_.get(), nullptr));
DexFile::CodeItem* code_item = static_cast<DexFile::CodeItem*>(
diff --git a/compiler/driver/compiled_method_storage_test.cc b/compiler/driver/compiled_method_storage_test.cc
index 84fb4324b5..f18fa67ea5 100644
--- a/compiler/driver/compiled_method_storage_test.cc
+++ b/compiler/driver/compiled_method_storage_test.cc
@@ -45,7 +45,7 @@ TEST(CompiledMethodStorage, Deduplicate) {
false,
nullptr,
-1,
- "",
+ nullptr,
nullptr);
CompiledMethodStorage* storage = driver.GetCompiledMethodStorage();
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index afb4b71ccf..043bd93bd7 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -347,8 +347,8 @@ CompilerDriver::CompilerDriver(
size_t thread_count, bool dump_stats, bool dump_passes,
const std::string& dump_cfg_file_name, bool dump_cfg_append,
CumulativeLogger* timer, int swap_fd,
- const std::string& profile_file,
- const std::unordered_map<const DexFile*, const char*>* dex_to_oat_map)
+ const std::unordered_map<const DexFile*, const char*>* dex_to_oat_map,
+ const ProfileCompilationInfo* profile_compilation_info)
: compiler_options_(compiler_options),
verification_results_(verification_results),
method_inliner_map_(method_inliner_map),
@@ -377,7 +377,8 @@ CompilerDriver::CompilerDriver(
support_boot_image_fixup_(instruction_set != kMips && instruction_set != kMips64),
dex_files_for_oat_file_(nullptr),
dex_file_oat_filename_map_(dex_to_oat_map),
- compiled_method_storage_(swap_fd) {
+ compiled_method_storage_(swap_fd),
+ profile_compilation_info_(profile_compilation_info) {
DCHECK(compiler_options_ != nullptr);
DCHECK(verification_results_ != nullptr);
DCHECK(method_inliner_map_ != nullptr);
@@ -385,12 +386,6 @@ CompilerDriver::CompilerDriver(
compiler_->Init();
CHECK_EQ(boot_image_, image_classes_.get() != nullptr);
-
- // Read the profile file if one is provided.
- if (!profile_file.empty()) {
- profile_compilation_info_.reset(new ProfileCompilationInfo(profile_file));
- LOG(INFO) << "Using profile data from file " << profile_file;
- }
}
CompilerDriver::~CompilerDriver() {
@@ -2306,15 +2301,11 @@ void CompilerDriver::InitializeClasses(jobject class_loader,
void CompilerDriver::Compile(jobject class_loader, const std::vector<const DexFile*>& dex_files,
ThreadPool* thread_pool, TimingLogger* timings) {
- if (profile_compilation_info_ != nullptr) {
- if (!profile_compilation_info_->Load(dex_files)) {
- LOG(WARNING) << "Failed to load offline profile info from "
- << profile_compilation_info_->GetFilename()
- << ". No methods will be compiled";
- } else if (kDebugProfileGuidedCompilation) {
- LOG(INFO) << "[ProfileGuidedCompilation] "
- << profile_compilation_info_->DumpInfo();
- }
+ if (kDebugProfileGuidedCompilation) {
+ LOG(INFO) << "[ProfileGuidedCompilation] " <<
+ ((profile_compilation_info_ == nullptr)
+ ? "null"
+ : profile_compilation_info_->DumpInfo(&dex_files));
}
for (size_t i = 0; i != dex_files.size(); ++i) {
const DexFile* dex_file = dex_files[i];
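The driver-side profile handling is inverted here: rather than receiving a file name and loading
it in its constructor, CompilerDriver now borrows an already-loaded, immutable
ProfileCompilationInfo from its caller, with null meaning "no profile" rather than a load
failure. A hedged model of the ownership pattern, with hypothetical names:

    #include <memory>

    class ProfileInfo {};  // Hypothetical stand-in for ProfileCompilationInfo.

    class Driver {
     public:
      // The driver only borrows the profile and never loads or mutates it.
      explicit Driver(const ProfileInfo* profile) : profile_(profile) {}

      bool HasProfile() const { return profile_ != nullptr; }

     private:
      const ProfileInfo* const profile_;  // Borrowed pointer, may be null.
    };

    int main() {
      std::unique_ptr<ProfileInfo> profile(new ProfileInfo());  // Caller owns the data.
      Driver with_profile(profile.get());
      Driver without_profile(nullptr);                          // Profile-less compile.
      return (with_profile.HasProfile() && !without_profile.HasProfile()) ? 0 : 1;
    }
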
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index fa0cb9a412..3847c8183e 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -97,8 +97,8 @@ class CompilerDriver {
size_t thread_count, bool dump_stats, bool dump_passes,
const std::string& dump_cfg_file_name, bool dump_cfg_append,
CumulativeLogger* timer, int swap_fd,
- const std::string& profile_file,
- const std::unordered_map<const DexFile*, const char*>* dex_to_oat_map);
+ const std::unordered_map<const DexFile*, const char*>* dex_to_oat_map,
+ const ProfileCompilationInfo* profile_compilation_info);
~CompilerDriver();
@@ -657,9 +657,6 @@ class CompilerDriver {
// This option may be restricted to the boot image, depending on a flag in the implementation.
std::unique_ptr<std::unordered_set<std::string>> methods_to_compile_;
- // Info for profile guided compilation.
- std::unique_ptr<ProfileCompilationInfo> profile_compilation_info_;
-
bool had_hard_verifier_failure_;
size_t thread_count_;
@@ -689,6 +686,9 @@ class CompilerDriver {
CompiledMethodStorage compiled_method_storage_;
+ // Info for profile guided compilation.
+ const ProfileCompilationInfo* const profile_compilation_info_;
+
friend class CompileClassVisitor;
DISALLOW_COPY_AND_ASSIGN(CompilerDriver);
};
diff --git a/compiler/driver/compiler_options.cc b/compiler/driver/compiler_options.cc
index 209bb5a3c2..385f34a9f9 100644
--- a/compiler/driver/compiler_options.cc
+++ b/compiler/driver/compiler_options.cc
@@ -211,11 +211,9 @@ bool CompilerOptions::ParseCompilerOption(const StringPiece& option, UsageFn Usa
generate_debug_info_ = false;
} else if (option == "--debuggable") {
debuggable_ = true;
- generate_debug_info_ = true;
} else if (option == "--native-debuggable") {
native_debuggable_ = true;
debuggable_ = true;
- generate_debug_info_ = true;
} else if (option.starts_with("--top-k-profile-threshold=")) {
ParseDouble(option.data(), '=', 0.0, 100.0, &top_k_profile_threshold_, Usage);
} else if (option == "--include-patch-information") {
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index f8032bb514..f14bdc4a2f 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -50,7 +50,7 @@ class CompilerOptions FINAL {
static const size_t kDefaultNumDexMethodsThreshold = 900;
static constexpr double kDefaultTopKProfileThreshold = 90.0;
static const bool kDefaultNativeDebuggable = false;
- static const bool kDefaultGenerateDebugInfo = kIsDebugBuild;
+ static const bool kDefaultGenerateDebugInfo = false;
static const bool kDefaultIncludePatchInformation = false;
static const size_t kDefaultInlineDepthLimit = 3;
static const size_t kDefaultInlineMaxCodeUnits = 32;
diff --git a/compiler/dwarf/method_debug_info.h b/compiler/dwarf/method_debug_info.h
index a391e4d08a..e8ba9148e8 100644
--- a/compiler/dwarf/method_debug_info.h
+++ b/compiler/dwarf/method_debug_info.h
@@ -30,8 +30,8 @@ struct MethodDebugInfo {
uint32_t access_flags_;
const DexFile::CodeItem* code_item_;
bool deduped_;
- uint32_t low_pc_;
- uint32_t high_pc_;
+ uintptr_t low_pc_;
+ uintptr_t high_pc_;
CompiledMethod* compiled_method_;
};
diff --git a/compiler/elf_builder.h b/compiler/elf_builder.h
index bb07cc2913..a7461a5525 100644
--- a/compiler/elf_builder.h
+++ b/compiler/elf_builder.h
@@ -148,6 +148,12 @@ class ElfBuilder FINAL {
}
}
+ // Returns true if the section was written to disk.
+ // (Used to check whether we have .text when writing JIT debug info)
+ bool Exists() const {
+ return finished_;
+ }
+
// Get the location of this section in virtual memory.
Elf_Addr GetAddress() const {
CHECK(started_);
@@ -247,16 +253,18 @@ class ElfBuilder FINAL {
}
// Buffer symbol for this section. It will be written later.
+ // If the symbol's section is null, it will be considered absolute (SHN_ABS).
+ // (we use this in JIT to reference code which is stored outside the debug ELF file)
void Add(Elf_Word name, const Section* section,
Elf_Addr addr, bool is_relative, Elf_Word size,
uint8_t binding, uint8_t type, uint8_t other = 0) {
- CHECK(section != nullptr);
Elf_Sym sym = Elf_Sym();
sym.st_name = name;
sym.st_value = addr + (is_relative ? section->GetAddress() : 0);
sym.st_size = size;
sym.st_other = other;
- sym.st_shndx = section->GetSectionIndex();
+ sym.st_shndx = (section != nullptr ? section->GetSectionIndex()
+ : static_cast<Elf_Word>(SHN_ABS));
sym.st_info = (binding << 4) + (type & 0xf);
symbols_.push_back(sym);
}
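With the CHECK(section != nullptr) removed, Add() now accepts a null section and marks the symbol
absolute (SHN_ABS), which lets a JIT mini debug ELF name code living outside the file itself. A
compilable sketch of the st_value/st_shndx selection against the standard ELF definitions
(parameter names are illustrative):

    #include <elf.h>

    // With no section, the address is used verbatim and the symbol is
    // absolute rather than section-relative.
    Elf32_Sym MakeFuncSymbol(Elf32_Word name_offset,
                             Elf32_Addr addr,
                             bool is_relative,
                             const Elf32_Shdr* section,   // May be null.
                             Elf32_Half section_index) {
      Elf32_Sym sym = Elf32_Sym();
      sym.st_name = name_offset;
      sym.st_value = addr + (is_relative && section != nullptr ? section->sh_addr : 0);
      sym.st_shndx = section != nullptr ? section_index : SHN_ABS;
      sym.st_info = (STB_GLOBAL << 4) + (STT_FUNC & 0xf);
      return sym;
    }
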
diff --git a/compiler/elf_writer_debug.cc b/compiler/elf_writer_debug.cc
index 2bc8c89f73..dd50f69b71 100644
--- a/compiler/elf_writer_debug.cc
+++ b/compiler/elf_writer_debug.cc
@@ -22,16 +22,20 @@
#include "base/casts.h"
#include "base/stl_util.h"
#include "compiled_method.h"
-#include "driver/compiler_driver.h"
#include "dex_file-inl.h"
+#include "driver/compiler_driver.h"
#include "dwarf/dedup_vector.h"
#include "dwarf/headers.h"
#include "dwarf/method_debug_info.h"
#include "dwarf/register.h"
#include "elf_builder.h"
+#include "linker/vector_output_stream.h"
+#include "mirror/array.h"
+#include "mirror/class-inl.h"
+#include "mirror/class.h"
#include "oat_writer.h"
-#include "utils.h"
#include "stack_map.h"
+#include "utils.h"
namespace art {
namespace dwarf {
@@ -219,6 +223,10 @@ void WriteCFISection(ElfBuilder<ElfTypes>* builder,
CHECK(format == DW_DEBUG_FRAME_FORMAT || format == DW_EH_FRAME_FORMAT);
typedef typename ElfTypes::Addr Elf_Addr;
+ if (method_infos.empty()) {
+ return;
+ }
+
std::vector<uint32_t> binary_search_table;
std::vector<uintptr_t> patch_locations;
if (format == DW_EH_FRAME_FORMAT) {
@@ -234,7 +242,9 @@ void WriteCFISection(ElfBuilder<ElfTypes>* builder,
{
cfi_section->Start();
const bool is64bit = Is64BitInstructionSet(builder->GetIsa());
- const Elf_Addr text_address = builder->GetText()->GetAddress();
+ const Elf_Addr text_address = builder->GetText()->Exists()
+ ? builder->GetText()->GetAddress()
+ : 0;
const Elf_Addr cfi_address = cfi_section->GetAddress();
const Elf_Addr cie_address = cfi_address;
Elf_Addr buffer_address = cfi_address;
@@ -305,8 +315,8 @@ namespace {
struct CompilationUnit {
std::vector<const MethodDebugInfo*> methods_;
size_t debug_line_offset_ = 0;
- uint32_t low_pc_ = 0xFFFFFFFFU;
- uint32_t high_pc_ = 0;
+ uintptr_t low_pc_ = std::numeric_limits<uintptr_t>::max();
+ uintptr_t high_pc_ = 0;
};
typedef std::vector<DexFile::LocalInfo> LocalInfos;
@@ -439,14 +449,17 @@ class DebugInfoWriter {
void Write(const CompilationUnit& compilation_unit) {
CHECK(!compilation_unit.methods_.empty());
- const Elf_Addr text_address = owner_->builder_->GetText()->GetAddress();
+ const Elf_Addr text_address = owner_->builder_->GetText()->Exists()
+ ? owner_->builder_->GetText()->GetAddress()
+ : 0;
+ const uintptr_t cu_size = compilation_unit.high_pc_ - compilation_unit.low_pc_;
info_.StartTag(DW_TAG_compile_unit);
info_.WriteStrp(DW_AT_producer, owner_->WriteString("Android dex2oat"));
info_.WriteData1(DW_AT_language, DW_LANG_Java);
info_.WriteStrp(DW_AT_comp_dir, owner_->WriteString("$JAVA_SRC_ROOT"));
info_.WriteAddr(DW_AT_low_pc, text_address + compilation_unit.low_pc_);
- info_.WriteUdata(DW_AT_high_pc, compilation_unit.high_pc_ - compilation_unit.low_pc_);
+ info_.WriteUdata(DW_AT_high_pc, dchecked_integral_cast<uint32_t>(cu_size));
info_.WriteSecOffset(DW_AT_stmt_list, compilation_unit.debug_line_offset_);
const char* last_dex_class_desc = nullptr;
@@ -464,8 +477,16 @@ class DebugInfoWriter {
if (last_dex_class_desc != nullptr) {
EndClassTag(last_dex_class_desc);
}
- size_t offset = StartClassTag(dex_class_desc);
- type_cache_.emplace(dex_class_desc, offset);
+ // Write reference tag for the class we are about to declare.
+ size_t reference_tag_offset = info_.StartTag(DW_TAG_reference_type);
+ type_cache_.emplace(std::string(dex_class_desc), reference_tag_offset);
+ size_t type_attrib_offset = info_.size();
+ info_.WriteRef4(DW_AT_type, 0);
+ info_.EndTag();
+ // Declare the class that owns this method.
+ size_t class_offset = StartClassTag(dex_class_desc);
+ info_.UpdateUint32(type_attrib_offset, class_offset);
+ info_.WriteFlag(DW_AT_declaration, true);
// Check that each class is defined only once.
bool unique = owner_->defined_dex_classes_.insert(dex_class_desc).second;
CHECK(unique) << "Redefinition of " << dex_class_desc;
@@ -476,7 +497,7 @@ class DebugInfoWriter {
info_.StartTag(DW_TAG_subprogram);
WriteName(dex->GetMethodName(dex_method));
info_.WriteAddr(DW_AT_low_pc, text_address + mi->low_pc_);
- info_.WriteUdata(DW_AT_high_pc, mi->high_pc_ - mi->low_pc_);
+ info_.WriteUdata(DW_AT_high_pc, dchecked_integral_cast<uint32_t>(mi->high_pc_-mi->low_pc_));
uint8_t frame_base[] = { DW_OP_call_frame_cfa };
info_.WriteExprLoc(DW_AT_frame_base, &frame_base, sizeof(frame_base));
WriteLazyType(dex->GetReturnTypeDescriptor(dex_proto));
@@ -562,6 +583,92 @@ class DebugInfoWriter {
owner_->builder_->GetDebugInfo()->WriteFully(buffer.data(), buffer.size());
}
+ void Write(const ArrayRef<mirror::Class*>& types) SHARED_REQUIRES(Locks::mutator_lock_) {
+ info_.StartTag(DW_TAG_compile_unit);
+ info_.WriteStrp(DW_AT_producer, owner_->WriteString("Android dex2oat"));
+ info_.WriteData1(DW_AT_language, DW_LANG_Java);
+
+ for (mirror::Class* type : types) {
+ if (type->IsPrimitive()) {
+ // For primitive types the definition and the declaration is the same.
+ if (type->GetPrimitiveType() != Primitive::kPrimVoid) {
+ WriteTypeDeclaration(type->GetDescriptor(nullptr));
+ }
+ } else if (type->IsArrayClass()) {
+ mirror::Class* element_type = type->GetComponentType();
+ uint32_t component_size = type->GetComponentSize();
+ uint32_t data_offset = mirror::Array::DataOffset(component_size).Uint32Value();
+ uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
+
+ info_.StartTag(DW_TAG_array_type);
+ std::string descriptor_string;
+ WriteLazyType(element_type->GetDescriptor(&descriptor_string));
+ info_.WriteUdata(DW_AT_data_member_location, data_offset);
+ info_.StartTag(DW_TAG_subrange_type);
+ DCHECK_LT(length_offset, 32u);
+ uint8_t count[] = {
+ DW_OP_push_object_address,
+ static_cast<uint8_t>(DW_OP_lit0 + length_offset),
+ DW_OP_plus,
+ DW_OP_deref_size,
+ 4 // Array length is always 32-bit wide.
+ };
+ info_.WriteExprLoc(DW_AT_count, &count, sizeof(count));
+ info_.EndTag(); // DW_TAG_subrange_type.
+ info_.EndTag(); // DW_TAG_array_type.
+ } else {
+ std::string descriptor_string;
+ const char* desc = type->GetDescriptor(&descriptor_string);
+ StartClassTag(desc);
+
+ if (!type->IsVariableSize()) {
+ info_.WriteUdata(DW_AT_byte_size, type->GetObjectSize());
+ }
+
+ // Base class.
+ mirror::Class* base_class = type->GetSuperClass();
+ if (base_class != nullptr) {
+ info_.StartTag(DW_TAG_inheritance);
+ WriteLazyType(base_class->GetDescriptor(&descriptor_string));
+ info_.WriteUdata(DW_AT_data_member_location, 0);
+ info_.WriteSdata(DW_AT_accessibility, DW_ACCESS_public);
+ info_.EndTag(); // DW_TAG_inheritance.
+ }
+
+ // Member variables.
+ for (uint32_t i = 0, count = type->NumInstanceFields(); i < count; ++i) {
+ ArtField* field = type->GetInstanceField(i);
+ info_.StartTag(DW_TAG_member);
+ WriteName(field->GetName());
+ WriteLazyType(field->GetTypeDescriptor());
+ info_.WriteUdata(DW_AT_data_member_location, field->GetOffset().Uint32Value());
+ uint32_t access_flags = field->GetAccessFlags();
+ if (access_flags & kAccPublic) {
+ info_.WriteSdata(DW_AT_accessibility, DW_ACCESS_public);
+ } else if (access_flags & kAccProtected) {
+ info_.WriteSdata(DW_AT_accessibility, DW_ACCESS_protected);
+ } else if (access_flags & kAccPrivate) {
+ info_.WriteSdata(DW_AT_accessibility, DW_ACCESS_private);
+ }
+ info_.EndTag(); // DW_TAG_member.
+ }
+
+ EndClassTag(desc);
+ }
+ }
+
+ CHECK_EQ(info_.Depth(), 1);
+ FinishLazyTypes();
+ info_.EndTag(); // DW_TAG_compile_unit.
+ std::vector<uint8_t> buffer;
+ buffer.reserve(info_.data()->size() + KB);
+ const size_t offset = owner_->builder_->GetDebugInfo()->GetSize();
+ const size_t debug_abbrev_offset =
+ owner_->debug_abbrev_.Insert(debug_abbrev_.data(), debug_abbrev_.size());
+ WriteDebugInfoCU(debug_abbrev_offset, info_, offset, &buffer, &owner_->debug_info_patches_);
+ owner_->builder_->GetDebugInfo()->WriteFully(buffer.data(), buffer.size());
+ }
+
// Write table into .debug_loc which describes location of dex register.
// The dex register might be valid only at some points and it might
// move between machine registers and stack.
@@ -715,14 +822,14 @@ class DebugInfoWriter {
// just define all types lazily at the end of compilation unit.
void WriteLazyType(const char* type_descriptor) {
if (type_descriptor != nullptr && type_descriptor[0] != 'V') {
- lazy_types_.emplace(type_descriptor, info_.size());
+ lazy_types_.emplace(std::string(type_descriptor), info_.size());
info_.WriteRef4(DW_AT_type, 0);
}
}
void FinishLazyTypes() {
for (const auto& lazy_type : lazy_types_) {
- info_.UpdateUint32(lazy_type.second, WriteType(lazy_type.first));
+ info_.UpdateUint32(lazy_type.second, WriteTypeDeclaration(lazy_type.first));
}
lazy_types_.clear();
}
@@ -747,30 +854,39 @@ class DebugInfoWriter {
// Convert dex type descriptor to DWARF.
// Returns offset in the compilation unit.
- size_t WriteType(const char* desc) {
+ size_t WriteTypeDeclaration(const std::string& desc) {
+ DCHECK(!desc.empty());
const auto& it = type_cache_.find(desc);
if (it != type_cache_.end()) {
return it->second;
}
size_t offset;
- if (*desc == 'L') {
+ if (desc[0] == 'L') {
// Class type. For example: Lpackage/name;
- offset = StartClassTag(desc);
+ size_t class_offset = StartClassTag(desc.c_str());
info_.WriteFlag(DW_AT_declaration, true);
- EndClassTag(desc);
- } else if (*desc == '[') {
+ EndClassTag(desc.c_str());
+ // Reference to the class type.
+ offset = info_.StartTag(DW_TAG_reference_type);
+ info_.WriteRef(DW_AT_type, class_offset);
+ info_.EndTag();
+ } else if (desc[0] == '[') {
// Array type.
- size_t element_type = WriteType(desc + 1);
- offset = info_.StartTag(DW_TAG_array_type);
+ size_t element_type = WriteTypeDeclaration(desc.substr(1));
+ size_t array_type = info_.StartTag(DW_TAG_array_type);
+ info_.WriteFlag(DW_AT_declaration, true);
info_.WriteRef(DW_AT_type, element_type);
info_.EndTag();
+ offset = info_.StartTag(DW_TAG_reference_type);
+ info_.WriteRef4(DW_AT_type, array_type);
+ info_.EndTag();
} else {
// Primitive types.
const char* name;
uint32_t encoding;
uint32_t byte_size;
- switch (*desc) {
+ switch (desc[0]) {
case 'B':
name = "byte";
encoding = DW_ATE_signed;
@@ -815,7 +931,7 @@ class DebugInfoWriter {
LOG(FATAL) << "Void type should not be encoded";
UNREACHABLE();
default:
- LOG(FATAL) << "Unknown dex type descriptor: " << desc;
+ LOG(FATAL) << "Unknown dex type descriptor: \"" << desc << "\"";
UNREACHABLE();
}
offset = info_.StartTag(DW_TAG_base_type);
@@ -865,9 +981,10 @@ class DebugInfoWriter {
// Temporary buffer to create and store the entries.
DebugInfoEntryWriter<> info_;
// Cache of already translated type descriptors.
- std::map<const char*, size_t, CStringLess> type_cache_; // type_desc -> definition_offset.
+ std::map<std::string, size_t> type_cache_; // type_desc -> definition_offset.
// 32-bit references which need to be resolved to a type later.
- std::multimap<const char*, size_t, CStringLess> lazy_types_; // type_desc -> patch_offset.
+  // A given type may be used multiple times, therefore we need a multimap.
+ std::multimap<std::string, size_t> lazy_types_; // type_desc -> patch_offset.
};
public:
@@ -883,6 +1000,11 @@ class DebugInfoWriter {
writer.Write(compilation_unit);
}
+ void WriteTypes(const ArrayRef<mirror::Class*>& types) SHARED_REQUIRES(Locks::mutator_lock_) {
+ CompilationUnitWriter writer(this);
+ writer.Write(types);
+ }
+
void End() {
builder_->GetDebugInfo()->End();
builder_->WritePatches(".debug_info.oat_patches",
@@ -924,7 +1046,9 @@ class DebugLineWriter {
// Returns the number of bytes written.
size_t WriteCompilationUnit(CompilationUnit& compilation_unit) {
const bool is64bit = Is64BitInstructionSet(builder_->GetIsa());
- const Elf_Addr text_address = builder_->GetText()->GetAddress();
+ const Elf_Addr text_address = builder_->GetText()->Exists()
+ ? builder_->GetText()->GetAddress()
+ : 0;
compilation_unit.debug_line_offset_ = builder_->GetDebugLine()->GetSize();
@@ -1102,9 +1226,27 @@ class DebugLineWriter {
std::vector<uintptr_t> debug_line_patches;
};
+// Get all types loaded by the runtime.
+static std::vector<mirror::Class*> GetLoadedRuntimeTypes() SHARED_REQUIRES(Locks::mutator_lock_) {
+ std::vector<mirror::Class*> result;
+ class CollectClasses : public ClassVisitor {
+ public:
+ virtual bool Visit(mirror::Class* klass) {
+ classes_->push_back(klass);
+ return true;
+ }
+ std::vector<mirror::Class*>* classes_;
+ };
+ CollectClasses visitor;
+ visitor.classes_ = &result;
+ Runtime::Current()->GetClassLinker()->VisitClasses(&visitor);
+ return result;
+}
+
template<typename ElfTypes>
-void WriteDebugSections(ElfBuilder<ElfTypes>* builder,
- const ArrayRef<const MethodDebugInfo>& method_infos) {
+static void WriteDebugSections(ElfBuilder<ElfTypes>* builder,
+ bool write_loaded_runtime_types,
+ const ArrayRef<const MethodDebugInfo>& method_infos) {
// Group the methods into compilation units based on source file.
std::vector<CompilationUnit> compilation_units;
const char* last_source_file = nullptr;
@@ -1122,7 +1264,7 @@ void WriteDebugSections(ElfBuilder<ElfTypes>* builder,
}
// Write .debug_line section.
- {
+ if (!compilation_units.empty()) {
DebugLineWriter<ElfTypes> line_writer(builder);
line_writer.Start();
for (auto& compilation_unit : compilation_units) {
@@ -1132,12 +1274,19 @@ void WriteDebugSections(ElfBuilder<ElfTypes>* builder,
}
// Write .debug_info section.
- {
+ if (!compilation_units.empty() || write_loaded_runtime_types) {
DebugInfoWriter<ElfTypes> info_writer(builder);
info_writer.Start();
for (const auto& compilation_unit : compilation_units) {
info_writer.WriteCompilationUnit(compilation_unit);
}
+ if (write_loaded_runtime_types) {
+ Thread* self = Thread::Current();
+ // The lock prevents the classes being moved by the GC.
+ ReaderMutexLock mu(self, *Locks::mutator_lock_);
+ std::vector<mirror::Class*> types = GetLoadedRuntimeTypes();
+ info_writer.WriteTypes(ArrayRef<mirror::Class*>(types.data(), types.size()));
+ }
info_writer.End();
}
}
@@ -1173,11 +1322,13 @@ void WriteDebugSymbols(ElfBuilder<ElfTypes>* builder,
name += " [DEDUPED]";
}
+ const auto* text = builder->GetText()->Exists() ? builder->GetText() : nullptr;
+ const bool is_relative = (text != nullptr);
uint32_t low_pc = info.low_pc_;
// Add in code delta, e.g., thumb bit 0 for Thumb2 code.
low_pc += info.compiled_method_->CodeDelta();
- symtab->Add(strtab->Write(name), builder->GetText(), low_pc,
- true, info.high_pc_ - info.low_pc_, STB_GLOBAL, STT_FUNC);
+ symtab->Add(strtab->Write(name), text, low_pc,
+ is_relative, info.high_pc_ - info.low_pc_, STB_GLOBAL, STT_FUNC);
// Conforming to aaelf, add $t mapping symbol to indicate start of a sequence of thumb2
// instructions, so that disassembler tools can correctly disassemble.
@@ -1185,8 +1336,8 @@ void WriteDebugSymbols(ElfBuilder<ElfTypes>* builder,
// requires it to match function symbol. Just address 0 does not work.
if (info.compiled_method_->GetInstructionSet() == kThumb2) {
if (!generated_mapping_symbol || !kGenerateSingleArmMappingSymbol) {
- symtab->Add(strtab->Write("$t"), builder->GetText(), info.low_pc_ & ~1,
- true, 0, STB_LOCAL, STT_NOTYPE);
+ symtab->Add(strtab->Write("$t"), text, info.low_pc_ & ~1,
+ is_relative, 0, STB_LOCAL, STT_NOTYPE);
generated_mapping_symbol = true;
}
}
@@ -1202,25 +1353,89 @@ void WriteDebugSymbols(ElfBuilder<ElfTypes>* builder,
template <typename ElfTypes>
void WriteDebugInfo(ElfBuilder<ElfTypes>* builder,
+ bool write_loaded_runtime_types,
const ArrayRef<const MethodDebugInfo>& method_infos,
CFIFormat cfi_format) {
- if (!method_infos.empty()) {
- // Add methods to .symtab.
- WriteDebugSymbols(builder, method_infos);
- // Generate CFI (stack unwinding information).
- WriteCFISection(builder, method_infos, cfi_format);
- // Write DWARF .debug_* sections.
- WriteDebugSections(builder, method_infos);
+ // Add methods to .symtab.
+ WriteDebugSymbols(builder, method_infos);
+ // Generate CFI (stack unwinding information).
+ WriteCFISection(builder, method_infos, cfi_format);
+ // Write DWARF .debug_* sections.
+ WriteDebugSections(builder, write_loaded_runtime_types, method_infos);
+}
+
+template <typename ElfTypes>
+static ArrayRef<const uint8_t> WriteDebugElfFileForMethodInternal(
+ const dwarf::MethodDebugInfo& method_info) {
+ const InstructionSet isa = method_info.compiled_method_->GetInstructionSet();
+ std::vector<uint8_t> buffer;
+ buffer.reserve(KB);
+ VectorOutputStream out("Debug ELF file", &buffer);
+ std::unique_ptr<ElfBuilder<ElfTypes>> builder(new ElfBuilder<ElfTypes>(isa, &out));
+ builder->Start();
+ WriteDebugInfo(builder.get(),
+ false,
+ ArrayRef<const MethodDebugInfo>(&method_info, 1),
+ DW_DEBUG_FRAME_FORMAT);
+ builder->End();
+ CHECK(builder->Good());
+ // Make a copy of the buffer. We want to shrink it anyway.
+ uint8_t* result = new uint8_t[buffer.size()];
+ CHECK(result != nullptr);
+ memcpy(result, buffer.data(), buffer.size());
+ return ArrayRef<const uint8_t>(result, buffer.size());
+}
+
+ArrayRef<const uint8_t> WriteDebugElfFileForMethod(const dwarf::MethodDebugInfo& method_info) {
+ const InstructionSet isa = method_info.compiled_method_->GetInstructionSet();
+ if (Is64BitInstructionSet(isa)) {
+ return WriteDebugElfFileForMethodInternal<ElfTypes64>(method_info);
+ } else {
+ return WriteDebugElfFileForMethodInternal<ElfTypes32>(method_info);
+ }
+}
+
+template <typename ElfTypes>
+static ArrayRef<const uint8_t> WriteDebugElfFileForClassInternal(const InstructionSet isa,
+ mirror::Class* type)
+ SHARED_REQUIRES(Locks::mutator_lock_) {
+ std::vector<uint8_t> buffer;
+ buffer.reserve(KB);
+ VectorOutputStream out("Debug ELF file", &buffer);
+ std::unique_ptr<ElfBuilder<ElfTypes>> builder(new ElfBuilder<ElfTypes>(isa, &out));
+ builder->Start();
+
+ DebugInfoWriter<ElfTypes> info_writer(builder.get());
+ info_writer.Start();
+ info_writer.WriteTypes(ArrayRef<mirror::Class*>(&type, 1));
+ info_writer.End();
+
+ builder->End();
+ CHECK(builder->Good());
+ // Make a copy of the buffer. We want to shrink it anyway.
+ uint8_t* result = new uint8_t[buffer.size()];
+ CHECK(result != nullptr);
+ memcpy(result, buffer.data(), buffer.size());
+ return ArrayRef<const uint8_t>(result, buffer.size());
+}
+
+ArrayRef<const uint8_t> WriteDebugElfFileForClass(const InstructionSet isa, mirror::Class* type) {
+ if (Is64BitInstructionSet(isa)) {
+ return WriteDebugElfFileForClassInternal<ElfTypes64>(isa, type);
+ } else {
+ return WriteDebugElfFileForClassInternal<ElfTypes32>(isa, type);
}
}
// Explicit instantiations
template void WriteDebugInfo<ElfTypes32>(
ElfBuilder<ElfTypes32>* builder,
+ bool write_loaded_runtime_types,
const ArrayRef<const MethodDebugInfo>& method_infos,
CFIFormat cfi_format);
template void WriteDebugInfo<ElfTypes64>(
ElfBuilder<ElfTypes64>* builder,
+ bool write_loaded_runtime_types,
const ArrayRef<const MethodDebugInfo>& method_infos,
CFIFormat cfi_format);
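WriteTypeDeclaration above dispatches on the first character of a dex type descriptor: 'L' and
'[' emit a class or array declaration plus a DW_TAG_reference_type (Java objects are always
handled by reference), while the single-character descriptors map onto DWARF base types. A
self-contained sketch of the primitive mapping; the encoding constants are the standard DWARF
values, and since all cases other than 'B' are hidden by the hunk context above, their encodings
here are an assumption based on the usual Java primitive sizes:

    #include <cstdint>

    struct BaseType {  // Illustrative; the real writer emits DW_TAG_base_type directly.
      const char* name;
      uint32_t encoding;
      uint32_t byte_size;
    };

    // Standard DWARF attribute-encoding values.
    constexpr uint32_t DW_ATE_boolean = 0x02;
    constexpr uint32_t DW_ATE_float   = 0x04;
    constexpr uint32_t DW_ATE_signed  = 0x05;
    constexpr uint32_t DW_ATE_UTF     = 0x10;

    bool PrimitiveFromDescriptor(char desc, BaseType* out) {
      switch (desc) {
        case 'B': *out = {"byte",    DW_ATE_signed,  1}; return true;
        case 'C': *out = {"char",    DW_ATE_UTF,     2}; return true;
        case 'D': *out = {"double",  DW_ATE_float,   8}; return true;
        case 'F': *out = {"float",   DW_ATE_float,   4}; return true;
        case 'I': *out = {"int",     DW_ATE_signed,  4}; return true;
        case 'J': *out = {"long",    DW_ATE_signed,  8}; return true;
        case 'S': *out = {"short",   DW_ATE_signed,  2}; return true;
        case 'Z': *out = {"boolean", DW_ATE_boolean, 1}; return true;
        default:  return false;  // 'L', '[' and 'V' are handled separately.
      }
    }
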
diff --git a/compiler/elf_writer_debug.h b/compiler/elf_writer_debug.h
index 7ec0be185a..91da00f97a 100644
--- a/compiler/elf_writer_debug.h
+++ b/compiler/elf_writer_debug.h
@@ -17,19 +17,30 @@
#ifndef ART_COMPILER_ELF_WRITER_DEBUG_H_
#define ART_COMPILER_ELF_WRITER_DEBUG_H_
-#include "elf_builder.h"
+#include "base/macros.h"
+#include "base/mutex.h"
#include "dwarf/dwarf_constants.h"
-#include "oat_writer.h"
+#include "elf_builder.h"
#include "utils/array_ref.h"
namespace art {
+namespace mirror {
+class Class;
+}
namespace dwarf {
+struct MethodDebugInfo;
template <typename ElfTypes>
void WriteDebugInfo(ElfBuilder<ElfTypes>* builder,
+ bool write_loaded_runtime_types,
const ArrayRef<const MethodDebugInfo>& method_infos,
CFIFormat cfi_format);
+ArrayRef<const uint8_t> WriteDebugElfFileForMethod(const dwarf::MethodDebugInfo& method_info);
+
+ArrayRef<const uint8_t> WriteDebugElfFileForClass(const InstructionSet isa, mirror::Class* type)
+ SHARED_REQUIRES(Locks::mutator_lock_);
+
} // namespace dwarf
} // namespace art
diff --git a/compiler/elf_writer_quick.cc b/compiler/elf_writer_quick.cc
index 7b1bdd72e5..a67f3bd1a9 100644
--- a/compiler/elf_writer_quick.cc
+++ b/compiler/elf_writer_quick.cc
@@ -152,7 +152,7 @@ template <typename ElfTypes>
void ElfWriterQuick<ElfTypes>::WriteDebugInfo(
const ArrayRef<const dwarf::MethodDebugInfo>& method_infos) {
if (compiler_options_->GetGenerateDebugInfo()) {
- dwarf::WriteDebugInfo(builder_.get(), method_infos, kCFIFormat);
+ dwarf::WriteDebugInfo(builder_.get(), /* write_types */ true, method_infos, kCFIFormat);
}
}
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 17d0f61a34..d0bb201d69 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -76,23 +76,35 @@ static constexpr bool kBinObjects = true;
// Return true if an object is already in an image space.
bool ImageWriter::IsInBootImage(const void* obj) const {
+ gc::Heap* const heap = Runtime::Current()->GetHeap();
if (!compile_app_image_) {
- DCHECK(boot_image_space_ == nullptr);
+ DCHECK(heap->GetBootImageSpaces().empty());
return false;
}
- const uint8_t* image_begin = boot_image_space_->Begin();
- // Real image end including ArtMethods and ArtField sections.
- const uint8_t* image_end = image_begin + boot_image_space_->GetImageHeader().GetImageSize();
- return image_begin <= obj && obj < image_end;
+ for (gc::space::ImageSpace* boot_image_space : heap->GetBootImageSpaces()) {
+ const uint8_t* image_begin = boot_image_space->Begin();
+ // Real image end including ArtMethods and ArtField sections.
+ const uint8_t* image_end = image_begin + boot_image_space->GetImageHeader().GetImageSize();
+ if (image_begin <= obj && obj < image_end) {
+ return true;
+ }
+ }
+ return false;
}
bool ImageWriter::IsInBootOatFile(const void* ptr) const {
+ gc::Heap* const heap = Runtime::Current()->GetHeap();
if (!compile_app_image_) {
- DCHECK(boot_image_space_ == nullptr);
+ DCHECK(heap->GetBootImageSpaces().empty());
return false;
}
- const ImageHeader& image_header = boot_image_space_->GetImageHeader();
- return image_header.GetOatFileBegin() <= ptr && ptr < image_header.GetOatFileEnd();
+ for (gc::space::ImageSpace* boot_image_space : heap->GetBootImageSpaces()) {
+ const ImageHeader& image_header = boot_image_space->GetImageHeader();
+ if (image_header.GetOatFileBegin() <= ptr && ptr < image_header.GetOatFileEnd()) {
+ return true;
+ }
+ }
+ return false;
}
static void CheckNoDexObjectsCallback(Object* obj, void* arg ATTRIBUTE_UNUSED)
@@ -109,14 +121,6 @@ static void CheckNoDexObjects() {
bool ImageWriter::PrepareImageAddressSpace() {
target_ptr_size_ = InstructionSetPointerSize(compiler_driver_.GetInstructionSet());
gc::Heap* const heap = Runtime::Current()->GetHeap();
- // Cache boot image space.
- for (gc::space::ContinuousSpace* space : heap->GetContinuousSpaces()) {
- if (space->IsImageSpace()) {
- CHECK(compile_app_image_);
- CHECK(boot_image_space_ == nullptr) << "Multiple image spaces";
- boot_image_space_ = space->AsImageSpace();
- }
- }
{
ScopedObjectAccess soa(Thread::Current());
PruneNonImageClasses(); // Remove junk
@@ -205,9 +209,6 @@ bool ImageWriter::Write(int image_fd,
oat_header.GetQuickResolutionTrampolineOffset();
image_info.oat_address_offsets_[kOatAddressQuickToInterpreterBridge] =
oat_header.GetQuickToInterpreterBridgeOffset();
- } else {
- // Other oat files use the primary trampolines.
- // TODO: Dummy values to protect usage? b/26317072
}
@@ -635,11 +636,11 @@ ImageWriter::BinSlot ImageWriter::GetImageBinSlot(mirror::Object* object) const
bool ImageWriter::AllocMemory() {
for (const char* oat_filename : oat_filenames_) {
ImageInfo& image_info = GetImageInfo(oat_filename);
- const size_t length = RoundUp(image_objects_offset_begin_ +
- GetBinSizeSum(image_info) +
- intern_table_bytes_ +
- class_table_bytes_,
- kPageSize);
+ ImageSection unused_sections[ImageHeader::kSectionCount];
+ const size_t length = RoundUp(
+ image_info.CreateImageSections(target_ptr_size_, unused_sections),
+ kPageSize);
+
std::string error_msg;
image_info.image_.reset(MemMap::MapAnonymous("image writer image",
nullptr,
@@ -909,14 +910,17 @@ void ImageWriter::CalculateObjectBinSlots(Object* obj) {
DCHECK(obj != nullptr);
// If it is a string, we want to intern it if it's not already interned.
if (obj->GetClass()->IsStringClass()) {
+ const char* oat_filename = GetOatFilename(obj);
+ ImageInfo& image_info = GetImageInfo(oat_filename);
+
// we must be an interned string that was forward referenced and already assigned
if (IsImageBinSlotAssigned(obj)) {
- DCHECK_EQ(obj, obj->AsString()->Intern());
+ DCHECK_EQ(obj, image_info.intern_table_->InternStrongImageString(obj->AsString()));
return;
}
// InternImageString allows us to intern while holding the heap bitmap lock. This is safe since
// we are guaranteed to not have GC during image writing.
- mirror::String* const interned = Runtime::Current()->GetInternTable()->InternStrongImageString(
+ mirror::String* const interned = image_info.intern_table_->InternStrongImageString(
obj->AsString());
if (obj != interned) {
if (!IsImageBinSlotAssigned(interned)) {
@@ -1067,6 +1071,13 @@ void ImageWriter::WalkFieldsInOrder(mirror::Object* obj) {
};
const char* oat_file = GetOatFilenameForDexCache(dex_cache);
ImageInfo& image_info = GetImageInfo(oat_file);
+ {
+ // Note: This table is only accessed from the image writer, so the lock is technically
+ // unnecessary.
+ WriterMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
+ // Insert in the class table for this iamge.
+ image_info.class_table_->Insert(as_klass);
+ }
for (LengthPrefixedArray<ArtField>* cur_fields : fields) {
// Total array length including header.
if (cur_fields != nullptr) {
@@ -1249,6 +1260,18 @@ void ImageWriter::CalculateNewObjectOffsets() {
// Calculate size of the dex cache arrays slot and prepare offsets.
PrepareDexCacheArraySlots();
+ // Calculate the sizes of the intern tables and class tables.
+ for (const char* oat_filename : oat_filenames_) {
+ ImageInfo& image_info = GetImageInfo(oat_filename);
+ // Calculate how big the intern table will be after being serialized.
+ InternTable* const intern_table = image_info.intern_table_.get();
+ CHECK_EQ(intern_table->WeakSize(), 0u) << " should have strong interned all the strings";
+ image_info.intern_table_bytes_ = intern_table->WriteToMemory(nullptr);
+ // Calculate the size of the class table.
+ ReaderMutexLock mu(self, *Locks::classlinker_classes_lock_);
+ image_info.class_table_bytes_ += image_info.class_table_->WriteToMemory(nullptr);
+ }
+
// Calculate bin slot offsets.
for (const char* oat_filename : oat_filenames_) {
ImageInfo& image_info = GetImageInfo(oat_filename);
@@ -1275,18 +1298,11 @@ void ImageWriter::CalculateNewObjectOffsets() {
ImageInfo& image_info = GetImageInfo(oat_filename);
image_info.image_begin_ = global_image_begin_ + image_offset;
image_info.image_offset_ = image_offset;
- size_t native_sections_size = image_info.bin_slot_sizes_[kBinArtField] +
- image_info.bin_slot_sizes_[kBinArtMethodDirty] +
- image_info.bin_slot_sizes_[kBinArtMethodClean] +
- image_info.bin_slot_sizes_[kBinDexCacheArray] +
- intern_table_bytes_ +
- class_table_bytes_;
- size_t image_objects = RoundUp(image_info.image_end_, kPageSize);
- size_t bitmap_size =
- RoundUp(gc::accounting::ContinuousSpaceBitmap::ComputeBitmapSize(image_objects), kPageSize);
- size_t heap_size = gc::accounting::ContinuousSpaceBitmap::ComputeHeapSize(bitmap_size);
- size_t max = std::max(heap_size, image_info.image_end_ + native_sections_size + bitmap_size);
- image_info.image_size_ = RoundUp(max, kPageSize);
+ ImageSection unused_sections[ImageHeader::kSectionCount];
+ image_info.image_size_ = RoundUp(
+ image_info.CreateImageSections(target_ptr_size_, unused_sections),
+ kPageSize);
+ // There should be no gaps until the next image.
image_offset += image_info.image_size_;
}
@@ -1310,89 +1326,69 @@ void ImageWriter::CalculateNewObjectOffsets() {
relocation.offset += image_info.bin_slot_offsets_[bin_type];
}
- /* TODO: Reenable the intern table and class table. b/26317072
- // Calculate how big the intern table will be after being serialized.
- InternTable* const intern_table = runtime->GetInternTable();
- CHECK_EQ(intern_table->WeakSize(), 0u) << " should have strong interned all the strings";
- intern_table_bytes_ = intern_table->WriteToMemory(nullptr);
-
- // Write out the class table.
- ClassLinker* class_linker = runtime->GetClassLinker();
- if (boot_image_space_ == nullptr) {
- // Compiling the boot image, add null class loader.
- class_loaders_.insert(nullptr);
- }
- // class_loaders_ usually will not be empty, but may be empty if we attempt to create an image
- // with no classes.
- if (class_loaders_.size() == 1u) {
- // Only write the class table if we have exactly one class loader. There may be cases where
- // there are multiple class loaders if a class path is passed to dex2oat.
- ReaderMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
- for (mirror::ClassLoader* loader : class_loaders_) {
- ClassTable* table = class_linker->ClassTableForClassLoader(loader);
- CHECK(table != nullptr);
- class_table_bytes_ += table->WriteToMemory(nullptr);
- }
- }
- */
-
// Note that image_info.image_end_ is left at end of used mirror object section.
}
-void ImageWriter::CreateHeader(size_t oat_loaded_size, size_t oat_data_offset) {
- CHECK_NE(0U, oat_loaded_size);
- const char* oat_filename = oat_file_->GetLocation().c_str();
- ImageInfo& image_info = GetImageInfo(oat_filename);
- const uint8_t* oat_file_begin = GetOatFileBegin(oat_filename);
- const uint8_t* oat_file_end = oat_file_begin + oat_loaded_size;
- image_info.oat_data_begin_ = const_cast<uint8_t*>(oat_file_begin) + oat_data_offset;
- const uint8_t* oat_data_end = image_info.oat_data_begin_ + oat_file_->Size();
- image_info.oat_size_ = oat_file_->Size();
-
- // Create the image sections.
- ImageSection sections[ImageHeader::kSectionCount];
+size_t ImageWriter::ImageInfo::CreateImageSections(size_t target_ptr_size,
+ ImageSection* out_sections) const {
+ DCHECK(out_sections != nullptr);
// Objects section
- auto* objects_section = &sections[ImageHeader::kSectionObjects];
- *objects_section = ImageSection(0u, image_info.image_end_);
+ auto* objects_section = &out_sections[ImageHeader::kSectionObjects];
+ *objects_section = ImageSection(0u, image_end_);
size_t cur_pos = objects_section->End();
// Add field section.
- auto* field_section = &sections[ImageHeader::kSectionArtFields];
- *field_section = ImageSection(cur_pos, image_info.bin_slot_sizes_[kBinArtField]);
- CHECK_EQ(image_info.bin_slot_offsets_[kBinArtField], field_section->Offset());
+ auto* field_section = &out_sections[ImageHeader::kSectionArtFields];
+ *field_section = ImageSection(cur_pos, bin_slot_sizes_[kBinArtField]);
+ CHECK_EQ(bin_slot_offsets_[kBinArtField], field_section->Offset());
cur_pos = field_section->End();
// Round up to the alignment required by the method section.
- cur_pos = RoundUp(cur_pos, ArtMethod::Alignment(target_ptr_size_));
+ cur_pos = RoundUp(cur_pos, ArtMethod::Alignment(target_ptr_size));
// Add method section.
- auto* methods_section = &sections[ImageHeader::kSectionArtMethods];
+ auto* methods_section = &out_sections[ImageHeader::kSectionArtMethods];
*methods_section = ImageSection(cur_pos,
- image_info.bin_slot_sizes_[kBinArtMethodClean] +
- image_info.bin_slot_sizes_[kBinArtMethodDirty]);
- CHECK_EQ(image_info.bin_slot_offsets_[kBinArtMethodClean], methods_section->Offset());
+ bin_slot_sizes_[kBinArtMethodClean] +
+ bin_slot_sizes_[kBinArtMethodDirty]);
+ CHECK_EQ(bin_slot_offsets_[kBinArtMethodClean], methods_section->Offset());
cur_pos = methods_section->End();
// Add dex cache arrays section.
- auto* dex_cache_arrays_section = &sections[ImageHeader::kSectionDexCacheArrays];
- *dex_cache_arrays_section = ImageSection(cur_pos, image_info.bin_slot_sizes_[kBinDexCacheArray]);
- CHECK_EQ(image_info.bin_slot_offsets_[kBinDexCacheArray], dex_cache_arrays_section->Offset());
+ auto* dex_cache_arrays_section = &out_sections[ImageHeader::kSectionDexCacheArrays];
+ *dex_cache_arrays_section = ImageSection(cur_pos, bin_slot_sizes_[kBinDexCacheArray]);
+ CHECK_EQ(bin_slot_offsets_[kBinDexCacheArray], dex_cache_arrays_section->Offset());
cur_pos = dex_cache_arrays_section->End();
// Round up to the alignment the string table expects. See HashSet::WriteToMemory.
cur_pos = RoundUp(cur_pos, sizeof(uint64_t));
// Calculate the size of the interned strings.
- auto* interned_strings_section = &sections[ImageHeader::kSectionInternedStrings];
+ auto* interned_strings_section = &out_sections[ImageHeader::kSectionInternedStrings];
*interned_strings_section = ImageSection(cur_pos, intern_table_bytes_);
cur_pos = interned_strings_section->End();
// Round up to the alignment the class table expects. See HashSet::WriteToMemory.
cur_pos = RoundUp(cur_pos, sizeof(uint64_t));
// Calculate the size of the class table section.
- auto* class_table_section = &sections[ImageHeader::kSectionClassTable];
+ auto* class_table_section = &out_sections[ImageHeader::kSectionClassTable];
*class_table_section = ImageSection(cur_pos, class_table_bytes_);
cur_pos = class_table_section->End();
// Image end goes right before the start of the image bitmap.
- const size_t image_end = static_cast<uint32_t>(cur_pos);
+ return cur_pos;
+}
+
+void ImageWriter::CreateHeader(size_t oat_loaded_size, size_t oat_data_offset) {
+ CHECK_NE(0U, oat_loaded_size);
+ const char* oat_filename = oat_file_->GetLocation().c_str();
+ ImageInfo& image_info = GetImageInfo(oat_filename);
+ const uint8_t* oat_file_begin = GetOatFileBegin(oat_filename);
+ const uint8_t* oat_file_end = oat_file_begin + oat_loaded_size;
+ image_info.oat_data_begin_ = const_cast<uint8_t*>(oat_file_begin) + oat_data_offset;
+ const uint8_t* oat_data_end = image_info.oat_data_begin_ + oat_file_->Size();
+ image_info.oat_size_ = oat_file_->Size();
+
+ // Create the image sections.
+ ImageSection sections[ImageHeader::kSectionCount];
+ const size_t image_end = image_info.CreateImageSections(target_ptr_size_, sections);
+
// Finally bitmap section.
const size_t bitmap_bytes = image_info.image_bitmap_->Size();
auto* bitmap_section = &sections[ImageHeader::kSectionImageBitmap];
- *bitmap_section = ImageSection(RoundUp(cur_pos, kPageSize), RoundUp(bitmap_bytes, kPageSize));
- cur_pos = bitmap_section->End();
+ *bitmap_section = ImageSection(RoundUp(image_end, kPageSize), RoundUp(bitmap_bytes, kPageSize));
if (VLOG_IS_ON(compiler)) {
LOG(INFO) << "Creating header for " << oat_filename;
size_t idx = 0;
@@ -1444,7 +1440,7 @@ class FixupRootVisitor : public RootVisitor {
void VisitRoots(mirror::Object*** roots, size_t count, const RootInfo& info ATTRIBUTE_UNUSED)
OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
for (size_t i = 0; i < count; ++i) {
- *roots[i] = ImageAddress(*roots[i]);
+ *roots[i] = image_writer_->GetImageAddress(*roots[i]);
}
}
@@ -1452,19 +1448,12 @@ class FixupRootVisitor : public RootVisitor {
const RootInfo& info ATTRIBUTE_UNUSED)
OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
for (size_t i = 0; i < count; ++i) {
- roots[i]->Assign(ImageAddress(roots[i]->AsMirrorPtr()));
+ roots[i]->Assign(image_writer_->GetImageAddress(roots[i]->AsMirrorPtr()));
}
}
private:
ImageWriter* const image_writer_;
-
- mirror::Object* ImageAddress(mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_) {
- const size_t offset = image_writer_->GetImageOffset(obj);
- auto* const dest = reinterpret_cast<Object*>(image_writer_->global_image_begin_ + offset);
- VLOG(compiler) << "Update root from " << obj << " to " << dest;
- return dest;
- }
};
void ImageWriter::CopyAndFixupNativeData() {
@@ -1536,54 +1525,48 @@ void ImageWriter::CopyAndFixupNativeData() {
}
FixupRootVisitor root_visitor(this);
- /* TODO: Reenable the intern table and class table
// Write the intern table into the image.
- const ImageSection& intern_table_section = image_header->GetImageSection(
- ImageHeader::kSectionInternedStrings);
- Runtime* const runtime = Runtime::Current();
- InternTable* const intern_table = runtime->GetInternTable();
- uint8_t* const intern_table_memory_ptr =
- image_info.image_->Begin() + intern_table_section.Offset();
- const size_t intern_table_bytes = intern_table->WriteToMemory(intern_table_memory_ptr);
- CHECK_EQ(intern_table_bytes, intern_table_bytes_);
- // Fixup the pointers in the newly written intern table to contain image addresses.
- InternTable temp_intern_table;
- // Note that we require that ReadFromMemory does not make an internal copy of the elements so that
- // the VisitRoots() will update the memory directly rather than the copies.
- // This also relies on visit roots not doing any verification which could fail after we update
- // the roots to be the image addresses.
- temp_intern_table.ReadFromMemory(intern_table_memory_ptr);
- CHECK_EQ(temp_intern_table.Size(), intern_table->Size());
- temp_intern_table.VisitRoots(&root_visitor, kVisitRootFlagAllRoots);
-
+ if (image_info.intern_table_bytes_ > 0) {
+ const ImageSection& intern_table_section = image_header->GetImageSection(
+ ImageHeader::kSectionInternedStrings);
+ InternTable* const intern_table = image_info.intern_table_.get();
+ uint8_t* const intern_table_memory_ptr =
+ image_info.image_->Begin() + intern_table_section.Offset();
+ const size_t intern_table_bytes = intern_table->WriteToMemory(intern_table_memory_ptr);
+ CHECK_EQ(intern_table_bytes, image_info.intern_table_bytes_);
+ // Fixup the pointers in the newly written intern table to contain image addresses.
+ InternTable temp_intern_table;
+ // Note that we require that ReadFromMemory does not make an internal copy of the elements so that
+ // the VisitRoots() will update the memory directly rather than the copies.
+ // This also relies on visit roots not doing any verification which could fail after we update
+ // the roots to be the image addresses.
+ temp_intern_table.AddTableFromMemory(intern_table_memory_ptr);
+ CHECK_EQ(temp_intern_table.Size(), intern_table->Size());
+ temp_intern_table.VisitRoots(&root_visitor, kVisitRootFlagAllRoots);
+ }
// Write the class table(s) into the image. class_table_bytes_ may be 0 if there are multiple
// class loaders. Writing multiple class tables into the image is currently unsupported.
- if (class_table_bytes_ > 0u) {
- ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
+ if (image_info.class_table_bytes_ > 0u) {
const ImageSection& class_table_section = image_header->GetImageSection(
ImageHeader::kSectionClassTable);
uint8_t* const class_table_memory_ptr =
image_info.image_->Begin() + class_table_section.Offset();
ReaderMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
- size_t class_table_bytes = 0;
- for (mirror::ClassLoader* loader : class_loaders_) {
- ClassTable* table = class_linker->ClassTableForClassLoader(loader);
- CHECK(table != nullptr);
- uint8_t* memory_ptr = class_table_memory_ptr + class_table_bytes;
- class_table_bytes += table->WriteToMemory(memory_ptr);
- // Fixup the pointers in the newly written class table to contain image addresses. See
- // above comment for intern tables.
- ClassTable temp_class_table;
- temp_class_table.ReadFromMemory(memory_ptr);
- CHECK_EQ(temp_class_table.NumZygoteClasses(), table->NumNonZygoteClasses() +
- table->NumZygoteClasses());
- BufferedRootVisitor<kDefaultBufferedRootCount> buffered_visitor(&root_visitor,
- RootInfo(kRootUnknown));
- temp_class_table.VisitRoots(buffered_visitor);
- }
- CHECK_EQ(class_table_bytes, class_table_bytes_);
+
+ ClassTable* table = image_info.class_table_.get();
+ CHECK(table != nullptr);
+ const size_t class_table_bytes = table->WriteToMemory(class_table_memory_ptr);
+ CHECK_EQ(class_table_bytes, image_info.class_table_bytes_);
+ // Fixup the pointers in the newly written class table to contain image addresses. See
+ // above comment for intern tables.
+ ClassTable temp_class_table;
+ temp_class_table.ReadFromMemory(class_table_memory_ptr);
+ CHECK_EQ(temp_class_table.NumZygoteClasses(), table->NumNonZygoteClasses() +
+ table->NumZygoteClasses());
+ BufferedRootVisitor<kDefaultBufferedRootCount> buffered_visitor(&root_visitor,
+ RootInfo(kRootUnknown));
+ temp_class_table.VisitRoots(buffered_visitor);
}
- */
}
void ImageWriter::CopyAndFixupObjects() {
@@ -1991,7 +1974,7 @@ void ImageWriter::CopyAndFixupMethod(ArtMethod* orig,
copy->SetDeclaringClass(GetImageAddress(orig->GetDeclaringClassUnchecked()));
const char* oat_filename;
- if (orig->IsRuntimeMethod()) {
+ if (orig->IsRuntimeMethod() || compile_app_image_) {
oat_filename = default_oat_filename_;
} else {
auto it = dex_file_oat_filename_map_.find(orig->GetDexFile());
@@ -2110,7 +2093,6 @@ uint32_t ImageWriter::BinSlot::GetIndex() const {
}
uint8_t* ImageWriter::GetOatFileBegin(const char* oat_filename) const {
- // DCHECK_GT(intern_table_bytes_, 0u); TODO: Reenable intern table and class table.
uintptr_t last_image_end = 0;
for (const char* oat_fn : oat_filenames_) {
const ImageInfo& image_info = GetConstImageInfo(oat_fn);
@@ -2197,4 +2179,37 @@ void ImageWriter::UpdateOatFile(const char* oat_filename) {
}
}
+ImageWriter::ImageWriter(
+ const CompilerDriver& compiler_driver,
+ uintptr_t image_begin,
+ bool compile_pic,
+ bool compile_app_image,
+ ImageHeader::StorageMode image_storage_mode,
+ const std::vector<const char*> oat_filenames,
+ const std::unordered_map<const DexFile*, const char*>& dex_file_oat_filename_map)
+ : compiler_driver_(compiler_driver),
+ global_image_begin_(reinterpret_cast<uint8_t*>(image_begin)),
+ image_objects_offset_begin_(0),
+ oat_file_(nullptr),
+ compile_pic_(compile_pic),
+ compile_app_image_(compile_app_image),
+ target_ptr_size_(InstructionSetPointerSize(compiler_driver_.GetInstructionSet())),
+ image_method_array_(ImageHeader::kImageMethodsCount),
+ dirty_methods_(0u),
+ clean_methods_(0u),
+ image_storage_mode_(image_storage_mode),
+ dex_file_oat_filename_map_(dex_file_oat_filename_map),
+ oat_filenames_(oat_filenames),
+ default_oat_filename_(oat_filenames[0]) {
+ CHECK_NE(image_begin, 0U);
+ for (const char* oat_filename : oat_filenames) {
+ image_info_map_.emplace(oat_filename, ImageInfo());
+ }
+ std::fill_n(image_methods_, arraysize(image_methods_), nullptr);
+}
+
+ImageWriter::ImageInfo::ImageInfo()
+ : intern_table_(new InternTable),
+ class_table_(new ClassTable) {}
+
} // namespace art
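
The intern-table and class-table hunks above share one serialization idiom: write the table into the image mapping, then wrap that exact memory in a temporary table without copying and visit its roots so every pointer is rewritten in place to an image address. A minimal standalone sketch of the idiom, with toy types standing in for InternTable/ClassTable (all names here are illustrative, not ART's):

#include <cstdint>
#include <cstring>
#include <vector>

// Toy table: a flat array of pointers, standing in for InternTable/ClassTable.
struct ToyTable {
  std::vector<void*> slots;
  size_t WriteToMemory(uint8_t* out) const {  // serialize into image memory
    const size_t bytes = slots.size() * sizeof(void*);
    std::memcpy(out, slots.data(), bytes);
    return bytes;
  }
};

// View that aliases the serialized memory without copying, which is exactly
// what the comment above requires of AddTableFromMemory/ReadFromMemory.
struct ToyTableView {
  void** slots;
  size_t count;
  template <typename Visitor>
  void VisitRoots(Visitor&& visit) {
    for (size_t i = 0; i < count; ++i) {
      visit(&slots[i]);  // the visitor updates the image memory in place
    }
  }
};

int main() {
  int a = 0;
  int b = 0;
  ToyTable table{{&a, &b}};
  uint8_t image[64];
  const size_t bytes = table.WriteToMemory(image);
  ToyTableView view{reinterpret_cast<void**>(image), bytes / sizeof(void*)};
  const uintptr_t delta = 0x1000;  // pretend runtime-to-image relocation delta
  view.VisitRoots([delta](void** slot) {
    *slot = reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(*slot) + delta);
  });
  return 0;  // image[] now holds image-space pointers only
}

The CHECK_EQ calls in the hunk play the role of the returned byte count here: the serialized size must match the intern_table_bytes_/class_table_bytes_ budget computed when the sections were laid out.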
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index 78297ae645..ad690389e9 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -47,6 +47,8 @@ class ImageSpace;
} // namespace space
} // namespace gc
+class ClassTable;
+
static constexpr int kInvalidImageFd = -1;
// Write a Space built during compilation for use during execution.
@@ -58,33 +60,7 @@ class ImageWriter FINAL {
bool compile_app_image,
ImageHeader::StorageMode image_storage_mode,
const std::vector<const char*> oat_filenames,
- const std::unordered_map<const DexFile*, const char*>& dex_file_oat_filename_map)
- : compiler_driver_(compiler_driver),
- global_image_begin_(reinterpret_cast<uint8_t*>(image_begin)),
- image_objects_offset_begin_(0),
- oat_file_(nullptr),
- compile_pic_(compile_pic),
- compile_app_image_(compile_app_image),
- boot_image_space_(nullptr),
- target_ptr_size_(InstructionSetPointerSize(compiler_driver_.GetInstructionSet())),
- intern_table_bytes_(0u),
- image_method_array_(ImageHeader::kImageMethodsCount),
- dirty_methods_(0u),
- clean_methods_(0u),
- class_table_bytes_(0u),
- image_storage_mode_(image_storage_mode),
- dex_file_oat_filename_map_(dex_file_oat_filename_map),
- oat_filenames_(oat_filenames),
- default_oat_filename_(oat_filenames[0]) {
- CHECK_NE(image_begin, 0U);
- for (const char* oat_filename : oat_filenames) {
- image_info_map_.emplace(oat_filename, ImageInfo());
- }
- std::fill_n(image_methods_, arraysize(image_methods_), nullptr);
- }
-
- ~ImageWriter() {
- }
+ const std::unordered_map<const DexFile*, const char*>& dex_file_oat_filename_map);
bool PrepareImageAddressSpace();
@@ -237,41 +213,40 @@ class ImageWriter FINAL {
};
struct ImageInfo {
- explicit ImageInfo()
- : image_begin_(nullptr),
- image_end_(RoundUp(sizeof(ImageHeader), kObjectAlignment)),
- image_roots_address_(0),
- image_offset_(0),
- image_size_(0),
- oat_offset_(0),
- bin_slot_sizes_(),
- bin_slot_offsets_(),
- bin_slot_count_() {}
+ ImageInfo();
+ ImageInfo(ImageInfo&&) = default;
+
+ // Creates the image sections and writes them to the out_sections parameter; returns the
+ // size of the image excluding the bitmap.
+ size_t CreateImageSections(size_t target_ptr_size, ImageSection* out_sections) const;
std::unique_ptr<MemMap> image_; // Memory mapped for generating the image.
  // Target begin of this image. Note: it is not valid to write here; this is the address
  // of the target image, not necessarily where image_ is mapped. The address is only valid
  // after layout (otherwise null).
- uint8_t* image_begin_;
+ uint8_t* image_begin_ = nullptr;
- size_t image_end_; // Offset to the free space in image_, initially size of image header.
- uint32_t image_roots_address_; // The image roots address in the image.
- size_t image_offset_; // Offset of this image from the start of the first image.
+ // Offset to the free space in image_, initially size of image header.
+ size_t image_end_ = RoundUp(sizeof(ImageHeader), kObjectAlignment);
+ uint32_t image_roots_address_ = 0; // The image roots address in the image.
+ size_t image_offset_ = 0; // Offset of this image from the start of the first image.
// Image size is the *address space* covered by this image. As the live bitmap is aligned
// to the page size, the live bitmap will cover more address space than necessary. But live
// bitmaps may not overlap, so an image has a "shadow," which is accounted for in the size.
// The next image may only start at image_begin_ + image_size_ (which is guaranteed to be
// page-aligned).
- size_t image_size_;
+ size_t image_size_ = 0;
// Oat data.
- size_t oat_offset_; // Offset of the oat file for this image from start of oat files. This is
- // valid when the previous oat file has been written.
- uint8_t* oat_data_begin_; // Start of oatdata in the corresponding oat file. This is
- // valid when the images have been layed out.
- size_t oat_size_; // Size of the corresponding oat data.
+ // Offset of the oat file for this image from start of oat files. This is
+ // valid when the previous oat file has been written.
+ size_t oat_offset_ = 0;
+ // Start of oatdata in the corresponding oat file. This is
+ // valid when the images have been laid out.
+ uint8_t* oat_data_begin_ = nullptr;
+ size_t oat_size_ = 0; // Size of the corresponding oat data.
// Image bitmap which lets us know where the objects inside of the image reside.
std::unique_ptr<gc::accounting::ContinuousSpaceBitmap> image_bitmap_;
@@ -280,12 +255,24 @@ class ImageWriter FINAL {
SafeMap<const DexFile*, size_t> dex_cache_array_starts_;
// Offset from oat_data_begin_ to the stubs.
- uint32_t oat_address_offsets_[kOatAddressCount];
+ uint32_t oat_address_offsets_[kOatAddressCount] = {};
// Bin slot tracking for dirty object packing.
- size_t bin_slot_sizes_[kBinSize]; // Number of bytes in a bin.
- size_t bin_slot_offsets_[kBinSize]; // Number of bytes in previous bins.
- size_t bin_slot_count_[kBinSize]; // Number of objects in a bin.
+ size_t bin_slot_sizes_[kBinSize] = {}; // Number of bytes in a bin.
+ size_t bin_slot_offsets_[kBinSize] = {}; // Number of bytes in previous bins.
+ size_t bin_slot_count_[kBinSize] = {}; // Number of objects in a bin.
+
+ // Cached size of the intern table for when we allocate memory.
+ size_t intern_table_bytes_ = 0;
+
+ // Number of image class table bytes.
+ size_t class_table_bytes_ = 0;
+
+ // Intern table associated with this image for serialization.
+ std::unique_ptr<InternTable> intern_table_;
+
+ // Class table associated with this image for serialization.
+ std::unique_ptr<ClassTable> class_table_;
};
// We use the lock word to store the offset of the object in the image.
@@ -483,18 +470,12 @@ class ImageWriter FINAL {
const bool compile_pic_;
const bool compile_app_image_;
- // Cache the boot image space in this class for faster lookups.
- gc::space::ImageSpace* boot_image_space_;
-
// Size of pointers on the target architecture.
size_t target_ptr_size_;
// Mapping of oat filename to image data.
std::unordered_map<std::string, ImageInfo> image_info_map_;
- // Cached size of the intern table for when we allocate memory.
- size_t intern_table_bytes_;
-
// ArtField, ArtMethod relocating map. These are allocated as array of structs but we want to
// have one entry per art field for convenience. ArtFields are placed right after the end of the
// image objects (aka sum of bin_slot_sizes_). ArtMethods are placed right after the ArtFields.
@@ -528,9 +509,6 @@ class ImageWriter FINAL {
// null is a valid entry.
std::unordered_set<mirror::ClassLoader*> class_loaders_;
- // Number of image class table bytes.
- size_t class_table_bytes_;
-
// Which mode the image is stored as, see image.h
const ImageHeader::StorageMode image_storage_mode_;
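
A side note on the header mechanics: forward-declaring ClassTable (added above) and moving the ImageWriter/ImageInfo constructors out of line is the combination that lets ImageInfo hold std::unique_ptr members to types the header never defines, while the new default member initializers absorb what used to be a long constructor init-list. A hedged sketch of the pattern, with hypothetical names:

#include <cstddef>
#include <memory>

class Table;  // forward declaration only, like ClassTable in the header above

struct Info {
  Info();             // defined out of line, where Table is complete
  ~Info();            // ditto: destroying unique_ptr<Table> needs the full type
  size_t end = 0;     // default member initializers replace the init-list
  std::unique_ptr<Table> table;
};

// In the .cc file, Table is complete, so construction/destruction compile.
class Table {};
Info::Info() : table(new Table) {}
Info::~Info() = default;

int main() {
  Info info;
  return static_cast<int>(info.end);
}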
diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc
index b323d24038..bc51ed6e6a 100644
--- a/compiler/jit/jit_compiler.cc
+++ b/compiler/jit/jit_compiler.cc
@@ -22,6 +22,7 @@
#include "base/stringpiece.h"
#include "base/time_utils.h"
#include "base/timing_logger.h"
+#include "base/unix_file/fd_file.h"
#include "compiler_callbacks.h"
#include "dex/pass_manager.h"
#include "dex/quick_compiler_callbacks.h"
@@ -42,11 +43,12 @@ JitCompiler* JitCompiler::Create() {
return new JitCompiler();
}
-extern "C" void* jit_load(CompilerCallbacks** callbacks) {
+extern "C" void* jit_load(CompilerCallbacks** callbacks, bool* generate_debug_info) {
VLOG(jit) << "loading jit compiler";
auto* const jit_compiler = JitCompiler::Create();
CHECK(jit_compiler != nullptr);
*callbacks = jit_compiler->GetCompilerCallbacks();
+ *generate_debug_info = jit_compiler->GetCompilerOptions()->GetGenerateDebugInfo();
VLOG(jit) << "Done loading jit compiler";
return jit_compiler;
}
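
Since jit_load is resolved dynamically by the runtime, widening its signature with the generate_debug_info out-parameter has to be mirrored on the loading side. A hedged sketch of that caller; the dlsym-based loading and the helper name are assumptions, only the exported signature comes from this diff (CompilerCallbacks is collapsed to void* to keep the sketch standalone):

#include <dlfcn.h>
#include <cstdio>

// Mirrors the widened export: void* jit_load(CompilerCallbacks**, bool*).
using JitLoadFn = void* (*)(void** callbacks, bool* generate_debug_info);

void* LoadJitCompiler(const char* so_path) {
  void* handle = dlopen(so_path, RTLD_NOW);
  if (handle == nullptr) {
    return nullptr;
  }
  auto jit_load = reinterpret_cast<JitLoadFn>(dlsym(handle, "jit_load"));
  if (jit_load == nullptr) {
    return nullptr;
  }
  void* callbacks = nullptr;
  bool generate_debug_info = false;
  void* compiler = jit_load(&callbacks, &generate_debug_info);
  std::printf("jit debug info: %s\n", generate_debug_info ? "on" : "off");
  return compiler;
}

int main() {
  return LoadJitCompiler("libart-compiler.so") != nullptr ? 0 : 1;
}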
@@ -155,14 +157,33 @@ JitCompiler::JitCompiler() : total_time_(0) {
/* dump_cfg_append */ false,
cumulative_logger_.get(),
/* swap_fd */ -1,
- /* profile_file */ "",
- /* dex to oat map */ nullptr));
+ /* dex to oat map */ nullptr,
+ /* profile_compilation_info */ nullptr));
// Disable dedupe so we can remove compiled methods.
compiler_driver_->SetDedupeEnabled(false);
compiler_driver_->SetSupportBootImageFixup(false);
+
+ if (compiler_options_->GetGenerateDebugInfo()) {
+#ifdef __ANDROID__
+ const char* prefix = GetAndroidData();
+#else
+ const char* prefix = "/tmp";
+#endif
+ DCHECK_EQ(compiler_driver_->GetThreadCount(), 1u)
+ << "Generating debug info only works with one compiler thread";
+ std::string perf_filename = std::string(prefix) + "/perf-" + std::to_string(getpid()) + ".map";
+ perf_file_.reset(OS::CreateEmptyFileWriteOnly(perf_filename.c_str()));
+ if (perf_file_ == nullptr) {
+ LOG(FATAL) << "Could not create perf file at " << perf_filename;
+ }
+ }
}
JitCompiler::~JitCompiler() {
+ if (perf_file_ != nullptr) {
+ UNUSED(perf_file_->Flush());
+ UNUSED(perf_file_->Close());
+ }
}
bool JitCompiler::CompileMethod(Thread* self, ArtMethod* method) {
@@ -188,6 +209,20 @@ bool JitCompiler::CompileMethod(Thread* self, ArtMethod* method) {
ArtMethod* method_to_compile = method->GetInterfaceMethodIfProxy(sizeof(void*));
JitCodeCache* const code_cache = runtime->GetJit()->GetCodeCache();
success = compiler_driver_->GetCompiler()->JitCompile(self, code_cache, method_to_compile);
+ if (success && compiler_options_->GetGenerateDebugInfo()) {
+ const void* ptr = method_to_compile->GetEntryPointFromQuickCompiledCode();
+ std::ostringstream stream;
+ stream << std::hex
+ << reinterpret_cast<uintptr_t>(ptr)
+ << " "
+ << code_cache->GetMemorySizeOfCodePointer(ptr)
+ << " "
+ << PrettyMethod(method_to_compile)
+ << std::endl;
+ std::string str = stream.str();
+ bool res = perf_file_->WriteFully(str.c_str(), str.size());
+ CHECK(res);
+ }
}
// Trim maps to reduce memory usage.
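
The perf_file_ lines written in CompileMethod follow the plain-text /tmp/perf-<pid>.map convention that Linux perf uses to symbolize JIT code: one "START SIZE NAME" entry per method, with START and SIZE in hex (the single std::hex above deliberately applies to both stream insertions). A minimal sketch with made-up values:

#include <cstdint>
#include <iostream>
#include <sstream>
#include <string>

// Formats one perf map entry: "<start-hex> <size-hex> <symbol name>".
std::string PerfMapLine(uint64_t code_start, uint64_t code_size,
                        const std::string& name) {
  std::ostringstream stream;
  stream << std::hex << code_start << " " << code_size << " " << name
         << std::endl;
  return stream.str();
}

int main() {
  // Hypothetical JIT-compiled method: 0x80 bytes of code at 0x7f0000001000.
  std::cout << PerfMapLine(0x7f0000001000, 0x80, "void Foo.bar()");
  return 0;
}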
diff --git a/compiler/jit/jit_compiler.h b/compiler/jit/jit_compiler.h
index 913a6d00ae..037a18ac7a 100644
--- a/compiler/jit/jit_compiler.h
+++ b/compiler/jit/jit_compiler.h
@@ -43,6 +43,9 @@ class JitCompiler {
size_t GetTotalCompileTime() const {
return total_time_;
}
+ CompilerOptions* GetCompilerOptions() const {
+ return compiler_options_.get();
+ }
private:
uint64_t total_time_;
@@ -53,6 +56,7 @@ class JitCompiler {
std::unique_ptr<CompilerCallbacks> callbacks_;
std::unique_ptr<CompilerDriver> compiler_driver_;
std::unique_ptr<const InstructionSetFeatures> instruction_set_features_;
+ std::unique_ptr<File> perf_file_;
JitCompiler();
diff --git a/compiler/linker/relative_patcher_test.h b/compiler/linker/relative_patcher_test.h
index 877a674674..b10cc3534c 100644
--- a/compiler/linker/relative_patcher_test.h
+++ b/compiler/linker/relative_patcher_test.h
@@ -47,7 +47,7 @@ class RelativePatcherTest : public testing::Test {
driver_(&compiler_options_, &verification_results_, &inliner_map_,
Compiler::kQuick, instruction_set, nullptr,
false, nullptr, nullptr, nullptr, 1u,
- false, false, "", false, nullptr, -1, "", nullptr),
+ false, false, "", false, nullptr, -1, nullptr, nullptr),
error_msg_(),
instruction_set_(instruction_set),
features_(InstructionSetFeatures::FromVariant(instruction_set, variant, &error_msg_)),
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index 58f46d69a2..9f7ffa5ace 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -121,7 +121,7 @@ class OatTest : public CommonCompilerTest {
false,
timer_.get(),
-1,
- "",
+ nullptr,
nullptr));
}
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index da2f9cbed5..eee6116098 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -1142,7 +1142,7 @@ class BCEVisitor : public HGraphVisitor {
loop->IsDefinedOutOfTheLoop(array_get->InputAt(1))) {
SideEffects loop_effects = side_effects_.GetLoopEffects(loop->GetHeader());
if (!array_get->GetSideEffects().MayDependOn(loop_effects)) {
- HoistToPreheaderOrDeoptBlock(loop, array_get);
+ HoistToPreHeaderOrDeoptBlock(loop, array_get);
}
}
}
@@ -1280,7 +1280,8 @@ class BCEVisitor : public HGraphVisitor {
// as runtime test. By restricting dynamic bce to unit strides (with a maximum of 32-bit
// iterations) and by not combining access (e.g. a[i], a[i-3], a[i+5] etc.), these tests
// correctly guard against any possible OOB (including arithmetic wrap-around cases).
- HBasicBlock* block = TransformLoopForDeoptimizationIfNeeded(loop, needs_taken_test);
+ TransformLoopForDeoptimizationIfNeeded(loop, needs_taken_test);
+ HBasicBlock* block = GetPreHeader(loop, instruction);
induction_range_.GenerateRangeCode(instruction, index, GetGraph(), block, &lower, &upper);
if (lower != nullptr) {
InsertDeopt(loop, block, new (GetGraph()->GetArena()) HAbove(lower, upper));
@@ -1358,7 +1359,7 @@ class BCEVisitor : public HGraphVisitor {
return true;
} else if (length->IsArrayLength() && length->GetBlock()->GetLoopInformation() == loop) {
if (CanHandleNullCheck(loop, length->InputAt(0), needs_taken_test)) {
- HoistToPreheaderOrDeoptBlock(loop, length);
+ HoistToPreHeaderOrDeoptBlock(loop, length);
return true;
}
}
@@ -1376,7 +1377,8 @@ class BCEVisitor : public HGraphVisitor {
HInstruction* array = check->InputAt(0);
if (loop->IsDefinedOutOfTheLoop(array)) {
// Generate: if (array == null) deoptimize;
- HBasicBlock* block = TransformLoopForDeoptimizationIfNeeded(loop, needs_taken_test);
+ TransformLoopForDeoptimizationIfNeeded(loop, needs_taken_test);
+ HBasicBlock* block = GetPreHeader(loop, check);
HInstruction* cond =
new (GetGraph()->GetArena()) HEqual(array, GetGraph()->GetNullConstant());
InsertDeopt(loop, block, cond);
@@ -1423,6 +1425,28 @@ class BCEVisitor : public HGraphVisitor {
return true;
}
+ /**
+ * Returns the appropriate preheader for the loop, depending on whether the
+ * instruction appears in the loop header or in the loop body proper.
+ */
+ HBasicBlock* GetPreHeader(HLoopInformation* loop, HInstruction* instruction) {
+ // Use the preheader unless there is an earlier generated deoptimization block, since
+ // hoisted expressions may depend on and/or be used by the deoptimization tests.
+ HBasicBlock* header = loop->GetHeader();
+ const uint32_t loop_id = header->GetBlockId();
+ auto it = taken_test_loop_.find(loop_id);
+ if (it != taken_test_loop_.end()) {
+ HBasicBlock* block = it->second;
+ // If the instruction appears in the loop header, return the original preheader
+ // (found by following the predecessor of the true-block twice) so that the
+ // taken-test remains always taken.
+ if (instruction->GetBlock() == header) {
+ return block->GetSinglePredecessor()->GetSinglePredecessor();
+ }
+ return block;
+ }
+ return loop->GetPreHeader();
+ }
+
/** Inserts a deoptimization test. */
void InsertDeopt(HLoopInformation* loop, HBasicBlock* block, HInstruction* condition) {
HInstruction* suspend = loop->GetSuspendCheck();
@@ -1437,28 +1461,17 @@ class BCEVisitor : public HGraphVisitor {
}
/** Hoists instruction out of the loop to preheader or deoptimization block. */
- void HoistToPreheaderOrDeoptBlock(HLoopInformation* loop, HInstruction* instruction) {
- // Use preheader unless there is an earlier generated deoptimization block since
- // hoisted expressions may depend on and/or used by the deoptimization tests.
- const uint32_t loop_id = loop->GetHeader()->GetBlockId();
- HBasicBlock* preheader = loop->GetPreHeader();
- HBasicBlock* block = preheader;
- auto it = taken_test_loop_.find(loop_id);
- if (it != taken_test_loop_.end()) {
- block = it->second;
- }
- // Hoist the instruction.
+ void HoistToPreHeaderOrDeoptBlock(HLoopInformation* loop, HInstruction* instruction) {
+ HBasicBlock* block = GetPreHeader(loop, instruction);
DCHECK(!instruction->HasEnvironment());
instruction->MoveBefore(block->GetLastInstruction());
}
/**
- * Adds a new taken-test structure to a loop if needed (and not already done).
+ * Adds a new taken-test structure to a loop if needed and not already done.
* The taken-test protects range analysis evaluation code to avoid any
* deoptimization caused by incorrect trip-count evaluation in non-taken loops.
*
- * Returns block in which deoptimizations/invariants can be put.
- *
* old_preheader
* |
* if_block <- taken-test protects deoptimization block
@@ -1490,16 +1503,11 @@ class BCEVisitor : public HGraphVisitor {
* array[i] = 0;
* }
*/
- HBasicBlock* TransformLoopForDeoptimizationIfNeeded(HLoopInformation* loop, bool needs_taken_test) {
- // Not needed (can use preheader), or already done (can reuse)?
+ void TransformLoopForDeoptimizationIfNeeded(HLoopInformation* loop, bool needs_taken_test) {
+ // Not needed (can use preheader) or already done (can reuse)?
const uint32_t loop_id = loop->GetHeader()->GetBlockId();
- if (!needs_taken_test) {
- return loop->GetPreHeader();
- } else {
- auto it = taken_test_loop_.find(loop_id);
- if (it != taken_test_loop_.end()) {
- return it->second;
- }
+ if (!needs_taken_test || taken_test_loop_.find(loop_id) != taken_test_loop_.end()) {
+ return;
}
// Generate top test structure.
@@ -1528,7 +1536,6 @@ class BCEVisitor : public HGraphVisitor {
if_block->AddInstruction(new (GetGraph()->GetArena()) HIf(condition));
taken_test_loop_.Put(loop_id, true_block);
- return true_block;
}
/**
@@ -1543,7 +1550,7 @@ class BCEVisitor : public HGraphVisitor {
* \ /
* x_1 = phi(x_0, null) <- synthetic phi
* |
- * header
+ * new_preheader
*/
void InsertPhiNodes() {
// Scan all new deoptimization blocks.
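
For readers new to dynamic BCE, the HAbove/HEqual deoptimizations inserted above lean on the classic unsigned-compare trick: reinterpreting signed values as unsigned folds a two-sided bounds check into one comparison, which is also why restricting the analysis to unit-stride 32-bit induction keeps wrap-around cases safe. A small illustration (not ART code):

#include <cassert>
#include <cstdint>

// One unsigned comparison checks both 0 <= i and i < length, because a
// negative i reinterprets as a huge unsigned value.
bool InBounds(int32_t i, int32_t length) {
  return static_cast<uint32_t>(i) < static_cast<uint32_t>(length);
}

int main() {
  assert(InBounds(3, 10));
  assert(!InBounds(-1, 10));  // -1 wraps to 0xFFFFFFFF, caught by one compare
  assert(!InBounds(10, 10));
  return 0;
}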
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 53d3615a41..ea0b9eca9a 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -997,6 +997,12 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction,
stack_map_stream_.EndStackMapEntry();
}
+bool CodeGenerator::HasStackMapAtCurrentPc() {
+ uint32_t pc = GetAssembler()->CodeSize();
+ size_t count = stack_map_stream_.GetNumberOfStackMaps();
+ return count > 0 && stack_map_stream_.GetStackMap(count - 1).native_pc_offset == pc;
+}
+
void CodeGenerator::RecordCatchBlockInfo() {
ArenaAllocator* arena = graph_->GetArena();
@@ -1320,12 +1326,6 @@ void CodeGenerator::ValidateInvokeRuntime(HInstruction* instruction, SlowPathCod
<< "instruction->DebugName()=" << instruction->DebugName()
<< " slow_path->GetDescription()=" << slow_path->GetDescription();
DCHECK(instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC()) ||
- // Control flow would not come back into the code if a fatal slow
- // path is taken, so we do not care if it triggers GC.
- slow_path->IsFatal() ||
- // HDeoptimize is a special case: we know we are not coming back from
- // it into the code.
- instruction->IsDeoptimize() ||
// When read barriers are enabled, some instructions use a
// slow path to emit a read barrier, which does not trigger
// GC, is not fatal, nor is emitted by HDeoptimize
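
HasStackMapAtCurrentPc, added above, exists so that backends can keep stack maps unique per native pc when emitting HNativeDebugInfo; the ARM hunk further down pads with a nop whenever the query fires. A toy model of the rule (real stack maps carry far more than a pc, of course):

#include <cstdint>
#include <iostream>
#include <vector>

// Toy model of the "one stack map per native pc" rule: before recording the
// map for a debug-info point, check whether the previous instruction already
// recorded one at the current pc, and pad with a one-byte nop if so.
struct ToyCodegen {
  std::vector<uint8_t> code;
  std::vector<uint32_t> map_pcs;  // native pc offset of each recorded map
  bool HasStackMapAtCurrentPc() const {
    return !map_pcs.empty() && map_pcs.back() == code.size();
  }
  void RecordPcInfo() { map_pcs.push_back(static_cast<uint32_t>(code.size())); }
  void Nop() { code.push_back(0x90); }
};

int main() {
  ToyCodegen cg;
  cg.RecordPcInfo();                  // map left by the previous instruction
  if (cg.HasStackMapAtCurrentPc()) {  // a second map here would collide ...
    cg.Nop();                         // ... so pad, as the ARM visitor does
  }
  cg.RecordPcInfo();
  std::cout << cg.map_pcs[0] << " " << cg.map_pcs[1] << "\n";  // prints "0 1"
  return 0;
}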
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index eade05d7b6..5958cd89bc 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -269,6 +269,8 @@ class CodeGenerator {
// Record native to dex mapping for a suspend point. Required by runtime.
void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path = nullptr);
+ // Check whether we have already recorded a mapping at this PC.
+ bool HasStackMapAtCurrentPc();
bool CanMoveNullCheckToUser(HNullCheck* null_check);
void MaybeRecordImplicitNullCheck(HInstruction* instruction);
@@ -611,7 +613,7 @@ class CodeGenerator {
ArenaVector<SlowPathCode*> slow_paths_;
- // The current slow path that we're generating code for.
+ // The current slow-path that we're generating code for.
SlowPathCode* current_slow_path_;
// The current block index in `block_order_` of the block
@@ -672,6 +674,122 @@ class CallingConvention {
DISALLOW_COPY_AND_ASSIGN(CallingConvention);
};
+/**
+ * A templated class SlowPathGenerator with a templated method NewSlowPath()
+ * that can be used by any code generator to share equivalent slow-paths with
+ * the objective of reducing generated code size.
+ *
+ * InstructionType: instruction that requires SlowPathCodeType
+ * SlowPathCodeType: subclass of SlowPathCode, with constructor SlowPathCodeType(InstructionType *)
+ */
+template <typename InstructionType>
+class SlowPathGenerator {
+ static_assert(std::is_base_of<HInstruction, InstructionType>::value,
+ "InstructionType is not a subclass of art::HInstruction");
+
+ public:
+ SlowPathGenerator(HGraph* graph, CodeGenerator* codegen)
+ : graph_(graph),
+ codegen_(codegen),
+ slow_path_map_(std::less<uint32_t>(), graph->GetArena()->Adapter(kArenaAllocSlowPaths)) {}
+
+ // Creates and adds a new slow-path, if needed, or returns existing one otherwise.
+ // Templating the method (rather than the whole class) on the slow-path type enables
+ // keeping this code in a generic, non-architecture-specific place.
+ //
+ // NOTE: This approach assumes each InstructionType only generates one SlowPathCodeType.
+ // To relax this requirement, we would need some RTTI on the stored slow-paths,
+ // or template the class as a whole on SlowPathCodeType.
+ template <typename SlowPathCodeType>
+ SlowPathCodeType* NewSlowPath(InstructionType* instruction) {
+ static_assert(std::is_base_of<SlowPathCode, SlowPathCodeType>::value,
+ "SlowPathCodeType is not a subclass of art::SlowPathCode");
+ static_assert(std::is_constructible<SlowPathCodeType, InstructionType*>::value,
+ "SlowPathCodeType is not constructible from InstructionType*");
+ // Iterate over potential candidates for sharing. Currently, only same-typed
+ // slow-paths with exactly the same dex-pc are viable candidates.
+ // TODO: pass dex-pc/slow-path-type to run-time to allow even more sharing?
+ const uint32_t dex_pc = instruction->GetDexPc();
+ auto iter = slow_path_map_.find(dex_pc);
+ if (iter != slow_path_map_.end()) {
+ auto candidates = iter->second;
+ for (const auto& it : candidates) {
+ InstructionType* other_instruction = it.first;
+ SlowPathCodeType* other_slow_path = down_cast<SlowPathCodeType*>(it.second);
+ // Determine if the instructions allow for slow-path sharing.
+ if (HaveSameLiveRegisters(instruction, other_instruction) &&
+ HaveSameStackMap(instruction, other_instruction)) {
+ // Can share: reuse existing one.
+ return other_slow_path;
+ }
+ }
+ } else {
+ // First time this dex-pc is seen.
+ iter = slow_path_map_.Put(dex_pc, {{}, {graph_->GetArena()->Adapter(kArenaAllocSlowPaths)}});
+ }
+ // Cannot share: create and add new slow-path for this particular dex-pc.
+ SlowPathCodeType* slow_path = new (graph_->GetArena()) SlowPathCodeType(instruction);
+ iter->second.emplace_back(std::make_pair(instruction, slow_path));
+ codegen_->AddSlowPath(slow_path);
+ return slow_path;
+ }
+
+ private:
+ // Tests if both instructions have the same set of live physical registers. This ensures
+ // the slow-path has exactly the same preamble for saving these registers to the stack.
+ bool HaveSameLiveRegisters(const InstructionType* i1, const InstructionType* i2) const {
+ const uint32_t core_spill = ~codegen_->GetCoreSpillMask();
+ const uint32_t fpu_spill = ~codegen_->GetFpuSpillMask();
+ RegisterSet* live1 = i1->GetLocations()->GetLiveRegisters();
+ RegisterSet* live2 = i2->GetLocations()->GetLiveRegisters();
+ return (((live1->GetCoreRegisters() & core_spill) ==
+ (live2->GetCoreRegisters() & core_spill)) &&
+ ((live1->GetFloatingPointRegisters() & fpu_spill) ==
+ (live2->GetFloatingPointRegisters() & fpu_spill)));
+ }
+
+ // Tests if both instructions have the same stack map. This ensures the interpreter
+ // will find exactly the same dex-registers at the same entries.
+ bool HaveSameStackMap(const InstructionType* i1, const InstructionType* i2) const {
+ DCHECK(i1->HasEnvironment());
+ DCHECK(i2->HasEnvironment());
+ // We conservatively test whether the two environments record exactly the same instruction
+ // and location for each dex-register. This guarantees they will have the same stack map.
+ HEnvironment* e1 = i1->GetEnvironment();
+ HEnvironment* e2 = i2->GetEnvironment();
+ if (e1->GetParent() != e2->GetParent() || e1->Size() != e2->Size()) {
+ return false;
+ }
+ for (size_t i = 0, sz = e1->Size(); i < sz; ++i) {
+ if (e1->GetInstructionAt(i) != e2->GetInstructionAt(i) ||
+ !e1->GetLocationAt(i).Equals(e2->GetLocationAt(i))) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ HGraph* const graph_;
+ CodeGenerator* const codegen_;
+
+ // Map from dex-pc to vector of already existing instruction/slow-path pairs.
+ ArenaSafeMap<uint32_t, ArenaVector<std::pair<InstructionType*, SlowPathCode*>>> slow_path_map_;
+
+ DISALLOW_COPY_AND_ASSIGN(SlowPathGenerator);
+};
+
+class InstructionCodeGenerator : public HGraphVisitor {
+ public:
+ InstructionCodeGenerator(HGraph* graph, CodeGenerator* codegen)
+ : HGraphVisitor(graph),
+ deopt_slow_paths_(graph, codegen) {}
+
+ protected:
+ // Add a slow-path generator for each instruction/slow-path combination that desires sharing.
+ // TODO: under the current regime, only deopt sharing makes sense; extend later.
+ SlowPathGenerator<HDeoptimize> deopt_slow_paths_;
+};
+
} // namespace art
#endif // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_
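
Functionally, NewSlowPath replaces the usual "new slow path + AddSlowPath" pair in a backend visitor (the ARM file below makes exactly that substitution for HDeoptimize). The sharing decision itself reduces to a per-dex-pc cache plus an equivalence test; a toy model, with the HaveSameLiveRegisters/HaveSameStackMap checks collapsed into one boolean:

#include <cstdint>
#include <iostream>
#include <map>
#include <vector>

struct ToySlowPath { uint32_t dex_pc; };

// Toy model of SlowPathGenerator::NewSlowPath: slow paths are cached per dex
// pc and reused when an equivalence check passes, so N identical deopt sites
// cost one out-of-line code blob instead of N.
struct ToyGenerator {
  std::map<uint32_t, std::vector<ToySlowPath*>> cache;
  size_t created = 0;
  ToySlowPath* NewSlowPath(uint32_t dex_pc, bool equivalent_to_existing) {
    std::vector<ToySlowPath*>& candidates = cache[dex_pc];
    if (!candidates.empty() && equivalent_to_existing) {
      return candidates.front();  // can share: reuse the existing slow path
    }
    ++created;  // cannot share: create and register a fresh one
    candidates.push_back(new ToySlowPath{dex_pc});  // arena-like, never freed
    return candidates.back();
  }
};

int main() {
  ToyGenerator gen;
  ToySlowPath* a = gen.NewSlowPath(42, /*equivalent_to_existing=*/true);
  ToySlowPath* b = gen.NewSlowPath(42, /*equivalent_to_existing=*/true);
  std::cout << (a == b) << " " << gen.created << "\n";  // prints "1 1"
  return 0;
}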
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 9a1f2b8717..d64b8784e1 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -350,24 +350,24 @@ class TypeCheckSlowPathARM : public SlowPathCode {
class DeoptimizationSlowPathARM : public SlowPathCode {
public:
- explicit DeoptimizationSlowPathARM(HInstruction* instruction)
+ explicit DeoptimizationSlowPathARM(HDeoptimize* instruction)
: instruction_(instruction) {}
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, instruction_->GetLocations());
- DCHECK(instruction_->IsDeoptimize());
- HDeoptimize* deoptimize = instruction_->AsDeoptimize();
- uint32_t dex_pc = deoptimize->GetDexPc();
- CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
- arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this);
+ arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize),
+ instruction_,
+ instruction_->GetDexPc(),
+ this);
CheckEntrypointTypes<kQuickDeoptimize, void, void>();
}
const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARM"; }
private:
- HInstruction* const instruction_;
+ HDeoptimize* const instruction_;
DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM);
};
@@ -417,6 +417,56 @@ class ArraySetSlowPathARM : public SlowPathCode {
DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM);
};
+// Slow path marking an object during a read barrier.
+class ReadBarrierMarkSlowPathARM : public SlowPathCode {
+ public:
+ ReadBarrierMarkSlowPathARM(HInstruction* instruction, Location out, Location obj)
+ : instruction_(instruction), out_(out), obj_(obj) {
+ DCHECK(kEmitCompilerReadBarrier);
+ }
+
+ const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathARM"; }
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ LocationSummary* locations = instruction_->GetLocations();
+ Register reg_out = out_.AsRegister<Register>();
+ DCHECK(locations->CanCall());
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
+ DCHECK(instruction_->IsInstanceFieldGet() ||
+ instruction_->IsStaticFieldGet() ||
+ instruction_->IsArrayGet() ||
+ instruction_->IsLoadClass() ||
+ instruction_->IsLoadString() ||
+ instruction_->IsInstanceOf() ||
+ instruction_->IsCheckCast())
+ << "Unexpected instruction in read barrier marking slow path: "
+ << instruction_->DebugName();
+
+ __ Bind(GetEntryLabel());
+ SaveLiveRegisters(codegen, locations);
+
+ InvokeRuntimeCallingConvention calling_convention;
+ CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
+ arm_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), obj_);
+ arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierMark),
+ instruction_,
+ instruction_->GetDexPc(),
+ this);
+ CheckEntrypointTypes<kQuickReadBarrierMark, mirror::Object*, mirror::Object*>();
+ arm_codegen->Move32(out_, Location::RegisterLocation(R0));
+
+ RestoreLiveRegisters(codegen, locations);
+ __ b(GetExitLabel());
+ }
+
+ private:
+ HInstruction* const instruction_;
+ const Location out_;
+ const Location obj_;
+
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM);
+};
+
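
The new ReadBarrierMarkSlowPathARM encodes a simple contract: hand the just-loaded reference to the pReadBarrierMark entrypoint and continue with whatever reference it returns (moved out of R0 above). A toy model of that contract, with the entrypoint reduced to an identity-with-forwarding function; all names here are illustrative, not ART's:

#include <iostream>

// Stand-in for the runtime marking entrypoint: takes the loaded reference,
// returns the reference the compiled code must use from now on (possibly a
// forwarded/to-space copy while the collector is running).
void* MarkReference(void* ref) {
  return ref;  // identity here; the real entrypoint may return a new address
}

// Shape of a reference load under a marking ("Baker-style") barrier: the fast
// path is a plain load; when the barrier fires, route through the mark call.
void* LoadReference(void** slot, bool barrier_needed) {
  void* ref = *slot;
  if (barrier_needed) {
    ref = MarkReference(ref);  // slow path, like the InvokeRuntime call above
  }
  return ref;
}

int main() {
  int object = 0;
  void* slot = &object;
  std::cout << (LoadReference(&slot, true) == &object) << "\n";  // prints "1"
  return 0;
}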
// Slow path generating a read barrier for a heap reference.
class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCode {
public:
@@ -438,7 +488,7 @@ class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCode {
// to be instrumented, e.g.:
//
// __ LoadFromOffset(kLoadWord, out, out, offset);
- // codegen_->GenerateReadBarrier(instruction, out_loc, out_loc, out_loc, offset);
+ // codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
//
// In that case, we have lost the information about the original
// object, and the emitted read barrier cannot work properly.
@@ -454,7 +504,9 @@ class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCode {
DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
DCHECK(!instruction_->IsInvoke() ||
(instruction_->IsInvokeStaticOrDirect() &&
- instruction_->GetLocations()->Intrinsified()));
+ instruction_->GetLocations()->Intrinsified()))
+ << "Unexpected instruction in read barrier for heap reference slow path: "
+ << instruction_->DebugName();
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, locations);
@@ -596,14 +648,18 @@ class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCode {
class ReadBarrierForRootSlowPathARM : public SlowPathCode {
public:
ReadBarrierForRootSlowPathARM(HInstruction* instruction, Location out, Location root)
- : instruction_(instruction), out_(out), root_(root) {}
+ : instruction_(instruction), out_(out), root_(root) {
+ DCHECK(kEmitCompilerReadBarrier);
+ }
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
LocationSummary* locations = instruction_->GetLocations();
Register reg_out = out_.AsRegister<Register>();
DCHECK(locations->CanCall());
DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
- DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString());
+ DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
+ << "Unexpected instruction in read barrier for GC root slow path: "
+ << instruction_->DebugName();
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, locations);
@@ -857,7 +913,7 @@ void CodeGeneratorARM::UpdateBlockedPairRegisters() const {
}
InstructionCodeGeneratorARM::InstructionCodeGeneratorARM(HGraph* graph, CodeGeneratorARM* codegen)
- : HGraphVisitor(graph),
+ : InstructionCodeGenerator(graph, codegen),
assembler_(codegen->GetAssembler()),
codegen_(codegen) {}
@@ -1358,17 +1414,6 @@ void LocationsBuilderARM::VisitExit(HExit* exit) {
void InstructionCodeGeneratorARM::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
}
-void InstructionCodeGeneratorARM::GenerateCompareWithImmediate(Register left, int32_t right) {
- ShifterOperand operand;
- if (GetAssembler()->ShifterOperandCanHold(R0, left, CMP, right, &operand)) {
- __ cmp(left, operand);
- } else {
- Register temp = IP;
- __ LoadImmediate(temp, right);
- __ cmp(left, ShifterOperand(temp));
- }
-}
-
void InstructionCodeGeneratorARM::GenerateFPJumps(HCondition* cond,
Label* true_label,
Label* false_label) {
@@ -1434,7 +1479,7 @@ void InstructionCodeGeneratorARM::GenerateLongComparesAndJumps(HCondition* cond,
int32_t val_low = Low32Bits(value);
int32_t val_high = High32Bits(value);
- GenerateCompareWithImmediate(left_high, val_high);
+ __ CmpConstant(left_high, val_high);
if (if_cond == kCondNE) {
__ b(true_label, ARMCondition(true_high_cond));
} else if (if_cond == kCondEQ) {
@@ -1444,7 +1489,7 @@ void InstructionCodeGeneratorARM::GenerateLongComparesAndJumps(HCondition* cond,
__ b(false_label, ARMCondition(false_high_cond));
}
// Must be equal high, so compare the lows.
- GenerateCompareWithImmediate(left_low, val_low);
+ __ CmpConstant(left_low, val_low);
} else {
Register right_high = right.AsRegisterPairHigh<Register>();
Register right_low = right.AsRegisterPairLow<Register>();
@@ -1568,7 +1613,7 @@ void InstructionCodeGeneratorARM::GenerateTestAndBranch(HInstruction* instructio
__ cmp(left, ShifterOperand(right.AsRegister<Register>()));
} else {
DCHECK(right.IsConstant());
- GenerateCompareWithImmediate(left, CodeGenerator::GetInt32ValueOf(right.GetConstant()));
+ __ CmpConstant(left, CodeGenerator::GetInt32ValueOf(right.GetConstant()));
}
if (true_target == nullptr) {
__ b(false_target, ARMCondition(condition->GetOppositeCondition()));
@@ -1610,8 +1655,7 @@ void LocationsBuilderARM::VisitDeoptimize(HDeoptimize* deoptimize) {
}
void InstructionCodeGeneratorARM::VisitDeoptimize(HDeoptimize* deoptimize) {
- SlowPathCode* slow_path = new (GetGraph()->GetArena()) DeoptimizationSlowPathARM(deoptimize);
- codegen_->AddSlowPath(slow_path);
+ SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathARM>(deoptimize);
GenerateTestAndBranch(deoptimize,
/* condition_input_index */ 0,
slow_path->GetEntryLabel(),
@@ -1623,6 +1667,10 @@ void LocationsBuilderARM::VisitNativeDebugInfo(HNativeDebugInfo* info) {
}
void InstructionCodeGeneratorARM::VisitNativeDebugInfo(HNativeDebugInfo* info) {
+ if (codegen_->HasStackMapAtCurrentPc()) {
+ // Ensure that we do not collide with the stack map of the previous instruction.
+ __ nop();
+ }
codegen_->RecordPcInfo(info, info->GetDexPc());
}
@@ -1675,8 +1723,8 @@ void InstructionCodeGeneratorARM::HandleCondition(HCondition* cond) {
__ cmp(left.AsRegister<Register>(), ShifterOperand(right.AsRegister<Register>()));
} else {
DCHECK(right.IsConstant());
- GenerateCompareWithImmediate(left.AsRegister<Register>(),
- CodeGenerator::GetInt32ValueOf(right.GetConstant()));
+ __ CmpConstant(left.AsRegister<Register>(),
+ CodeGenerator::GetInt32ValueOf(right.GetConstant()));
}
__ it(ARMCondition(cond->GetCondition()), kItElse);
__ mov(locations->Out().AsRegister<Register>(), ShifterOperand(1),
@@ -1891,7 +1939,7 @@ void LocationsBuilderARM::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
}
void InstructionCodeGeneratorARM::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
- GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
+ codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
}
void LocationsBuilderARM::VisitReturnVoid(HReturnVoid* ret) {
@@ -2846,8 +2894,7 @@ void InstructionCodeGeneratorARM::DivRemByPowerOfTwo(HBinaryOperation* instructi
Register dividend = locations->InAt(0).AsRegister<Register>();
Register temp = locations->GetTemp(0).AsRegister<Register>();
int32_t imm = second.GetConstant()->AsIntConstant()->GetValue();
- uint32_t abs_imm = static_cast<uint32_t>(std::abs(imm));
- DCHECK(IsPowerOfTwo(abs_imm));
+ uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm));
int ctz_imm = CTZ(abs_imm);
if (ctz_imm == 1) {
@@ -2923,7 +2970,7 @@ void InstructionCodeGeneratorARM::GenerateDivRemConstantIntegral(HBinaryOperatio
// Do not generate anything. DivZeroCheck would prevent any code to be executed.
} else if (imm == 1 || imm == -1) {
DivRemOneOrMinusOne(instruction);
- } else if (IsPowerOfTwo(std::abs(imm))) {
+ } else if (IsPowerOfTwo(AbsOrMin(imm))) {
DivRemByPowerOfTwo(instruction);
} else {
DCHECK(imm <= -2 || imm >= 2);
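
The switch from std::abs to AbsOrMin above matters for a divisor of kMinInt: std::abs(INT32_MIN) is undefined behavior, while -2^31 is still a legitimate power-of-two divisor once its bit pattern is viewed unsigned. A sketch of the assumed AbsOrMin semantics (ART's own helper lives in bit_utils.h; this standalone version only illustrates the idea):

#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <limits>

// Absolute value that maps the most negative value to itself instead of
// invoking std::abs's undefined behavior on it.
int32_t AbsOrMin(int32_t value) {
  return (value == std::numeric_limits<int32_t>::min()) ? value
                                                        : std::abs(value);
}

bool IsPowerOfTwoU32(uint32_t x) { return x != 0 && (x & (x - 1)) == 0; }

int main() {
  // 0x80000000 is a power of two as an unsigned bit pattern, so a divisor of
  // kMinInt still takes the cheap shift-based DivRemByPowerOfTwo path.
  assert(IsPowerOfTwoU32(static_cast<uint32_t>(
      AbsOrMin(std::numeric_limits<int32_t>::min()))));
  assert(AbsOrMin(-8) == 8);
  return 0;
}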
@@ -2952,12 +2999,12 @@ void LocationsBuilderARM::VisitDiv(HDiv* div) {
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::ConstantLocation(div->InputAt(1)->AsConstant()));
locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
- int32_t abs_imm = std::abs(div->InputAt(1)->AsIntConstant()->GetValue());
- if (abs_imm <= 1) {
+ int32_t value = div->InputAt(1)->AsIntConstant()->GetValue();
+ if (value == 1 || value == 0 || value == -1) {
// No temp register required.
} else {
locations->AddTemp(Location::RequiresRegister());
- if (!IsPowerOfTwo(abs_imm)) {
+ if (!IsPowerOfTwo(AbsOrMin(value))) {
locations->AddTemp(Location::RequiresRegister());
}
}
@@ -3078,12 +3125,12 @@ void LocationsBuilderARM::VisitRem(HRem* rem) {
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::ConstantLocation(rem->InputAt(1)->AsConstant()));
locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
- int32_t abs_imm = std::abs(rem->InputAt(1)->AsIntConstant()->GetValue());
- if (abs_imm <= 1) {
+ int32_t value = rem->InputAt(1)->AsIntConstant()->GetValue();
+ if (value == 1 || value == 0 || value == -1) {
// No temp register required.
} else {
locations->AddTemp(Location::RequiresRegister());
- if (!IsPowerOfTwo(abs_imm)) {
+ if (!IsPowerOfTwo(AbsOrMin(value))) {
locations->AddTemp(Location::RequiresRegister());
}
}
@@ -3437,7 +3484,7 @@ void InstructionCodeGeneratorARM::HandleShift(HBinaryOperation* op) {
Register first_reg = first.AsRegister<Register>();
if (second.IsRegister()) {
Register second_reg = second.AsRegister<Register>();
- // Arm doesn't mask the shift count so we need to do it ourselves.
+ // ARM doesn't mask the shift count so we need to do it ourselves.
__ and_(out_reg, second_reg, ShifterOperand(kMaxIntShiftValue));
if (op->IsShl()) {
__ Lsl(out_reg, first_reg, out_reg);
@@ -3449,7 +3496,7 @@ void InstructionCodeGeneratorARM::HandleShift(HBinaryOperation* op) {
} else {
int32_t cst = second.GetConstant()->AsIntConstant()->GetValue();
uint32_t shift_value = static_cast<uint32_t>(cst & kMaxIntShiftValue);
- if (shift_value == 0) { // arm does not support shifting with 0 immediate.
+ if (shift_value == 0) { // ARM does not support shifting with 0 immediate.
__ Mov(out_reg, first_reg);
} else if (op->IsShl()) {
__ Lsl(out_reg, first_reg, shift_value);
@@ -3796,9 +3843,9 @@ void InstructionCodeGeneratorARM::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
LOG(FATAL) << "Unreachable";
}
-void InstructionCodeGeneratorARM::GenerateMemoryBarrier(MemBarrierKind kind) {
- // TODO (ported from quick): revisit Arm barrier kinds
- DmbOptions flavor = DmbOptions::ISH; // quiet c++ warnings
+void CodeGeneratorARM::GenerateMemoryBarrier(MemBarrierKind kind) {
+ // TODO (ported from quick): revisit ARM barrier kinds.
+ DmbOptions flavor = DmbOptions::ISH; // Quiet C++ warnings.
switch (kind) {
case MemBarrierKind::kAnyStore:
case MemBarrierKind::kLoadAny:
@@ -3879,11 +3926,11 @@ void LocationsBuilderARM::HandleFieldSet(HInstruction* instruction, const FieldI
locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too.
locations->AddTemp(Location::RequiresRegister());
} else if (generate_volatile) {
- // Arm encoding have some additional constraints for ldrexd/strexd:
+ // The ARM encoding has some additional constraints for ldrexd/strexd:
// - registers need to be consecutive
// - the first register should be even but not R14.
- // We don't test for Arm yet, and the assertion makes sure that we revisit this if we ever
- // enable Arm encoding.
+ // We don't test for ARM yet, and the assertion makes sure that we
+ // revisit this if we ever enable ARM encoding.
DCHECK_EQ(InstructionSet::kThumb2, codegen_->GetInstructionSet());
locations->AddTemp(Location::RequiresRegister());
@@ -3913,7 +3960,7 @@ void InstructionCodeGeneratorARM::HandleFieldSet(HInstruction* instruction,
CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
if (is_volatile) {
- GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
}
switch (field_type) {
@@ -4005,7 +4052,7 @@ void InstructionCodeGeneratorARM::HandleFieldSet(HInstruction* instruction,
}
if (is_volatile) {
- GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
}
}
@@ -4039,14 +4086,18 @@ void LocationsBuilderARM::HandleFieldGet(HInstruction* instruction, const FieldI
(overlap ? Location::kOutputOverlap : Location::kNoOutputOverlap));
}
if (volatile_for_double) {
- // Arm encoding have some additional constraints for ldrexd/strexd:
+ // The ARM encoding has some additional constraints for ldrexd/strexd:
// - registers need to be consecutive
// - the first register should be even but not R14.
- // We don't test for Arm yet, and the assertion makes sure that we revisit this if we ever
- // enable Arm encoding.
+ // We don't test for ARM yet, and the assertion makes sure that we
+ // revisit this if we ever enable ARM encoding.
DCHECK_EQ(InstructionSet::kThumb2, codegen_->GetInstructionSet());
locations->AddTemp(Location::RequiresRegister());
locations->AddTemp(Location::RequiresRegister());
+ } else if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
+ // We need a temporary register for the read barrier marking slow
+ // path in CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier.
+ locations->AddTemp(Location::RequiresRegister());
}
}
@@ -4105,33 +4156,52 @@ void InstructionCodeGeneratorARM::HandleFieldGet(HInstruction* instruction,
uint32_t offset = field_info.GetFieldOffset().Uint32Value();
switch (field_type) {
- case Primitive::kPrimBoolean: {
+ case Primitive::kPrimBoolean:
__ LoadFromOffset(kLoadUnsignedByte, out.AsRegister<Register>(), base, offset);
break;
- }
- case Primitive::kPrimByte: {
+ case Primitive::kPrimByte:
__ LoadFromOffset(kLoadSignedByte, out.AsRegister<Register>(), base, offset);
break;
- }
- case Primitive::kPrimShort: {
+ case Primitive::kPrimShort:
__ LoadFromOffset(kLoadSignedHalfword, out.AsRegister<Register>(), base, offset);
break;
- }
- case Primitive::kPrimChar: {
+ case Primitive::kPrimChar:
__ LoadFromOffset(kLoadUnsignedHalfword, out.AsRegister<Register>(), base, offset);
break;
- }
case Primitive::kPrimInt:
- case Primitive::kPrimNot: {
__ LoadFromOffset(kLoadWord, out.AsRegister<Register>(), base, offset);
break;
+
+ case Primitive::kPrimNot: {
+ // /* HeapReference<Object> */ out = *(base + offset)
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ Location temp_loc = locations->GetTemp(0);
+ // Note that a potential implicit null check is handled in this
+ // CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier call.
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ instruction, out, base, offset, temp_loc, /* needs_null_check */ true);
+ if (is_volatile) {
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+ }
+ } else {
+ __ LoadFromOffset(kLoadWord, out.AsRegister<Register>(), base, offset);
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ if (is_volatile) {
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+ }
+ // If read barriers are enabled, emit read barriers other than
+ // Baker's using a slow path (and also unpoison the loaded
+ // reference, if heap poisoning is enabled).
+ codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
+ }
+ break;
}
- case Primitive::kPrimLong: {
+ case Primitive::kPrimLong:
if (is_volatile && !atomic_ldrd_strd) {
GenerateWideAtomicLoad(base, offset,
out.AsRegisterPairLow<Register>(),
@@ -4140,12 +4210,10 @@ void InstructionCodeGeneratorARM::HandleFieldGet(HInstruction* instruction,
__ LoadFromOffset(kLoadWordPair, out.AsRegisterPairLow<Register>(), base, offset);
}
break;
- }
- case Primitive::kPrimFloat: {
+ case Primitive::kPrimFloat:
__ LoadSFromOffset(out.AsFpuRegister<SRegister>(), base, offset);
break;
- }
case Primitive::kPrimDouble: {
DRegister out_reg = FromLowSToD(out.AsFpuRegisterPairLow<SRegister>());
@@ -4167,17 +4235,20 @@ void InstructionCodeGeneratorARM::HandleFieldGet(HInstruction* instruction,
UNREACHABLE();
}
- // Doubles are handled in the switch.
- if (field_type != Primitive::kPrimDouble) {
+ if (field_type == Primitive::kPrimNot || field_type == Primitive::kPrimDouble) {
+ // Potential implicit null checks, in the case of reference or
+ // double fields, are handled in the previous switch statement.
+ } else {
codegen_->MaybeRecordImplicitNullCheck(instruction);
}
if (is_volatile) {
- GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
- }
-
- if (field_type == Primitive::kPrimNot) {
- codegen_->MaybeGenerateReadBarrier(instruction, out, out, base_loc, offset);
+ if (field_type == Primitive::kPrimNot) {
+ // Memory barriers, in the case of references, are also handled
+ // in the previous switch statement.
+ } else {
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+ }
}
}
@@ -4340,6 +4411,11 @@ void LocationsBuilderARM::VisitArrayGet(HArrayGet* instruction) {
Location::RequiresRegister(),
object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
}
+ // We need a temporary register for the read barrier marking slow
+ // path in CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier.
+ if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
+ locations->AddTemp(Location::RequiresRegister());
+ }
}
void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) {
@@ -4347,12 +4423,13 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) {
Location obj_loc = locations->InAt(0);
Register obj = obj_loc.AsRegister<Register>();
Location index = locations->InAt(1);
- Primitive::Type type = instruction->GetType();
+ Location out_loc = locations->Out();
+ Primitive::Type type = instruction->GetType();
switch (type) {
case Primitive::kPrimBoolean: {
uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
- Register out = locations->Out().AsRegister<Register>();
+ Register out = out_loc.AsRegister<Register>();
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
@@ -4366,7 +4443,7 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) {
case Primitive::kPrimByte: {
uint32_t data_offset = mirror::Array::DataOffset(sizeof(int8_t)).Uint32Value();
- Register out = locations->Out().AsRegister<Register>();
+ Register out = out_loc.AsRegister<Register>();
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
@@ -4380,7 +4457,7 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) {
case Primitive::kPrimShort: {
uint32_t data_offset = mirror::Array::DataOffset(sizeof(int16_t)).Uint32Value();
- Register out = locations->Out().AsRegister<Register>();
+ Register out = out_loc.AsRegister<Register>();
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
@@ -4394,7 +4471,7 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) {
case Primitive::kPrimChar: {
uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
- Register out = locations->Out().AsRegister<Register>();
+ Register out = out_loc.AsRegister<Register>();
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
@@ -4406,13 +4483,9 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) {
break;
}
- case Primitive::kPrimInt:
- case Primitive::kPrimNot: {
- static_assert(
- sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
- "art::mirror::HeapReference<mirror::Object> and int32_t have different sizes.");
+ case Primitive::kPrimInt: {
uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
- Register out = locations->Out().AsRegister<Register>();
+ Register out = out_loc.AsRegister<Register>();
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
@@ -4424,44 +4497,79 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) {
break;
}
+ case Primitive::kPrimNot: {
+ static_assert(
+ sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+ "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+ uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+ // /* HeapReference<Object> */ out =
+ // *(obj + data_offset + index * sizeof(HeapReference<Object>))
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ Location temp = locations->GetTemp(0);
+ // Note that a potential implicit null check is handled in this
+ // CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier call.
+ codegen_->GenerateArrayLoadWithBakerReadBarrier(
+ instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ true);
+ } else {
+ Register out = out_loc.AsRegister<Register>();
+ if (index.IsConstant()) {
+ size_t offset =
+ (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+ __ LoadFromOffset(kLoadWord, out, obj, offset);
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ // If read barriers are enabled, emit read barriers other than
+ // Baker's using a slow path (and also unpoison the loaded
+ // reference, if heap poisoning is enabled).
+ codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
+ } else {
+ __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
+ __ LoadFromOffset(kLoadWord, out, IP, data_offset);
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ // If read barriers are enabled, emit read barriers other than
+ // Baker's using a slow path (and also unpoison the loaded
+ // reference, if heap poisoning is enabled).
+ codegen_->MaybeGenerateReadBarrierSlow(
+ instruction, out_loc, out_loc, obj_loc, data_offset, index);
+ }
+ }
+ break;
+ }
+
case Primitive::kPrimLong: {
uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
- Location out = locations->Out();
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
- __ LoadFromOffset(kLoadWordPair, out.AsRegisterPairLow<Register>(), obj, offset);
+ __ LoadFromOffset(kLoadWordPair, out_loc.AsRegisterPairLow<Register>(), obj, offset);
} else {
__ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_8));
- __ LoadFromOffset(kLoadWordPair, out.AsRegisterPairLow<Register>(), IP, data_offset);
+ __ LoadFromOffset(kLoadWordPair, out_loc.AsRegisterPairLow<Register>(), IP, data_offset);
}
break;
}
case Primitive::kPrimFloat: {
uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
- Location out = locations->Out();
- DCHECK(out.IsFpuRegister());
+ SRegister out = out_loc.AsFpuRegister<SRegister>();
if (index.IsConstant()) {
size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
- __ LoadSFromOffset(out.AsFpuRegister<SRegister>(), obj, offset);
+ __ LoadSFromOffset(out, obj, offset);
} else {
__ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
- __ LoadSFromOffset(out.AsFpuRegister<SRegister>(), IP, data_offset);
+ __ LoadSFromOffset(out, IP, data_offset);
}
break;
}
case Primitive::kPrimDouble: {
uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
- Location out = locations->Out();
- DCHECK(out.IsFpuRegisterPair());
+ SRegister out = out_loc.AsFpuRegisterPairLow<SRegister>();
if (index.IsConstant()) {
size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
- __ LoadDFromOffset(FromLowSToD(out.AsFpuRegisterPairLow<SRegister>()), obj, offset);
+ __ LoadDFromOffset(FromLowSToD(out), obj, offset);
} else {
__ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_8));
- __ LoadDFromOffset(FromLowSToD(out.AsFpuRegisterPairLow<SRegister>()), IP, data_offset);
+ __ LoadDFromOffset(FromLowSToD(out), IP, data_offset);
}
break;
}
@@ -4470,20 +4578,12 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) {
LOG(FATAL) << "Unreachable type " << type;
UNREACHABLE();
}
- codegen_->MaybeRecordImplicitNullCheck(instruction);
if (type == Primitive::kPrimNot) {
- static_assert(
- sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
- "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
- uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
- Location out = locations->Out();
- if (index.IsConstant()) {
- uint32_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
- codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset);
- } else {
- codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, data_offset, index);
- }
+ // Potential implicit null checks, in the case of reference
+ // arrays, are handled in the previous switch statement.
+ } else {
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
}
}
@@ -4574,6 +4674,7 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) {
__ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
__ StoreToOffset(kStoreWord, source, IP, data_offset);
}
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
DCHECK(!needs_write_barrier);
DCHECK(!may_need_runtime_call_for_type_check);
break;
@@ -4615,12 +4716,12 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) {
// __ Mov(temp2, temp1);
// // /* HeapReference<Class> */ temp1 = temp1->component_type_
// __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset);
- // codegen_->GenerateReadBarrier(
+ // codegen_->GenerateReadBarrierSlow(
// instruction, temp1_loc, temp1_loc, temp2_loc, component_offset);
//
// // /* HeapReference<Class> */ temp2 = value->klass_
// __ LoadFromOffset(kLoadWord, temp2, value, class_offset);
- // codegen_->GenerateReadBarrier(
+ // codegen_->GenerateReadBarrierSlow(
// instruction, temp2_loc, temp2_loc, value_loc, class_offset, temp1_loc);
//
// __ cmp(temp1, ShifterOperand(temp2));
@@ -4717,8 +4818,6 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) {
__ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
__ StoreToOffset(kStoreWord, value, IP, data_offset);
}
-
- codegen_->MaybeRecordImplicitNullCheck(instruction);
break;
}
@@ -4770,8 +4869,8 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) {
UNREACHABLE();
}
- // Ints and objects are handled in the switch.
- if (value_type != Primitive::kPrimInt && value_type != Primitive::kPrimNot) {
+ // Objects are handled in the switch.
+ if (value_type != Primitive::kPrimNot) {
codegen_->MaybeRecordImplicitNullCheck(instruction);
}
}
@@ -5140,16 +5239,9 @@ void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) {
if (cls->IsReferrersClass()) {
DCHECK(!cls->CanCallRuntime());
DCHECK(!cls->MustGenerateClinitCheck());
- uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value();
- if (kEmitCompilerReadBarrier) {
- // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_)
- __ AddConstant(out, current_method, declaring_class_offset);
- // /* mirror::Class* */ out = out->Read()
- codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc);
- } else {
- // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
- __ LoadFromOffset(kLoadWord, out, current_method, declaring_class_offset);
- }
+ // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+ GenerateGcRootFieldLoad(
+ cls, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
} else {
// /* GcRoot<mirror::Class>[] */ out =
// current_method.ptr_sized_fields_->dex_cache_resolved_types_
@@ -5157,17 +5249,8 @@ void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) {
out,
current_method,
ArtMethod::DexCacheResolvedTypesOffset(kArmPointerSize).Int32Value());
-
- size_t cache_offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex());
- if (kEmitCompilerReadBarrier) {
- // /* GcRoot<mirror::Class>* */ out = &out[type_index]
- __ AddConstant(out, out, cache_offset);
- // /* mirror::Class* */ out = out->Read()
- codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc);
- } else {
- // /* GcRoot<mirror::Class> */ out = out[type_index]
- __ LoadFromOffset(kLoadWord, out, out, cache_offset);
- }
+ // /* GcRoot<mirror::Class> */ out = out[type_index]
+ GenerateGcRootFieldLoad(cls, out_loc, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex()));
if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) {
DCHECK(cls->CanCallRuntime());
@@ -5230,30 +5313,14 @@ void InstructionCodeGeneratorARM::VisitLoadString(HLoadString* load) {
Register out = out_loc.AsRegister<Register>();
Register current_method = locations->InAt(0).AsRegister<Register>();
- uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value();
- if (kEmitCompilerReadBarrier) {
- // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_)
- __ AddConstant(out, current_method, declaring_class_offset);
- // /* mirror::Class* */ out = out->Read()
- codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc);
- } else {
- // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
- __ LoadFromOffset(kLoadWord, out, current_method, declaring_class_offset);
- }
-
+ // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+ GenerateGcRootFieldLoad(
+ load, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
// /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_
__ LoadFromOffset(kLoadWord, out, out, mirror::Class::DexCacheStringsOffset().Int32Value());
-
- size_t cache_offset = CodeGenerator::GetCacheOffset(load->GetStringIndex());
- if (kEmitCompilerReadBarrier) {
- // /* GcRoot<mirror::String>* */ out = &out[string_index]
- __ AddConstant(out, out, cache_offset);
- // /* mirror::String* */ out = out->Read()
- codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc);
- } else {
- // /* GcRoot<mirror::String> */ out = out[string_index]
- __ LoadFromOffset(kLoadWord, out, out, cache_offset);
- }
+ // /* GcRoot<mirror::String> */ out = out[string_index]
+ GenerateGcRootFieldLoad(
+ load, out_loc, out, CodeGenerator::GetCacheOffset(load->GetStringIndex()));
if (!load->IsInDexCache()) {
SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM(load);
@@ -5300,6 +5367,14 @@ void InstructionCodeGeneratorARM::VisitThrow(HThrow* instruction) {
CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
}
+static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
+ return kEmitCompilerReadBarrier &&
+ (kUseBakerReadBarrier ||
+ type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+ type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+ type_check_kind == TypeCheckKind::kArrayObjectCheck);
+}
+
void LocationsBuilderARM::VisitInstanceOf(HInstanceOf* instruction) {
LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
@@ -5326,21 +5401,22 @@ void LocationsBuilderARM::VisitInstanceOf(HInstanceOf* instruction) {
locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
// When read barriers are enabled, we need a temporary register for
// some cases.
- if (kEmitCompilerReadBarrier &&
- (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
- type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
- type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
+ if (TypeCheckNeedsATemporary(type_check_kind)) {
locations->AddTemp(Location::RequiresRegister());
}
}
void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
+ TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
LocationSummary* locations = instruction->GetLocations();
Location obj_loc = locations->InAt(0);
Register obj = obj_loc.AsRegister<Register>();
Register cls = locations->InAt(1).AsRegister<Register>();
Location out_loc = locations->Out();
Register out = out_loc.AsRegister<Register>();
+ Location temp_loc = TypeCheckNeedsATemporary(type_check_kind) ?
+ locations->GetTemp(0) :
+ Location::NoLocation();
uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
@@ -5355,10 +5431,9 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
}
// /* HeapReference<Class> */ out = obj->klass_
- __ LoadFromOffset(kLoadWord, out, obj, class_offset);
- codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, obj_loc, class_offset);
+ GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, temp_loc);
- switch (instruction->GetTypeCheckKind()) {
+ switch (type_check_kind) {
case TypeCheckKind::kExactCheck: {
__ cmp(out, ShifterOperand(cls));
// Classes must be equal for the instanceof to succeed.
@@ -5373,17 +5448,8 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
// object to avoid doing a comparison we know will fail.
Label loop;
__ Bind(&loop);
- Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
- if (kEmitCompilerReadBarrier) {
- // Save the value of `out` into `temp` before overwriting it
- // in the following move operation, as we will need it for the
- // read barrier below.
- Register temp = temp_loc.AsRegister<Register>();
- __ Mov(temp, out);
- }
// /* HeapReference<Class> */ out = out->super_class_
- __ LoadFromOffset(kLoadWord, out, out, super_offset);
- codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset);
+ GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, temp_loc);
// If `out` is null, we use it for the result, and jump to `done`.
__ CompareAndBranchIfZero(out, &done);
__ cmp(out, ShifterOperand(cls));
@@ -5401,17 +5467,8 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
__ Bind(&loop);
__ cmp(out, ShifterOperand(cls));
__ b(&success, EQ);
- Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
- if (kEmitCompilerReadBarrier) {
- // Save the value of `out` into `temp` before overwriting it
- // in the following move operation, as we will need it for the
- // read barrier below.
- Register temp = temp_loc.AsRegister<Register>();
- __ Mov(temp, out);
- }
// /* HeapReference<Class> */ out = out->super_class_
- __ LoadFromOffset(kLoadWord, out, out, super_offset);
- codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset);
+ GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, temp_loc);
__ CompareAndBranchIfNonZero(out, &loop);
// If `out` is null, we use it for the result, and jump to `done`.
__ b(&done);
@@ -5429,17 +5486,8 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
__ cmp(out, ShifterOperand(cls));
__ b(&exact_check, EQ);
// Otherwise, we need to check that the object's class is a non-primitive array.
- Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
- if (kEmitCompilerReadBarrier) {
- // Save the value of `out` into `temp` before overwriting it
- // in the following move operation, as we will need it for the
- // read barrier below.
- Register temp = temp_loc.AsRegister<Register>();
- __ Mov(temp, out);
- }
// /* HeapReference<Class> */ out = out->component_type_
- __ LoadFromOffset(kLoadWord, out, out, component_offset);
- codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, component_offset);
+ GenerateReferenceLoadOneRegister(instruction, out_loc, component_offset, temp_loc);
// If `out` is null, we use it for the result, and jump to `done`.
__ CompareAndBranchIfZero(out, &done);
__ LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset);
@@ -5478,6 +5526,13 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
// HInstanceOf instruction (following the runtime calling
// convention), which might be cluttered by the potential first
// read barrier emission at the beginning of this method.
+ //
+ // TODO: Introduce a new runtime entry point taking the object
+ // to test (instead of its class) as argument, and let it deal
+ // with the read barrier issues. This will let us refactor this
+ // case of the `switch` code as it was previously (with a direct
+ // call to the runtime not using a type checking slow path).
+ // This should also be beneficial for the other cases above.
DCHECK(locations->OnlyCallsOnSlowPath());
slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM(instruction,
/* is_fatal */ false);
@@ -5532,27 +5587,27 @@ void LocationsBuilderARM::VisitCheckCast(HCheckCast* instruction) {
locations->AddTemp(Location::RequiresRegister());
// When read barriers are enabled, we need an additional temporary
// register for some cases.
- if (kEmitCompilerReadBarrier &&
- (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
- type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
- type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
+ if (TypeCheckNeedsATemporary(type_check_kind)) {
locations->AddTemp(Location::RequiresRegister());
}
}
void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) {
+ TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
LocationSummary* locations = instruction->GetLocations();
Location obj_loc = locations->InAt(0);
Register obj = obj_loc.AsRegister<Register>();
Register cls = locations->InAt(1).AsRegister<Register>();
Location temp_loc = locations->GetTemp(0);
Register temp = temp_loc.AsRegister<Register>();
+ Location temp2_loc = TypeCheckNeedsATemporary(type_check_kind) ?
+ locations->GetTemp(1) :
+ Location::NoLocation();
uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
- TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
bool is_type_check_slow_path_fatal =
(type_check_kind == TypeCheckKind::kExactCheck ||
type_check_kind == TypeCheckKind::kAbstractClassCheck ||
@@ -5571,8 +5626,7 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) {
}
// /* HeapReference<Class> */ temp = obj->klass_
- __ LoadFromOffset(kLoadWord, temp, obj, class_offset);
- codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+ GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
switch (type_check_kind) {
case TypeCheckKind::kExactCheck:
@@ -5589,18 +5643,8 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) {
// object to avoid doing a comparison we know will fail.
Label loop, compare_classes;
__ Bind(&loop);
- Location temp2_loc =
- kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
- if (kEmitCompilerReadBarrier) {
- // Save the value of `temp` into `temp2` before overwriting it
- // in the following move operation, as we will need it for the
- // read barrier below.
- Register temp2 = temp2_loc.AsRegister<Register>();
- __ Mov(temp2, temp);
- }
// /* HeapReference<Class> */ temp = temp->super_class_
- __ LoadFromOffset(kLoadWord, temp, temp, super_offset);
- codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset);
+ GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, temp2_loc);
// If the class reference currently in `temp` is not null, jump
// to the `compare_classes` label to compare it with the checked
@@ -5612,8 +5656,7 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) {
// going into the slow path, as it has been overwritten in the
// meantime.
// /* HeapReference<Class> */ temp = obj->klass_
- __ LoadFromOffset(kLoadWord, temp, obj, class_offset);
- codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+ GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
__ b(type_check_slow_path->GetEntryLabel());
__ Bind(&compare_classes);
@@ -5629,18 +5672,8 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) {
__ cmp(temp, ShifterOperand(cls));
__ b(&done, EQ);
- Location temp2_loc =
- kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
- if (kEmitCompilerReadBarrier) {
- // Save the value of `temp` into `temp2` before overwriting it
- // in the following move operation, as we will need it for the
- // read barrier below.
- Register temp2 = temp2_loc.AsRegister<Register>();
- __ Mov(temp2, temp);
- }
// /* HeapReference<Class> */ temp = temp->super_class_
- __ LoadFromOffset(kLoadWord, temp, temp, super_offset);
- codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset);
+ GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, temp2_loc);
// If the class reference currently in `temp` is not null, jump
// back at the beginning of the loop.
@@ -5651,8 +5684,7 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) {
// going into the slow path, as it has been overwritten in the
// meantime.
// /* HeapReference<Class> */ temp = obj->klass_
- __ LoadFromOffset(kLoadWord, temp, obj, class_offset);
- codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+ GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
__ b(type_check_slow_path->GetEntryLabel());
break;
}
@@ -5664,19 +5696,8 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) {
__ b(&done, EQ);
// Otherwise, we need to check that the object's class is a non-primitive array.
- Location temp2_loc =
- kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
- if (kEmitCompilerReadBarrier) {
- // Save the value of `temp` into `temp2` before overwriting it
- // in the following move operation, as we will need it for the
- // read barrier below.
- Register temp2 = temp2_loc.AsRegister<Register>();
- __ Mov(temp2, temp);
- }
// /* HeapReference<Class> */ temp = temp->component_type_
- __ LoadFromOffset(kLoadWord, temp, temp, component_offset);
- codegen_->MaybeGenerateReadBarrier(
- instruction, temp_loc, temp_loc, temp2_loc, component_offset);
+ GenerateReferenceLoadOneRegister(instruction, temp_loc, component_offset, temp2_loc);
// If the component type is not null (i.e. the object is indeed
// an array), jump to label `check_non_primitive_component_type`
@@ -5689,8 +5710,7 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) {
// going into the slow path, as it has been overwritten in the
// meantime.
// /* HeapReference<Class> */ temp = obj->klass_
- __ LoadFromOffset(kLoadWord, temp, obj, class_offset);
- codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+ GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
__ b(type_check_slow_path->GetEntryLabel());
__ Bind(&check_non_primitive_component_type);
@@ -5699,8 +5719,7 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) {
__ CompareAndBranchIfZero(temp, &done);
// Same comment as above regarding `temp` and the slow path.
// /* HeapReference<Class> */ temp = obj->klass_
- __ LoadFromOffset(kLoadWord, temp, obj, class_offset);
- codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+ GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc);
__ b(type_check_slow_path->GetEntryLabel());
break;
}
@@ -5717,6 +5736,13 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) {
// instruction (following the runtime calling convention), which
// might be cluttered by the potential first read barrier
// emission at the beginning of this method.
+ //
+ // TODO: Introduce a new runtime entry point taking the object
+ // to test (instead of its class) as argument, and let it deal
+ // with the read barrier issues. This will let us refactor this
+ // case of the `switch` code as it was previously (with a direct
+ // call to the runtime not using a type checking slow path).
+ // This should also be beneficial for the other cases above.
__ b(type_check_slow_path->GetEntryLabel());
break;
}
@@ -5901,14 +5927,249 @@ void InstructionCodeGeneratorARM::HandleBitwiseOperation(HBinaryOperation* instr
}
}
-void CodeGeneratorARM::GenerateReadBarrier(HInstruction* instruction,
- Location out,
- Location ref,
- Location obj,
- uint32_t offset,
- Location index) {
+void InstructionCodeGeneratorARM::GenerateReferenceLoadOneRegister(HInstruction* instruction,
+ Location out,
+ uint32_t offset,
+ Location temp) {
+ Register out_reg = out.AsRegister<Register>();
+ if (kEmitCompilerReadBarrier) {
+ if (kUseBakerReadBarrier) {
+ // Load with fast path based Baker's read barrier.
+ // /* HeapReference<Object> */ out = *(out + offset)
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ instruction, out, out_reg, offset, temp, /* needs_null_check */ false);
+ } else {
+ // Load with slow path based read barrier.
+ // Save the value of `out` into `temp` before overwriting it
+ // in the following move operation, as we will need it for the
+ // read barrier below.
+ __ Mov(temp.AsRegister<Register>(), out_reg);
+ // /* HeapReference<Object> */ out = *(out + offset)
+ __ LoadFromOffset(kLoadWord, out_reg, out_reg, offset);
+ codegen_->GenerateReadBarrierSlow(instruction, out, out, temp, offset);
+ }
+ } else {
+ // Plain load with no read barrier.
+ // /* HeapReference<Object> */ out = *(out + offset)
+ __ LoadFromOffset(kLoadWord, out_reg, out_reg, offset);
+ __ MaybeUnpoisonHeapReference(out_reg);
+ }
+}
+
+void InstructionCodeGeneratorARM::GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
+ Location out,
+ Location obj,
+ uint32_t offset,
+ Location temp) {
+ Register out_reg = out.AsRegister<Register>();
+ Register obj_reg = obj.AsRegister<Register>();
+ if (kEmitCompilerReadBarrier) {
+ if (kUseBakerReadBarrier) {
+ // Load with fast path based Baker's read barrier.
+ // /* HeapReference<Object> */ out = *(obj + offset)
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ instruction, out, obj_reg, offset, temp, /* needs_null_check */ false);
+ } else {
+ // Load with slow path based read barrier.
+ // /* HeapReference<Object> */ out = *(obj + offset)
+ __ LoadFromOffset(kLoadWord, out_reg, obj_reg, offset);
+ codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
+ }
+ } else {
+ // Plain load with no read barrier.
+ // /* HeapReference<Object> */ out = *(obj + offset)
+ __ LoadFromOffset(kLoadWord, out_reg, obj_reg, offset);
+ __ MaybeUnpoisonHeapReference(out_reg);
+ }
+}
+
+void InstructionCodeGeneratorARM::GenerateGcRootFieldLoad(HInstruction* instruction,
+ Location root,
+ Register obj,
+ uint32_t offset) {
+ Register root_reg = root.AsRegister<Register>();
+ if (kEmitCompilerReadBarrier) {
+ if (kUseBakerReadBarrier) {
+ // Fast path implementation of art::ReadBarrier::BarrierForRoot when
+      // Baker's read barriers are used:
+ //
+ // root = obj.field;
+ // if (Thread::Current()->GetIsGcMarking()) {
+ // root = ReadBarrier::Mark(root)
+ // }
+
+ // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+ __ LoadFromOffset(kLoadWord, root_reg, obj, offset);
+ static_assert(
+ sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
+ "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
+ "have different sizes.");
+ static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
+ "art::mirror::CompressedReference<mirror::Object> and int32_t "
+ "have different sizes.");
+
+ // Slow path used to mark the GC root `root`.
+ SlowPathCode* slow_path =
+ new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, root, root);
+ codegen_->AddSlowPath(slow_path);
+
+ __ LoadFromOffset(
+ kLoadWord, IP, TR, Thread::IsGcMarkingOffset<kArmWordSize>().Int32Value());
+ __ CompareAndBranchIfNonZero(IP, slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+ } else {
+ // GC root loaded through a slow path for read barriers other
+ // than Baker's.
+ // /* GcRoot<mirror::Object>* */ root = obj + offset
+ __ AddConstant(root_reg, obj, offset);
+ // /* mirror::Object* */ root = root->Read()
+ codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
+ }
+ } else {
+ // Plain GC root load with no read barrier.
+ // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+ __ LoadFromOffset(kLoadWord, root_reg, obj, offset);
+ // Note that GC roots are not affected by heap poisoning, thus we
+ // do not have to unpoison `root_reg` here.
+ }
+}
+
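// Editorial sketch, not part of the patch: the root-load protocol that
// GenerateGcRootFieldLoad emits for Baker's read barriers, written as plain
// C++ after the pseudocode above. ReadBarrier::Mark() stands in for the
// ReadBarrierMarkSlowPathARM runtime call.
static mirror::Object* LoadGcRootWithBakerBarrier(uint8_t* obj, uint32_t offset) {
  // Plain load of the GC root, exactly as in the fast path.
  mirror::Object* root = *reinterpret_cast<mirror::Object**>(obj + offset);
  // Only while the GC is concurrently marking does the root need marking.
  if (Thread::Current()->GetIsGcMarking()) {
    root = ReadBarrier::Mark(root);
  }
  return root;
}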
+void CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ Register obj,
+ uint32_t offset,
+ Location temp,
+ bool needs_null_check) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+
+ // /* HeapReference<Object> */ ref = *(obj + offset)
+ Location no_index = Location::NoLocation();
+ GenerateReferenceLoadWithBakerReadBarrier(
+ instruction, ref, obj, offset, no_index, temp, needs_null_check);
+}
+
+void CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ Register obj,
+ uint32_t data_offset,
+ Location index,
+ Location temp,
+ bool needs_null_check) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+
+ // /* HeapReference<Object> */ ref =
+ // *(obj + data_offset + index * sizeof(HeapReference<Object>))
+ GenerateReferenceLoadWithBakerReadBarrier(
+ instruction, ref, obj, data_offset, index, temp, needs_null_check);
+}
+
+void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ Register obj,
+ uint32_t offset,
+ Location index,
+ Location temp,
+ bool needs_null_check) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+
+ // In slow path based read barriers, the read barrier call is
+ // inserted after the original load. However, in fast path based
+ // Baker's read barriers, we need to perform the load of
+ // mirror::Object::monitor_ *before* the original reference load.
+ // This load-load ordering is required by the read barrier.
+ // The fast path/slow path (for Baker's algorithm) should look like:
+ //
+  //   uint32_t rb_state = LockWord(obj->monitor_).ReadBarrierState();
+ // lfence; // Load fence or artificial data dependency to prevent load-load reordering
+ // HeapReference<Object> ref = *src; // Original reference load.
+ // bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+ // if (is_gray) {
+ // ref = ReadBarrier::Mark(ref); // Performed by runtime entrypoint slow path.
+ // }
+ //
+ // Note: the original implementation in ReadBarrier::Barrier is
+  // slightly more complex, as:
+  // - it implements the load-load fence using a data dependency on
+  //   the high bits of rb_state, which are expected to be all zeroes;
+ // - it performs additional checks that we do not do here for
+ // performance reasons.
+
+ Register ref_reg = ref.AsRegister<Register>();
+ Register temp_reg = temp.AsRegister<Register>();
+ uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
+
+ // /* int32_t */ monitor = obj->monitor_
+ __ LoadFromOffset(kLoadWord, temp_reg, obj, monitor_offset);
+ if (needs_null_check) {
+ MaybeRecordImplicitNullCheck(instruction);
+ }
+ // /* LockWord */ lock_word = LockWord(monitor)
+ static_assert(sizeof(LockWord) == sizeof(int32_t),
+ "art::LockWord and int32_t have different sizes.");
+ // /* uint32_t */ rb_state = lock_word.ReadBarrierState()
+ __ Lsr(temp_reg, temp_reg, LockWord::kReadBarrierStateShift);
+ __ and_(temp_reg, temp_reg, ShifterOperand(LockWord::kReadBarrierStateMask));
+ static_assert(
+ LockWord::kReadBarrierStateMask == ReadBarrier::rb_ptr_mask_,
+ "art::LockWord::kReadBarrierStateMask is not equal to art::ReadBarrier::rb_ptr_mask_.");
+
+ // Introduce a dependency on the high bits of rb_state, which shall
+ // be all zeroes, to prevent load-load reordering, and without using
+ // a memory barrier (which would be more expensive).
+ // IP = rb_state & ~LockWord::kReadBarrierStateMask = 0
+ __ bic(IP, temp_reg, ShifterOperand(LockWord::kReadBarrierStateMask));
+ // obj is unchanged by this operation, but its value now depends on
+ // IP, which depends on temp_reg.
+ __ add(obj, obj, ShifterOperand(IP));
+
+ // The actual reference load.
+ if (index.IsValid()) {
+ static_assert(
+ sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+ "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+ // /* HeapReference<Object> */ ref =
+ // *(obj + offset + index * sizeof(HeapReference<Object>))
+ if (index.IsConstant()) {
+ size_t computed_offset =
+ (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset;
+ __ LoadFromOffset(kLoadWord, ref_reg, obj, computed_offset);
+ } else {
+ __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
+ __ LoadFromOffset(kLoadWord, ref_reg, IP, offset);
+ }
+ } else {
+ // /* HeapReference<Object> */ ref = *(obj + offset)
+ __ LoadFromOffset(kLoadWord, ref_reg, obj, offset);
+ }
+
+ // Object* ref = ref_addr->AsMirrorPtr()
+ __ MaybeUnpoisonHeapReference(ref_reg);
+
+ // Slow path used to mark the object `ref` when it is gray.
+ SlowPathCode* slow_path =
+ new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, ref, ref);
+ AddSlowPath(slow_path);
+
+ // if (rb_state == ReadBarrier::gray_ptr_)
+ // ref = ReadBarrier::Mark(ref);
+ __ cmp(temp_reg, ShifterOperand(ReadBarrier::gray_ptr_));
+ __ b(slow_path->GetEntryLabel(), EQ);
+ __ Bind(slow_path->GetExitLabel());
+}
+
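// Editorial sketch, not part of the patch: the Baker's read barrier protocol
// implemented above, as plain C++ mirroring the patch's own pseudocode.
// LockWord(obj->monitor_) and ReadBarrier::Mark() are used loosely, as in
// that pseudocode; the explicit address adjustment models the bic/add pair.
static mirror::Object* LoadRefWithBakerBarrier(mirror::Object* obj, uint32_t offset) {
  // Read the lock word *before* the reference load; the code above preserves
  // this load-load order via a data dependency instead of a memory fence.
  uint32_t rb_state = LockWord(obj->monitor_).ReadBarrierState();
  // The bits cleared by bic are zero, so the addition changes nothing, but it
  // makes the reference load depend on the lock word load.
  uintptr_t base = reinterpret_cast<uintptr_t>(obj) +
      (rb_state & ~LockWord::kReadBarrierStateMask);
  // The original reference load.
  mirror::Object* ref = *reinterpret_cast<mirror::Object**>(base + offset);
  if (rb_state == ReadBarrier::gray_ptr_) {
    // Gray: the object has not been scanned yet; mark the loaded reference.
    ref = ReadBarrier::Mark(ref);
  }
  return ref;
}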
+void CodeGeneratorARM::GenerateReadBarrierSlow(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index) {
DCHECK(kEmitCompilerReadBarrier);
+ // Insert a slow path based read barrier *after* the reference load.
+ //
// If heap poisoning is enabled, the unpoisoning of the loaded
// reference will be carried out by the runtime within the slow
// path.
@@ -5922,57 +6183,41 @@ void CodeGeneratorARM::GenerateReadBarrier(HInstruction* instruction,
ReadBarrierForHeapReferenceSlowPathARM(instruction, out, ref, obj, offset, index);
AddSlowPath(slow_path);
- // TODO: When read barrier has a fast path, add it here.
- /* Currently the read barrier call is inserted after the original load.
- * However, if we have a fast path, we need to perform the load of obj.LockWord *before* the
- * original load. This load-load ordering is required by the read barrier.
- * The fast path/slow path (for Baker's algorithm) should look like:
- *
- * bool isGray = obj.LockWord & kReadBarrierMask;
- * lfence; // load fence or artificial data dependence to prevent load-load reordering
- * ref = obj.field; // this is the original load
- * if (isGray) {
- * ref = Mark(ref); // ideally the slow path just does Mark(ref)
- * }
- */
-
__ b(slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
}
-void CodeGeneratorARM::MaybeGenerateReadBarrier(HInstruction* instruction,
- Location out,
- Location ref,
- Location obj,
- uint32_t offset,
- Location index) {
+void CodeGeneratorARM::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index) {
if (kEmitCompilerReadBarrier) {
+ // Baker's read barriers shall be handled by the fast path
+ // (CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier).
+ DCHECK(!kUseBakerReadBarrier);
// If heap poisoning is enabled, unpoisoning will be taken care of
// by the runtime within the slow path.
- GenerateReadBarrier(instruction, out, ref, obj, offset, index);
+ GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
} else if (kPoisonHeapReferences) {
__ UnpoisonHeapReference(out.AsRegister<Register>());
}
}
-void CodeGeneratorARM::GenerateReadBarrierForRoot(HInstruction* instruction,
- Location out,
- Location root) {
+void CodeGeneratorARM::GenerateReadBarrierForRootSlow(HInstruction* instruction,
+ Location out,
+ Location root) {
DCHECK(kEmitCompilerReadBarrier);
+ // Insert a slow path based read barrier *after* the GC root load.
+ //
// Note that GC roots are not affected by heap poisoning, so we do
// not need to do anything special for this here.
SlowPathCode* slow_path =
new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathARM(instruction, out, root);
AddSlowPath(slow_path);
- // TODO: Implement a fast path for ReadBarrierForRoot, performing
- // the following operation (for Baker's algorithm):
- //
- // if (thread.tls32_.is_gc_marking) {
- // root = Mark(root);
- // }
-
__ b(slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
}
@@ -6304,7 +6549,7 @@ void InstructionCodeGeneratorARM::VisitPackedSwitch(HPackedSwitch* switch_instr)
}
if (num_entries - last_index == 2) {
// The last missing case_value.
- GenerateCompareWithImmediate(temp_reg, 1);
+ __ CmpConstant(temp_reg, 1);
__ b(codegen_->GetLabelOf(successors[last_index + 1]), EQ);
}
@@ -6364,7 +6609,7 @@ void InstructionCodeGeneratorARM::VisitArmDexCacheArraysBase(HArmDexCacheArraysB
void CodeGeneratorARM::MoveFromReturnRegister(Location trg, Primitive::Type type) {
if (!trg.IsValid()) {
- DCHECK(type == Primitive::kPrimVoid);
+ DCHECK_EQ(type, Primitive::kPrimVoid);
return;
}
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index b7c58e1248..26d6d63b31 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -188,7 +188,7 @@ class LocationsBuilderARM : public HGraphVisitor {
DISALLOW_COPY_AND_ASSIGN(LocationsBuilderARM);
};
-class InstructionCodeGeneratorARM : public HGraphVisitor {
+class InstructionCodeGeneratorARM : public InstructionCodeGenerator {
public:
InstructionCodeGeneratorARM(HGraph* graph, CodeGeneratorARM* codegen);
@@ -222,24 +222,57 @@ class InstructionCodeGeneratorARM : public HGraphVisitor {
void HandleLongRotate(LocationSummary* locations);
void HandleRotate(HRor* ror);
void HandleShift(HBinaryOperation* operation);
- void GenerateMemoryBarrier(MemBarrierKind kind);
+
void GenerateWideAtomicStore(Register addr, uint32_t offset,
Register value_lo, Register value_hi,
Register temp1, Register temp2,
HInstruction* instruction);
void GenerateWideAtomicLoad(Register addr, uint32_t offset,
Register out_lo, Register out_hi);
+
void HandleFieldSet(HInstruction* instruction,
const FieldInfo& field_info,
bool value_can_be_null);
void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
+
+ // Generate a heap reference load using one register `out`:
+ //
+ // out <- *(out + offset)
+ //
+ // while honoring heap poisoning and/or read barriers (if any).
+ // Register `temp` is used when generating a read barrier.
+ void GenerateReferenceLoadOneRegister(HInstruction* instruction,
+ Location out,
+ uint32_t offset,
+ Location temp);
+ // Generate a heap reference load using two different registers
+ // `out` and `obj`:
+ //
+ // out <- *(obj + offset)
+ //
+ // while honoring heap poisoning and/or read barriers (if any).
+ // Register `temp` is used when generating a Baker's read barrier.
+ void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
+ Location out,
+ Location obj,
+ uint32_t offset,
+ Location temp);
+ // Generate a GC root reference load:
+ //
+ // root <- *(obj + offset)
+ //
+ // while honoring read barriers (if any).
+ void GenerateGcRootFieldLoad(HInstruction* instruction,
+ Location root,
+ Register obj,
+ uint32_t offset);
+
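// Editorial note: call sites earlier in this patch use these helpers as, for
// example, in VisitLoadClass:
//
//   // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
//   GenerateGcRootFieldLoad(
//       cls, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value());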
void GenerateImplicitNullCheck(HNullCheck* instruction);
void GenerateExplicitNullCheck(HNullCheck* instruction);
void GenerateTestAndBranch(HInstruction* instruction,
size_t condition_input_index,
Label* true_target,
Label* false_target);
- void GenerateCompareWithImmediate(Register left, int32_t right);
void GenerateCompareTestAndBranch(HCondition* condition,
Label* true_target,
Label* false_target);
@@ -346,6 +379,8 @@ class CodeGeneratorARM : public CodeGenerator {
// Emit a write barrier.
void MarkGCCard(Register temp, Register card, Register object, Register value, bool can_be_null);
+ void GenerateMemoryBarrier(MemBarrierKind kind);
+
Label* GetLabelOf(HBasicBlock* block) const {
return CommonGetLabelOf<Label>(block_labels_, block);
}
@@ -406,7 +441,26 @@ class CodeGeneratorARM : public CodeGenerator {
return &it->second;
}
- // Generate a read barrier for a heap reference within `instruction`.
+ // Fast path implementation of ReadBarrier::Barrier for a heap
+ // reference field load when Baker's read barriers are used.
+ void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location out,
+ Register obj,
+ uint32_t offset,
+ Location temp,
+ bool needs_null_check);
+ // Fast path implementation of ReadBarrier::Barrier for a heap
+ // reference array load when Baker's read barriers are used.
+ void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location out,
+ Register obj,
+ uint32_t data_offset,
+ Location index,
+ Location temp,
+ bool needs_null_check);
+
+ // Generate a read barrier for a heap reference within `instruction`
+ // using a slow path.
//
// A read barrier for an object reference read from the heap is
// implemented as a call to the artReadBarrierSlow runtime entry
@@ -423,23 +477,25 @@ class CodeGeneratorARM : public CodeGenerator {
// When `index` is provided (i.e. for array accesses), the offset
// value passed to artReadBarrierSlow is adjusted to take `index`
// into account.
- void GenerateReadBarrier(HInstruction* instruction,
- Location out,
- Location ref,
- Location obj,
- uint32_t offset,
- Location index = Location::NoLocation());
-
- // If read barriers are enabled, generate a read barrier for a heap reference.
- // If heap poisoning is enabled, also unpoison the reference in `out`.
- void MaybeGenerateReadBarrier(HInstruction* instruction,
- Location out,
- Location ref,
- Location obj,
- uint32_t offset,
- Location index = Location::NoLocation());
-
- // Generate a read barrier for a GC root within `instruction`.
+ void GenerateReadBarrierSlow(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index = Location::NoLocation());
+
+ // If read barriers are enabled, generate a read barrier for a heap
+ // reference using a slow path. If heap poisoning is enabled, also
+ // unpoison the reference in `out`.
+ void MaybeGenerateReadBarrierSlow(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index = Location::NoLocation());
+
+ // Generate a read barrier for a GC root within `instruction` using
+ // a slow path.
//
// A read barrier for an object reference GC root is implemented as
// a call to the artReadBarrierForRootSlow runtime entry point,
@@ -449,9 +505,19 @@ class CodeGeneratorARM : public CodeGenerator {
//
// The `out` location contains the value returned by
// artReadBarrierForRootSlow.
- void GenerateReadBarrierForRoot(HInstruction* instruction, Location out, Location root);
+ void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root);
private:
+ // Factored implementation of GenerateFieldLoadWithBakerReadBarrier
+ // and GenerateArrayLoadWithBakerReadBarrier.
+ void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ Register obj,
+ uint32_t offset,
+ Location index,
+ Location temp,
+ bool needs_null_check);
+
Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp);
using MethodToLiteralMap = ArenaSafeMap<MethodReference, Literal*, MethodReferenceComparator>;
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index b49f42b6c8..a3150d3d22 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -477,24 +477,24 @@ class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 {
class DeoptimizationSlowPathARM64 : public SlowPathCodeARM64 {
public:
- explicit DeoptimizationSlowPathARM64(HInstruction* instruction)
+ explicit DeoptimizationSlowPathARM64(HDeoptimize* instruction)
: instruction_(instruction) {}
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, instruction_->GetLocations());
- DCHECK(instruction_->IsDeoptimize());
- HDeoptimize* deoptimize = instruction_->AsDeoptimize();
- uint32_t dex_pc = deoptimize->GetDexPc();
- CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
- arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this);
+ arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize),
+ instruction_,
+ instruction_->GetDexPc(),
+ this);
CheckEntrypointTypes<kQuickDeoptimize, void, void>();
}
const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARM64"; }
private:
- HInstruction* const instruction_;
+ HDeoptimize* const instruction_;
DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM64);
};
@@ -1605,7 +1605,7 @@ void InstructionCodeGeneratorARM64::GenerateSuspendCheck(HSuspendCheck* instruct
InstructionCodeGeneratorARM64::InstructionCodeGeneratorARM64(HGraph* graph,
CodeGeneratorARM64* codegen)
- : HGraphVisitor(graph),
+ : InstructionCodeGenerator(graph, codegen),
assembler_(codegen->GetAssembler()),
codegen_(codegen) {}
@@ -2534,8 +2534,7 @@ void InstructionCodeGeneratorARM64::DivRemByPowerOfTwo(HBinaryOperation* instruc
Register out = OutputRegister(instruction);
Register dividend = InputRegisterAt(instruction, 0);
int64_t imm = Int64FromConstant(second.GetConstant());
- uint64_t abs_imm = static_cast<uint64_t>(std::abs(imm));
- DCHECK(IsPowerOfTwo(abs_imm));
+ uint64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm));
int ctz_imm = CTZ(abs_imm);
UseScratchRegisterScope temps(GetVIXLAssembler());
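// Editorial sketch: a definition of AbsOrMin consistent with this use; the
// real helper lives elsewhere in ART, so treat this as an assumption.
// std::abs() on the most negative value is undefined, but that value's bit
// pattern, cast to unsigned as above, is already the power of two 2^63, so
// returning it unchanged keeps IsPowerOfTwo()/CTZ() meaningful.
#include <cstdlib>  // std::abs
#include <limits>   // std::numeric_limits
template <typename T>
static T AbsOrMin(T value) {
  return (value == std::numeric_limits<T>::min()) ? value : std::abs(value);
}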
@@ -2627,7 +2626,7 @@ void InstructionCodeGeneratorARM64::GenerateDivRemIntegral(HBinaryOperation* ins
// Do not generate anything. DivZeroCheck would prevent any code from being executed.
} else if (imm == 1 || imm == -1) {
DivRemOneOrMinusOne(instruction);
- } else if (IsPowerOfTwo(std::abs(imm))) {
+ } else if (IsPowerOfTwo(AbsOrMin(imm))) {
DivRemByPowerOfTwo(instruction);
} else {
DCHECK(imm <= -2 || imm >= 2);
@@ -2940,9 +2939,8 @@ void LocationsBuilderARM64::VisitDeoptimize(HDeoptimize* deoptimize) {
}
void InstructionCodeGeneratorARM64::VisitDeoptimize(HDeoptimize* deoptimize) {
- SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena())
- DeoptimizationSlowPathARM64(deoptimize);
- codegen_->AddSlowPath(slow_path);
+ SlowPathCodeARM64* slow_path =
+ deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathARM64>(deoptimize);
GenerateTestAndBranch(deoptimize,
/* condition_input_index */ 0,
slow_path->GetEntryLabel(),
@@ -2954,6 +2952,10 @@ void LocationsBuilderARM64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
}
void InstructionCodeGeneratorARM64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
+ if (codegen_->HasStackMapAtCurrentPc()) {
+ // Ensure that we do not collide with the stack map of the previous instruction.
+ __ Nop();
+ }
codegen_->RecordPcInfo(info, info->GetDexPc());
}
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 0e90ac6345..f2ff89488e 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -186,7 +186,7 @@ class FieldAccessCallingConventionARM64 : public FieldAccessCallingConvention {
DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConventionARM64);
};
-class InstructionCodeGeneratorARM64 : public HGraphVisitor {
+class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator {
public:
InstructionCodeGeneratorARM64(HGraph* graph, CodeGeneratorARM64* codegen);
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 4648606da8..322912976e 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -444,19 +444,16 @@ class TypeCheckSlowPathMIPS : public SlowPathCodeMIPS {
class DeoptimizationSlowPathMIPS : public SlowPathCodeMIPS {
public:
- explicit DeoptimizationSlowPathMIPS(HInstruction* instruction)
+ explicit DeoptimizationSlowPathMIPS(HDeoptimize* instruction)
: instruction_(instruction) {}
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, instruction_->GetLocations());
- DCHECK(instruction_->IsDeoptimize());
- HDeoptimize* deoptimize = instruction_->AsDeoptimize();
- uint32_t dex_pc = deoptimize->GetDexPc();
- CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
mips_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize),
instruction_,
- dex_pc,
+ instruction_->GetDexPc(),
this,
IsDirectEntrypoint(kQuickDeoptimize));
CheckEntrypointTypes<kQuickDeoptimize, void, void>();
@@ -465,7 +462,7 @@ class DeoptimizationSlowPathMIPS : public SlowPathCodeMIPS {
const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathMIPS"; }
private:
- HInstruction* const instruction_;
+ HDeoptimize* const instruction_;
DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathMIPS);
};
@@ -608,9 +605,9 @@ void ParallelMoveResolverMIPS::EmitSwap(size_t index) {
// then swap the high 32 bits of the same FPR. mtc1 makes the high 32 bits of an FPR
// unpredictable and the following mfch1 will fail.
__ Mfc1(TMP, f1);
- __ Mfhc1(AT, f1);
+ __ MoveFromFpuHigh(AT, f1);
__ Mtc1(r2_l, f1);
- __ Mthc1(r2_h, f1);
+ __ MoveToFpuHigh(r2_h, f1);
__ Move(r2_l, TMP);
__ Move(r2_h, AT);
} else if (loc1.IsStackSlot() && loc2.IsStackSlot()) {
@@ -862,7 +859,7 @@ void CodeGeneratorMIPS::Move64(Location destination, Location source) {
Register dst_low = destination.AsRegisterPairLow<Register>();
FRegister src = source.AsFpuRegister<FRegister>();
__ Mfc1(dst_low, src);
- __ Mfhc1(dst_high, src);
+ __ MoveFromFpuHigh(dst_high, src);
} else {
DCHECK(source.IsDoubleStackSlot()) << "Cannot move from " << source << " to " << destination;
int32_t off = source.GetStackIndex();
@@ -875,7 +872,7 @@ void CodeGeneratorMIPS::Move64(Location destination, Location source) {
Register src_high = source.AsRegisterPairHigh<Register>();
Register src_low = source.AsRegisterPairLow<Register>();
__ Mtc1(src_low, dst);
- __ Mthc1(src_high, dst);
+ __ MoveToFpuHigh(src_high, dst);
} else if (source.IsFpuRegister()) {
__ MovD(destination.AsFpuRegister<FRegister>(), source.AsFpuRegister<FRegister>());
} else {
@@ -1241,7 +1238,7 @@ void InstructionCodeGeneratorMIPS::GenerateSuspendCheck(HSuspendCheck* instructi
InstructionCodeGeneratorMIPS::InstructionCodeGeneratorMIPS(HGraph* graph,
CodeGeneratorMIPS* codegen)
- : HGraphVisitor(graph),
+ : InstructionCodeGenerator(graph, codegen),
assembler_(codegen->GetAssembler()),
codegen_(codegen) {}
@@ -1511,7 +1508,7 @@ void InstructionCodeGeneratorMIPS::HandleBinaryOp(HBinaryOperation* instruction)
}
void LocationsBuilderMIPS::HandleShift(HBinaryOperation* instr) {
- DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr());
+ DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr() || instr->IsRor());
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr);
Primitive::Type type = instr->GetResultType();
@@ -1534,7 +1531,7 @@ void LocationsBuilderMIPS::HandleShift(HBinaryOperation* instr) {
static constexpr size_t kMipsBitsPerWord = kMipsWordSize * kBitsPerByte;
void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) {
- DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr());
+ DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr() || instr->IsRor());
LocationSummary* locations = instr->GetLocations();
Primitive::Type type = instr->GetType();
@@ -1542,30 +1539,58 @@ void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) {
bool use_imm = rhs_location.IsConstant();
Register rhs_reg = use_imm ? ZERO : rhs_location.AsRegister<Register>();
int64_t rhs_imm = use_imm ? CodeGenerator::GetInt64ValueOf(rhs_location.GetConstant()) : 0;
- uint32_t shift_mask = (type == Primitive::kPrimInt) ? kMaxIntShiftValue : kMaxLongShiftValue;
- uint32_t shift_value = rhs_imm & shift_mask;
- // Is the INS (Insert Bit Field) instruction supported?
- bool has_ins = codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2();
+ const uint32_t shift_mask = (type == Primitive::kPrimInt)
+ ? kMaxIntShiftValue
+ : kMaxLongShiftValue;
+ const uint32_t shift_value = rhs_imm & shift_mask;
+ // Are the INS (Insert Bit Field) and ROTR instructions supported?
+ bool has_ins_rotr = codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2();
switch (type) {
case Primitive::kPrimInt: {
Register dst = locations->Out().AsRegister<Register>();
Register lhs = locations->InAt(0).AsRegister<Register>();
if (use_imm) {
- if (instr->IsShl()) {
+ if (shift_value == 0) {
+ if (dst != lhs) {
+ __ Move(dst, lhs);
+ }
+ } else if (instr->IsShl()) {
__ Sll(dst, lhs, shift_value);
} else if (instr->IsShr()) {
__ Sra(dst, lhs, shift_value);
- } else {
+ } else if (instr->IsUShr()) {
__ Srl(dst, lhs, shift_value);
+ } else {
+ if (has_ins_rotr) {
+ __ Rotr(dst, lhs, shift_value);
+ } else {
+ __ Sll(TMP, lhs, (kMipsBitsPerWord - shift_value) & shift_mask);
+ __ Srl(dst, lhs, shift_value);
+ __ Or(dst, dst, TMP);
+ }
}
} else {
if (instr->IsShl()) {
__ Sllv(dst, lhs, rhs_reg);
} else if (instr->IsShr()) {
__ Srav(dst, lhs, rhs_reg);
- } else {
+ } else if (instr->IsUShr()) {
__ Srlv(dst, lhs, rhs_reg);
+ } else {
+ if (has_ins_rotr) {
+ __ Rotrv(dst, lhs, rhs_reg);
+ } else {
+ __ Subu(TMP, ZERO, rhs_reg);
+ // 32-bit shift instructions use the 5 least significant bits of the shift count, so
+ // shifting by `-rhs_reg` is equivalent to shifting by `(32 - rhs_reg) & 31`. The case
+ // when `rhs_reg & 31 == 0` is OK even though we don't shift `lhs` left all the way out
+ // by 32, because the result in this case is computed as `(lhs >> 0) | (lhs << 0)`,
+ // IOW, the OR'd values are equal.
+ __ Sllv(TMP, lhs, TMP);
+ __ Srlv(dst, lhs, rhs_reg);
+ __ Or(dst, dst, TMP);
+ }
}
}
break;
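// Editorial sketch, not part of the patch: the identity behind the non-ROTR
// fallbacks above. MIPS 32-bit shifts use only the low 5 bits of the count,
// so a right rotation composes from two shifts and an OR, n % 32 == 0 included:
#include <cstdint>
static uint32_t RotateRight32(uint32_t x, uint32_t n) {
  // For n % 32 == 0 both shift counts collapse to 0, and x | x == x.
  return (x >> (n & 31)) | (x << ((32 - n) & 31));
}
// In the 64-bit case below, rotating by exactly 32 simply swaps the two
// halves, which is why shift_value == kMipsBitsPerWord is special-cased.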
@@ -1580,7 +1605,7 @@ void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) {
if (shift_value == 0) {
codegen_->Move64(locations->Out(), locations->InAt(0));
} else if (shift_value < kMipsBitsPerWord) {
- if (has_ins) {
+ if (has_ins_rotr) {
if (instr->IsShl()) {
__ Srl(dst_high, lhs_low, kMipsBitsPerWord - shift_value);
__ Ins(dst_high, lhs_high, shift_value, kMipsBitsPerWord - shift_value);
@@ -1589,10 +1614,15 @@ void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) {
__ Srl(dst_low, lhs_low, shift_value);
__ Ins(dst_low, lhs_high, kMipsBitsPerWord - shift_value, shift_value);
__ Sra(dst_high, lhs_high, shift_value);
+ } else if (instr->IsUShr()) {
+ __ Srl(dst_low, lhs_low, shift_value);
+ __ Ins(dst_low, lhs_high, kMipsBitsPerWord - shift_value, shift_value);
+ __ Srl(dst_high, lhs_high, shift_value);
} else {
__ Srl(dst_low, lhs_low, shift_value);
__ Ins(dst_low, lhs_high, kMipsBitsPerWord - shift_value, shift_value);
__ Srl(dst_high, lhs_high, shift_value);
+ __ Ins(dst_high, lhs_low, kMipsBitsPerWord - shift_value, shift_value);
}
} else {
if (instr->IsShl()) {
@@ -1605,24 +1635,51 @@ void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) {
__ Sll(TMP, lhs_high, kMipsBitsPerWord - shift_value);
__ Srl(dst_low, lhs_low, shift_value);
__ Or(dst_low, dst_low, TMP);
- } else {
+ } else if (instr->IsUShr()) {
__ Srl(dst_high, lhs_high, shift_value);
__ Sll(TMP, lhs_high, kMipsBitsPerWord - shift_value);
__ Srl(dst_low, lhs_low, shift_value);
__ Or(dst_low, dst_low, TMP);
+ } else {
+ __ Srl(TMP, lhs_low, shift_value);
+ __ Sll(dst_low, lhs_high, kMipsBitsPerWord - shift_value);
+ __ Or(dst_low, dst_low, TMP);
+ __ Srl(TMP, lhs_high, shift_value);
+ __ Sll(dst_high, lhs_low, kMipsBitsPerWord - shift_value);
+ __ Or(dst_high, dst_high, TMP);
}
}
} else {
- shift_value -= kMipsBitsPerWord;
+ const uint32_t shift_value_high = shift_value - kMipsBitsPerWord;
if (instr->IsShl()) {
- __ Sll(dst_high, lhs_low, shift_value);
+ __ Sll(dst_high, lhs_low, shift_value_high);
__ Move(dst_low, ZERO);
} else if (instr->IsShr()) {
- __ Sra(dst_low, lhs_high, shift_value);
+ __ Sra(dst_low, lhs_high, shift_value_high);
__ Sra(dst_high, dst_low, kMipsBitsPerWord - 1);
- } else {
- __ Srl(dst_low, lhs_high, shift_value);
+ } else if (instr->IsUShr()) {
+ __ Srl(dst_low, lhs_high, shift_value_high);
__ Move(dst_high, ZERO);
+ } else {
+ if (shift_value == kMipsBitsPerWord) {
+ // 64-bit rotation by 32 is just a swap.
+ __ Move(dst_low, lhs_high);
+ __ Move(dst_high, lhs_low);
+ } else {
+ if (has_ins_rotr) {
+ __ Srl(dst_low, lhs_high, shift_value_high);
+ __ Ins(dst_low, lhs_low, kMipsBitsPerWord - shift_value_high, shift_value_high);
+ __ Srl(dst_high, lhs_low, shift_value_high);
+ __ Ins(dst_high, lhs_high, kMipsBitsPerWord - shift_value_high, shift_value_high);
+ } else {
+ __ Sll(TMP, lhs_low, kMipsBitsPerWord - shift_value_high);
+ __ Srl(dst_low, lhs_high, shift_value_high);
+ __ Or(dst_low, dst_low, TMP);
+ __ Sll(TMP, lhs_high, kMipsBitsPerWord - shift_value_high);
+ __ Srl(dst_high, lhs_low, shift_value_high);
+ __ Or(dst_high, dst_high, TMP);
+ }
+ }
}
}
} else {
@@ -1649,7 +1706,7 @@ void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) {
__ Beqz(TMP, &done);
__ Move(dst_low, dst_high);
__ Sra(dst_high, dst_high, 31);
- } else {
+ } else if (instr->IsUShr()) {
__ Srlv(dst_high, lhs_high, rhs_reg);
__ Nor(AT, ZERO, rhs_reg);
__ Sll(TMP, lhs_high, 1);
@@ -1660,6 +1717,21 @@ void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) {
__ Beqz(TMP, &done);
__ Move(dst_low, dst_high);
__ Move(dst_high, ZERO);
+ } else {
+ __ Nor(AT, ZERO, rhs_reg);
+ __ Srlv(TMP, lhs_low, rhs_reg);
+ __ Sll(dst_low, lhs_high, 1);
+ __ Sllv(dst_low, dst_low, AT);
+ __ Or(dst_low, dst_low, TMP);
+ __ Srlv(TMP, lhs_high, rhs_reg);
+ __ Sll(dst_high, lhs_low, 1);
+ __ Sllv(dst_high, dst_high, AT);
+ __ Or(dst_high, dst_high, TMP);
+ __ Andi(TMP, rhs_reg, kMipsBitsPerWord);
+ __ Beqz(TMP, &done);
+ __ Move(TMP, dst_high);
+ __ Move(dst_high, dst_low);
+ __ Move(dst_low, TMP);
}
__ Bind(&done);
}
@@ -2314,8 +2386,7 @@ void InstructionCodeGeneratorMIPS::DivRemByPowerOfTwo(HBinaryOperation* instruct
Register out = locations->Out().AsRegister<Register>();
Register dividend = locations->InAt(0).AsRegister<Register>();
int32_t imm = second.GetConstant()->AsIntConstant()->GetValue();
- uint32_t abs_imm = static_cast<uint32_t>(std::abs(imm));
- DCHECK(IsPowerOfTwo(abs_imm));
+ uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm));
int ctz_imm = CTZ(abs_imm);
if (instruction->IsDiv()) {
@@ -2418,7 +2489,7 @@ void InstructionCodeGeneratorMIPS::GenerateDivRemIntegral(HBinaryOperation* inst
// Do not generate anything. DivZeroCheck would prevent any code from being executed.
} else if (imm == 1 || imm == -1) {
DivRemOneOrMinusOne(instruction);
- } else if (IsPowerOfTwo(std::abs(imm))) {
+ } else if (IsPowerOfTwo(AbsOrMin(imm))) {
DivRemByPowerOfTwo(instruction);
} else {
DCHECK(imm <= -2 || imm >= 2);
@@ -3358,8 +3429,8 @@ void LocationsBuilderMIPS::VisitDeoptimize(HDeoptimize* deoptimize) {
}
void InstructionCodeGeneratorMIPS::VisitDeoptimize(HDeoptimize* deoptimize) {
- SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) DeoptimizationSlowPathMIPS(deoptimize);
- codegen_->AddSlowPath(slow_path);
+ SlowPathCodeMIPS* slow_path =
+ deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathMIPS>(deoptimize);
GenerateTestAndBranch(deoptimize,
/* condition_input_index */ 0,
slow_path->GetEntryLabel(),
@@ -3371,6 +3442,10 @@ void LocationsBuilderMIPS::VisitNativeDebugInfo(HNativeDebugInfo* info) {
}
void InstructionCodeGeneratorMIPS::VisitNativeDebugInfo(HNativeDebugInfo* info) {
+ if (codegen_->HasStackMapAtCurrentPc()) {
+ // Ensure that we do not collide with the stack map of the previous instruction.
+ __ Nop();
+ }
codegen_->RecordPcInfo(info, info->GetDexPc());
}
@@ -3457,8 +3532,8 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction,
// Need to move to FP regs since FP results are returned in core registers.
__ Mtc1(locations->GetTemp(1).AsRegister<Register>(),
locations->Out().AsFpuRegister<FRegister>());
- __ Mthc1(locations->GetTemp(2).AsRegister<Register>(),
- locations->Out().AsFpuRegister<FRegister>());
+ __ MoveToFpuHigh(locations->GetTemp(2).AsRegister<Register>(),
+ locations->Out().AsFpuRegister<FRegister>());
}
} else {
if (!Primitive::IsFloatingPointType(type)) {
@@ -3578,8 +3653,8 @@ void InstructionCodeGeneratorMIPS::HandleFieldSet(HInstruction* instruction,
// Pass FP parameters in core registers.
__ Mfc1(locations->GetTemp(1).AsRegister<Register>(),
locations->InAt(1).AsFpuRegister<FRegister>());
- __ Mfhc1(locations->GetTemp(2).AsRegister<Register>(),
- locations->InAt(1).AsFpuRegister<FRegister>());
+ __ MoveFromFpuHigh(locations->GetTemp(2).AsRegister<Register>(),
+ locations->InAt(1).AsFpuRegister<FRegister>());
}
codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pA64Store),
instruction,
@@ -4536,14 +4611,12 @@ void InstructionCodeGeneratorMIPS::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UN
codegen_->GenerateFrameExit();
}
-void LocationsBuilderMIPS::VisitRor(HRor* ror ATTRIBUTE_UNUSED) {
- LOG(FATAL) << "Unreachable";
- UNREACHABLE();
+void LocationsBuilderMIPS::VisitRor(HRor* ror) {
+ HandleShift(ror);
}
-void InstructionCodeGeneratorMIPS::VisitRor(HRor* ror ATTRIBUTE_UNUSED) {
- LOG(FATAL) << "Unreachable";
- UNREACHABLE();
+void InstructionCodeGeneratorMIPS::VisitRor(HRor* ror) {
+ HandleShift(ror);
}
void LocationsBuilderMIPS::VisitShl(HShl* shl) {
@@ -4731,6 +4804,7 @@ void LocationsBuilderMIPS::VisitTypeConversion(HTypeConversion* conversion) {
Primitive::Type input_type = conversion->GetInputType();
Primitive::Type result_type = conversion->GetResultType();
DCHECK_NE(input_type, result_type);
+ bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
if ((input_type == Primitive::kPrimNot) || (input_type == Primitive::kPrimVoid) ||
(result_type == Primitive::kPrimNot) || (result_type == Primitive::kPrimVoid)) {
@@ -4738,8 +4812,9 @@ void LocationsBuilderMIPS::VisitTypeConversion(HTypeConversion* conversion) {
}
LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
- if ((Primitive::IsFloatingPointType(result_type) && input_type == Primitive::kPrimLong) ||
- (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type))) {
+ if (!isR6 &&
+ ((Primitive::IsFloatingPointType(result_type) && input_type == Primitive::kPrimLong) ||
+ (result_type == Primitive::kPrimLong && Primitive::IsFloatingPointType(input_type)))) {
call_kind = LocationSummary::kCall;
}
@@ -4777,6 +4852,8 @@ void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversi
Primitive::Type result_type = conversion->GetResultType();
Primitive::Type input_type = conversion->GetInputType();
bool has_sign_extension = codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2();
+ bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
+ bool fpu_32bit = codegen_->GetInstructionSetFeatures().Is32BitFloatingPoint();
DCHECK_NE(input_type, result_type);
@@ -4822,7 +4899,37 @@ void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversi
<< " to " << result_type;
}
} else if (Primitive::IsFloatingPointType(result_type) && Primitive::IsIntegralType(input_type)) {
- if (input_type != Primitive::kPrimLong) {
+ if (input_type == Primitive::kPrimLong) {
+ if (isR6) {
+ // cvt.s.l/cvt.d.l requires MIPSR2+ with FR=1. MIPS32R6 is implemented as a secondary
+ // architecture on top of MIPS64R6, which has FR=1, and therefore can use the instruction.
+ Register src_high = locations->InAt(0).AsRegisterPairHigh<Register>();
+ Register src_low = locations->InAt(0).AsRegisterPairLow<Register>();
+ FRegister dst = locations->Out().AsFpuRegister<FRegister>();
+ __ Mtc1(src_low, FTMP);
+ __ Mthc1(src_high, FTMP);
+ if (result_type == Primitive::kPrimFloat) {
+ __ Cvtsl(dst, FTMP);
+ } else {
+ __ Cvtdl(dst, FTMP);
+ }
+ } else {
+ int32_t entry_offset = (result_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pL2f)
+ : QUICK_ENTRY_POINT(pL2d);
+ bool direct = (result_type == Primitive::kPrimFloat) ? IsDirectEntrypoint(kQuickL2f)
+ : IsDirectEntrypoint(kQuickL2d);
+ codegen_->InvokeRuntime(entry_offset,
+ conversion,
+ conversion->GetDexPc(),
+ nullptr,
+ direct);
+ if (result_type == Primitive::kPrimFloat) {
+ CheckEntrypointTypes<kQuickL2f, float, int64_t>();
+ } else {
+ CheckEntrypointTypes<kQuickL2d, double, int64_t>();
+ }
+ }
+ } else {
Register src = locations->InAt(0).AsRegister<Register>();
FRegister dst = locations->Out().AsFpuRegister<FRegister>();
__ Mtc1(src, FTMP);
@@ -4831,54 +4938,168 @@ void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversi
} else {
__ Cvtdw(dst, FTMP);
}
- } else {
- int32_t entry_offset = (result_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pL2f)
- : QUICK_ENTRY_POINT(pL2d);
- bool direct = (result_type == Primitive::kPrimFloat) ? IsDirectEntrypoint(kQuickL2f)
- : IsDirectEntrypoint(kQuickL2d);
- codegen_->InvokeRuntime(entry_offset,
- conversion,
- conversion->GetDexPc(),
- nullptr,
- direct);
- if (result_type == Primitive::kPrimFloat) {
- CheckEntrypointTypes<kQuickL2f, float, int64_t>();
- } else {
- CheckEntrypointTypes<kQuickL2d, double, int64_t>();
- }
}
} else if (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type)) {
CHECK(result_type == Primitive::kPrimInt || result_type == Primitive::kPrimLong);
- int32_t entry_offset;
- bool direct;
- if (result_type != Primitive::kPrimLong) {
- entry_offset = (input_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pF2iz)
- : QUICK_ENTRY_POINT(pD2iz);
- direct = (result_type == Primitive::kPrimFloat) ? IsDirectEntrypoint(kQuickF2iz)
- : IsDirectEntrypoint(kQuickD2iz);
+ if (result_type == Primitive::kPrimLong) {
+ if (isR6) {
+ // trunc.l.s/trunc.l.d requires MIPSR2+ with FR=1. MIPS32R6 is implemented as a secondary
+ // architecture on top of MIPS64R6, which has FR=1, and therefore can use the instruction.
+ FRegister src = locations->InAt(0).AsFpuRegister<FRegister>();
+ Register dst_high = locations->Out().AsRegisterPairHigh<Register>();
+ Register dst_low = locations->Out().AsRegisterPairLow<Register>();
+ MipsLabel truncate;
+ MipsLabel done;
+
+ // When NAN2008=0 (R2 and before), the truncate instruction produces the maximum positive
+ // value when the input is either a NaN or is outside of the range of the output type
+ // after the truncation. IOW, the three special cases (NaN, too small, too big) produce
+ // the same result.
+ //
+ // When NAN2008=1 (R6), the truncate instruction caps the output at the minimum/maximum
+ // value of the output type if the input is outside of the range after the truncation or
+ // produces 0 when the input is a NaN. IOW, the three special cases produce three distinct
+ // results. This matches the desired float/double-to-int/long conversion exactly.
+ //
+ // So, NAN2008 affects handling of negative values and NaNs by the truncate instruction.
+ //
+ // The following code supports both NAN2008=0 and NAN2008=1 behaviors of the truncate
+ // instruction, the reason being that the emulator implements NAN2008=0 on MIPS64R6,
+ // even though it must be NAN2008=1 on R6.
+ //
+ // The code takes care of the different behaviors by first comparing the input to the
+ // minimum output value (-2**63 for truncating to long, -2**31 for truncating to int).
+ // If the input is greater than or equal to the minimum, it proceeds to the truncate
+ // instruction, which will handle such an input the same way irrespective of NAN2008.
+ // Otherwise the input is compared to itself to determine whether it is a NaN or not
+ // in order to return either zero or the minimum value.
+ //
+ // TODO: simplify this when the emulator correctly implements NAN2008=1 behavior of the
+ // truncate instruction for MIPS64R6.
+ if (input_type == Primitive::kPrimFloat) {
+ uint32_t min_val = bit_cast<uint32_t, float>(std::numeric_limits<int64_t>::min());
+ __ LoadConst32(TMP, min_val);
+ __ Mtc1(TMP, FTMP);
+ __ CmpLeS(FTMP, FTMP, src);
+ } else {
+ uint64_t min_val = bit_cast<uint64_t, double>(std::numeric_limits<int64_t>::min());
+ __ LoadConst32(TMP, High32Bits(min_val));
+ __ Mtc1(ZERO, FTMP);
+ __ Mthc1(TMP, FTMP);
+ __ CmpLeD(FTMP, FTMP, src);
+ }
+
+ __ Bc1nez(FTMP, &truncate);
+
+ if (input_type == Primitive::kPrimFloat) {
+ __ CmpEqS(FTMP, src, src);
+ } else {
+ __ CmpEqD(FTMP, src, src);
+ }
+ __ Move(dst_low, ZERO);
+ __ LoadConst32(dst_high, std::numeric_limits<int32_t>::min());
+ __ Mfc1(TMP, FTMP);
+ __ And(dst_high, dst_high, TMP);
+
+ __ B(&done);
+
+ __ Bind(&truncate);
+
+ if (input_type == Primitive::kPrimFloat) {
+ __ TruncLS(FTMP, src);
+ } else {
+ __ TruncLD(FTMP, src);
+ }
+ __ Mfc1(dst_low, FTMP);
+ __ Mfhc1(dst_high, FTMP);
+
+ __ Bind(&done);
+ } else {
+ int32_t entry_offset = (input_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pF2l)
+ : QUICK_ENTRY_POINT(pD2l);
+ bool direct = (input_type == Primitive::kPrimFloat) ? IsDirectEntrypoint(kQuickF2l)
+ : IsDirectEntrypoint(kQuickD2l);
+ codegen_->InvokeRuntime(entry_offset, conversion, conversion->GetDexPc(), nullptr, direct);
+ if (input_type == Primitive::kPrimFloat) {
+ CheckEntrypointTypes<kQuickF2l, int64_t, float>();
+ } else {
+ CheckEntrypointTypes<kQuickD2l, int64_t, double>();
+ }
+ }
} else {
- entry_offset = (input_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pF2l)
- : QUICK_ENTRY_POINT(pD2l);
- direct = (result_type == Primitive::kPrimFloat) ? IsDirectEntrypoint(kQuickF2l)
- : IsDirectEntrypoint(kQuickD2l);
- }
- codegen_->InvokeRuntime(entry_offset,
- conversion,
- conversion->GetDexPc(),
- nullptr,
- direct);
- if (result_type != Primitive::kPrimLong) {
+ FRegister src = locations->InAt(0).AsFpuRegister<FRegister>();
+ Register dst = locations->Out().AsRegister<Register>();
+ MipsLabel truncate;
+ MipsLabel done;
+
+ // The following code supports both NAN2008=0 and NAN2008=1 behaviors of the truncate
+ // instruction, the reason being that the emulator implements NAN2008=0 on MIPS64R6,
+ // even though it must be NAN2008=1 on R6.
+ //
+ // For details see the large comment above for the truncation of float/double to long on R6.
+ //
+ // TODO: simplify this when the emulator correctly implements NAN2008=1 behavior of the
+ // truncate instruction for MIPS64R6.
if (input_type == Primitive::kPrimFloat) {
- CheckEntrypointTypes<kQuickF2iz, int32_t, float>();
+ uint32_t min_val = bit_cast<uint32_t, float>(std::numeric_limits<int32_t>::min());
+ __ LoadConst32(TMP, min_val);
+ __ Mtc1(TMP, FTMP);
} else {
- CheckEntrypointTypes<kQuickD2iz, int32_t, double>();
+ uint64_t min_val = bit_cast<uint64_t, double>(std::numeric_limits<int32_t>::min());
+ __ LoadConst32(TMP, High32Bits(min_val));
+ __ Mtc1(ZERO, FTMP);
+ if (fpu_32bit) {
+ __ Mtc1(TMP, static_cast<FRegister>(FTMP + 1));
+ } else {
+ __ Mthc1(TMP, FTMP);
+ }
}
- } else {
+
+ if (isR6) {
+ if (input_type == Primitive::kPrimFloat) {
+ __ CmpLeS(FTMP, FTMP, src);
+ } else {
+ __ CmpLeD(FTMP, FTMP, src);
+ }
+ __ Bc1nez(FTMP, &truncate);
+
+ if (input_type == Primitive::kPrimFloat) {
+ __ CmpEqS(FTMP, src, src);
+ } else {
+ __ CmpEqD(FTMP, src, src);
+ }
+ __ LoadConst32(dst, std::numeric_limits<int32_t>::min());
+ __ Mfc1(TMP, FTMP);
+ __ And(dst, dst, TMP);
+ } else {
+ if (input_type == Primitive::kPrimFloat) {
+ __ ColeS(0, FTMP, src);
+ } else {
+ __ ColeD(0, FTMP, src);
+ }
+ __ Bc1t(0, &truncate);
+
+ if (input_type == Primitive::kPrimFloat) {
+ __ CeqS(0, src, src);
+ } else {
+ __ CeqD(0, src, src);
+ }
+ __ LoadConst32(dst, std::numeric_limits<int32_t>::min());
+ __ Movf(dst, ZERO, 0);
+ }
+
+ __ B(&done);
+
+ __ Bind(&truncate);
+
if (input_type == Primitive::kPrimFloat) {
- CheckEntrypointTypes<kQuickF2l, int64_t, float>();
+ __ TruncWS(FTMP, src);
} else {
- CheckEntrypointTypes<kQuickD2l, int64_t, double>();
+ __ TruncWD(FTMP, src);
}
+ __ Mfc1(dst, FTMP);
+
+ __ Bind(&done);
}
} else if (Primitive::IsFloatingPointType(result_type) &&
Primitive::IsFloatingPointType(input_type)) {
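
For reference, a minimal portable sketch (illustrative names, not ART code) of the float-to-long semantics that the R6 compare/truncate sequence above implements, assuming the NAN2008=1 rules described in the comments; the checks are reordered for readability:

#include <cmath>
#include <cstdint>
#include <limits>

int64_t TruncateFloatToLongSketch(float in) {
  constexpr int64_t kMin = std::numeric_limits<int64_t>::min();
  constexpr int64_t kMax = std::numeric_limits<int64_t>::max();
  if (std::isnan(in)) {
    return 0;  // CmpEqS(src, src) fails for NaN; the masked result is 0.
  }
  if (in < static_cast<float>(kMin)) {
    return kMin;  // Below the minimum: the CmpLeS guard routes here.
  }
  if (in >= 9223372036854775808.0f) {  // 2^63: out of range after truncation.
    return kMax;  // NAN2008=1 truncation saturates at the maximum.
  }
  return static_cast<int64_t>(in);  // In range: plain TruncLS.
}
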
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index 38302ad315..c3d4851ee9 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -197,7 +197,7 @@ class LocationsBuilderMIPS : public HGraphVisitor {
DISALLOW_COPY_AND_ASSIGN(LocationsBuilderMIPS);
};
-class InstructionCodeGeneratorMIPS : public HGraphVisitor {
+class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator {
public:
InstructionCodeGeneratorMIPS(HGraph* graph, CodeGeneratorMIPS* codegen);
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 05834ff063..38c32cad06 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -391,24 +391,24 @@ class TypeCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 {
class DeoptimizationSlowPathMIPS64 : public SlowPathCodeMIPS64 {
public:
- explicit DeoptimizationSlowPathMIPS64(HInstruction* instruction)
+ explicit DeoptimizationSlowPathMIPS64(HDeoptimize* instruction)
: instruction_(instruction) {}
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, instruction_->GetLocations());
- DCHECK(instruction_->IsDeoptimize());
- HDeoptimize* deoptimize = instruction_->AsDeoptimize();
- uint32_t dex_pc = deoptimize->GetDexPc();
- CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
- mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this);
+ mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize),
+ instruction_,
+ instruction_->GetDexPc(),
+ this);
CheckEntrypointTypes<kQuickDeoptimize, void, void>();
}
const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathMIPS64"; }
private:
- HInstruction* const instruction_;
+ HDeoptimize* const instruction_;
DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathMIPS64);
};
@@ -1113,7 +1113,7 @@ void InstructionCodeGeneratorMIPS64::GenerateSuspendCheck(HSuspendCheck* instruc
InstructionCodeGeneratorMIPS64::InstructionCodeGeneratorMIPS64(HGraph* graph,
CodeGeneratorMIPS64* codegen)
- : HGraphVisitor(graph),
+ : InstructionCodeGenerator(graph, codegen),
assembler_(codegen->GetAssembler()),
codegen_(codegen) {}
@@ -1247,7 +1247,7 @@ void InstructionCodeGeneratorMIPS64::HandleBinaryOp(HBinaryOperation* instructio
}
void LocationsBuilderMIPS64::HandleShift(HBinaryOperation* instr) {
- DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr());
+ DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr() || instr->IsRor());
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr);
Primitive::Type type = instr->GetResultType();
@@ -1265,7 +1265,7 @@ void LocationsBuilderMIPS64::HandleShift(HBinaryOperation* instr) {
}
void InstructionCodeGeneratorMIPS64::HandleShift(HBinaryOperation* instr) {
- DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr());
+ DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr() || instr->IsRor());
LocationSummary* locations = instr->GetLocations();
Primitive::Type type = instr->GetType();
@@ -1290,13 +1290,19 @@ void InstructionCodeGeneratorMIPS64::HandleShift(HBinaryOperation* instr) {
? static_cast<uint32_t>(rhs_imm & kMaxIntShiftValue)
: static_cast<uint32_t>(rhs_imm & kMaxLongShiftValue);
- if (type == Primitive::kPrimInt) {
+ if (shift_value == 0) {
+ if (dst != lhs) {
+ __ Move(dst, lhs);
+ }
+ } else if (type == Primitive::kPrimInt) {
if (instr->IsShl()) {
__ Sll(dst, lhs, shift_value);
} else if (instr->IsShr()) {
__ Sra(dst, lhs, shift_value);
- } else {
+ } else if (instr->IsUShr()) {
__ Srl(dst, lhs, shift_value);
+ } else {
+ __ Rotr(dst, lhs, shift_value);
}
} else {
if (shift_value < 32) {
@@ -1304,8 +1310,10 @@ void InstructionCodeGeneratorMIPS64::HandleShift(HBinaryOperation* instr) {
__ Dsll(dst, lhs, shift_value);
} else if (instr->IsShr()) {
__ Dsra(dst, lhs, shift_value);
- } else {
+ } else if (instr->IsUShr()) {
__ Dsrl(dst, lhs, shift_value);
+ } else {
+ __ Drotr(dst, lhs, shift_value);
}
} else {
shift_value -= 32;
@@ -1313,8 +1321,10 @@ void InstructionCodeGeneratorMIPS64::HandleShift(HBinaryOperation* instr) {
__ Dsll32(dst, lhs, shift_value);
} else if (instr->IsShr()) {
__ Dsra32(dst, lhs, shift_value);
- } else {
+ } else if (instr->IsUShr()) {
__ Dsrl32(dst, lhs, shift_value);
+ } else {
+ __ Drotr32(dst, lhs, shift_value);
}
}
}
@@ -1324,16 +1334,20 @@ void InstructionCodeGeneratorMIPS64::HandleShift(HBinaryOperation* instr) {
__ Sllv(dst, lhs, rhs_reg);
} else if (instr->IsShr()) {
__ Srav(dst, lhs, rhs_reg);
- } else {
+ } else if (instr->IsUShr()) {
__ Srlv(dst, lhs, rhs_reg);
+ } else {
+ __ Rotrv(dst, lhs, rhs_reg);
}
} else {
if (instr->IsShl()) {
__ Dsllv(dst, lhs, rhs_reg);
} else if (instr->IsShr()) {
__ Dsrav(dst, lhs, rhs_reg);
- } else {
+ } else if (instr->IsUShr()) {
__ Dsrlv(dst, lhs, rhs_reg);
+ } else {
+ __ Drotrv(dst, lhs, rhs_reg);
}
}
}
@@ -1955,8 +1969,7 @@ void InstructionCodeGeneratorMIPS64::DivRemByPowerOfTwo(HBinaryOperation* instru
GpuRegister out = locations->Out().AsRegister<GpuRegister>();
GpuRegister dividend = locations->InAt(0).AsRegister<GpuRegister>();
int64_t imm = Int64FromConstant(second.GetConstant());
- uint64_t abs_imm = static_cast<uint64_t>(std::abs(imm));
- DCHECK(IsPowerOfTwo(abs_imm));
+ uint64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm));
int ctz_imm = CTZ(abs_imm);
if (instruction->IsDiv()) {
@@ -2138,7 +2151,7 @@ void InstructionCodeGeneratorMIPS64::GenerateDivRemIntegral(HBinaryOperation* in
// Do not generate anything. DivZeroCheck would prevent any code from being executed.
} else if (imm == 1 || imm == -1) {
DivRemOneOrMinusOne(instruction);
- } else if (IsPowerOfTwo(std::abs(imm))) {
+ } else if (IsPowerOfTwo(AbsOrMin(imm))) {
DivRemByPowerOfTwo(instruction);
} else {
DCHECK(imm <= -2 || imm >= 2);
@@ -2736,9 +2749,8 @@ void LocationsBuilderMIPS64::VisitDeoptimize(HDeoptimize* deoptimize) {
}
void InstructionCodeGeneratorMIPS64::VisitDeoptimize(HDeoptimize* deoptimize) {
- SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena())
- DeoptimizationSlowPathMIPS64(deoptimize);
- codegen_->AddSlowPath(slow_path);
+ SlowPathCodeMIPS64* slow_path =
+ deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathMIPS64>(deoptimize);
GenerateTestAndBranch(deoptimize,
/* condition_input_index */ 0,
slow_path->GetEntryLabel(),
@@ -2750,6 +2762,10 @@ void LocationsBuilderMIPS64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
}
void InstructionCodeGeneratorMIPS64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
+ if (codegen_->HasStackMapAtCurrentPc()) {
+ // Ensure that we do not collide with the stack map of the previous instruction.
+ __ Nop();
+ }
codegen_->RecordPcInfo(info, info->GetDexPc());
}
@@ -3722,14 +3738,12 @@ void InstructionCodeGeneratorMIPS64::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_
codegen_->GenerateFrameExit();
}
-void LocationsBuilderMIPS64::VisitRor(HRor* ror ATTRIBUTE_UNUSED) {
- LOG(FATAL) << "Unreachable";
- UNREACHABLE();
+void LocationsBuilderMIPS64::VisitRor(HRor* ror) {
+ HandleShift(ror);
}
-void InstructionCodeGeneratorMIPS64::VisitRor(HRor* ror ATTRIBUTE_UNUSED) {
- LOG(FATAL) << "Unreachable";
- UNREACHABLE();
+void InstructionCodeGeneratorMIPS64::VisitRor(HRor* ror) {
+ HandleShift(ror);
}
void LocationsBuilderMIPS64::VisitShl(HShl* shl) {
@@ -3918,36 +3932,18 @@ void LocationsBuilderMIPS64::VisitTypeConversion(HTypeConversion* conversion) {
LOG(FATAL) << "Unexpected type conversion from " << input_type << " to " << result_type;
}
- LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
- if ((Primitive::IsFloatingPointType(result_type) && input_type == Primitive::kPrimLong) ||
- (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type))) {
- call_kind = LocationSummary::kCall;
- }
-
- LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(conversion, call_kind);
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(conversion);
- if (call_kind == LocationSummary::kNoCall) {
- if (Primitive::IsFloatingPointType(input_type)) {
- locations->SetInAt(0, Location::RequiresFpuRegister());
- } else {
- locations->SetInAt(0, Location::RequiresRegister());
- }
-
- if (Primitive::IsFloatingPointType(result_type)) {
- locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
- } else {
- locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
- }
+ if (Primitive::IsFloatingPointType(input_type)) {
+ locations->SetInAt(0, Location::RequiresFpuRegister());
} else {
- InvokeRuntimeCallingConvention calling_convention;
-
- if (Primitive::IsFloatingPointType(input_type)) {
- locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
- } else {
- locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
- }
+ locations->SetInAt(0, Location::RequiresRegister());
+ }
- locations->SetOut(calling_convention.GetReturnLocation(result_type));
+ if (Primitive::IsFloatingPointType(result_type)) {
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+ } else {
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}
}
@@ -3992,55 +3988,107 @@ void InstructionCodeGeneratorMIPS64::VisitTypeConversion(HTypeConversion* conver
<< " to " << result_type;
}
} else if (Primitive::IsFloatingPointType(result_type) && Primitive::IsIntegralType(input_type)) {
- if (input_type != Primitive::kPrimLong) {
- FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>();
- GpuRegister src = locations->InAt(0).AsRegister<GpuRegister>();
- __ Mtc1(src, FTMP);
+ FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>();
+ GpuRegister src = locations->InAt(0).AsRegister<GpuRegister>();
+ if (input_type == Primitive::kPrimLong) {
+ __ Dmtc1(src, FTMP);
if (result_type == Primitive::kPrimFloat) {
- __ Cvtsw(dst, FTMP);
+ __ Cvtsl(dst, FTMP);
} else {
- __ Cvtdw(dst, FTMP);
+ __ Cvtdl(dst, FTMP);
}
} else {
- int32_t entry_offset = (result_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pL2f)
- : QUICK_ENTRY_POINT(pL2d);
- codegen_->InvokeRuntime(entry_offset,
- conversion,
- conversion->GetDexPc(),
- nullptr);
+ __ Mtc1(src, FTMP);
if (result_type == Primitive::kPrimFloat) {
- CheckEntrypointTypes<kQuickL2f, float, int64_t>();
+ __ Cvtsw(dst, FTMP);
} else {
- CheckEntrypointTypes<kQuickL2d, double, int64_t>();
+ __ Cvtdw(dst, FTMP);
}
}
} else if (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type)) {
CHECK(result_type == Primitive::kPrimInt || result_type == Primitive::kPrimLong);
- int32_t entry_offset;
- if (result_type != Primitive::kPrimLong) {
- entry_offset = (input_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pF2iz)
- : QUICK_ENTRY_POINT(pD2iz);
+ GpuRegister dst = locations->Out().AsRegister<GpuRegister>();
+ FpuRegister src = locations->InAt(0).AsFpuRegister<FpuRegister>();
+ Mips64Label truncate;
+ Mips64Label done;
+
+ // When NAN2008=0 (R2 and before), the truncate instruction produces the maximum positive
+ // value when the input is either a NaN or is outside of the range of the output type
+ // after the truncation. IOW, the three special cases (NaN, too small, too big) produce
+ // the same result.
+ //
+ // When NAN2008=1 (R6), the truncate instruction caps the output at the minimum/maximum
+ // value of the output type if the input is outside of the range after the truncation or
+ // produces 0 when the input is a NaN. IOW, the three special cases produce three distinct
+ // results. This matches the desired float/double-to-int/long conversion exactly.
+ //
+ // So, NAN2008 affects handling of negative values and NaNs by the truncate instruction.
+ //
+ // The following code supports both NAN2008=0 and NAN2008=1 behaviors of the truncate
+ // instruction, the reason being that the emulator implements NAN2008=0 on MIPS64R6,
+ // even though it must be NAN2008=1 on R6.
+ //
+ // The code takes care of the different behaviors by first comparing the input to the
+ // minimum output value (-2**63 for truncating to long, -2**31 for truncating to int).
+ // If the input is greater than or equal to the minimum, it proceeds to the truncate
+ // instruction, which will handle such an input the same way irrespective of NAN2008.
+ // Otherwise the input is compared to itself to determine whether it is a NaN or not
+ // in order to return either zero or the minimum value.
+ //
+ // TODO: simplify this when the emulator correctly implements NAN2008=1 behavior of the
+ // truncate instruction for MIPS64R6.
+ if (input_type == Primitive::kPrimFloat) {
+ uint32_t min_val = (result_type == Primitive::kPrimLong)
+ ? bit_cast<uint32_t, float>(std::numeric_limits<int64_t>::min())
+ : bit_cast<uint32_t, float>(std::numeric_limits<int32_t>::min());
+ __ LoadConst32(TMP, min_val);
+ __ Mtc1(TMP, FTMP);
+ __ CmpLeS(FTMP, FTMP, src);
} else {
- entry_offset = (input_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pF2l)
- : QUICK_ENTRY_POINT(pD2l);
+ uint64_t min_val = (result_type == Primitive::kPrimLong)
+ ? bit_cast<uint64_t, double>(std::numeric_limits<int64_t>::min())
+ : bit_cast<uint64_t, double>(std::numeric_limits<int32_t>::min());
+ __ LoadConst64(TMP, min_val);
+ __ Dmtc1(TMP, FTMP);
+ __ CmpLeD(FTMP, FTMP, src);
}
- codegen_->InvokeRuntime(entry_offset,
- conversion,
- conversion->GetDexPc(),
- nullptr);
- if (result_type != Primitive::kPrimLong) {
+
+ __ Bc1nez(FTMP, &truncate);
+
+ if (input_type == Primitive::kPrimFloat) {
+ __ CmpEqS(FTMP, src, src);
+ } else {
+ __ CmpEqD(FTMP, src, src);
+ }
+ if (result_type == Primitive::kPrimLong) {
+ __ LoadConst64(dst, std::numeric_limits<int64_t>::min());
+ } else {
+ __ LoadConst32(dst, std::numeric_limits<int32_t>::min());
+ }
+ __ Mfc1(TMP, FTMP);
+ __ And(dst, dst, TMP);
+
+ __ Bc(&done);
+
+ __ Bind(&truncate);
+
+ if (result_type == Primitive::kPrimLong) {
if (input_type == Primitive::kPrimFloat) {
- CheckEntrypointTypes<kQuickF2iz, int32_t, float>();
+ __ TruncLS(FTMP, src);
} else {
- CheckEntrypointTypes<kQuickD2iz, int32_t, double>();
+ __ TruncLD(FTMP, src);
}
+ __ Dmfc1(dst, FTMP);
} else {
if (input_type == Primitive::kPrimFloat) {
- CheckEntrypointTypes<kQuickF2l, int64_t, float>();
+ __ TruncWS(FTMP, src);
} else {
- CheckEntrypointTypes<kQuickD2l, int64_t, double>();
+ __ TruncWD(FTMP, src);
}
+ __ Mfc1(dst, FTMP);
}
+
+ __ Bind(&done);
} else if (Primitive::IsFloatingPointType(result_type) &&
Primitive::IsFloatingPointType(input_type)) {
FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>();
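
The constant-distance rotate cases above (Rotr, Drotr, Drotr32, plus the new shift_value == 0 move) all compute an ordinary rotate-right; a hedged C++ sketch of the 64-bit semantics, with names invented for illustration:

#include <cstdint>

uint64_t RotateRight64(uint64_t value, uint32_t distance) {
  distance &= 63;  // Same masking as kMaxLongShiftValue above.
  if (distance == 0) {
    return value;  // Mirrors the Move(dst, lhs) fast path for shift_value == 0.
  }
  // Drotr handles distances < 32, Drotr32 handles 32..63; the result is the same.
  return (value >> distance) | (value << (64 - distance));
}
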
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index 60ff96dc43..7182e8e987 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -201,7 +201,7 @@ class LocationsBuilderMIPS64 : public HGraphVisitor {
DISALLOW_COPY_AND_ASSIGN(LocationsBuilderMIPS64);
};
-class InstructionCodeGeneratorMIPS64 : public HGraphVisitor {
+class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator {
public:
InstructionCodeGeneratorMIPS64(HGraph* graph, CodeGeneratorMIPS64* codegen);
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 86327fb741..6ab3aaff4b 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -365,11 +365,10 @@ class TypeCheckSlowPathX86 : public SlowPathCode {
class DeoptimizationSlowPathX86 : public SlowPathCode {
public:
- explicit DeoptimizationSlowPathX86(HInstruction* instruction)
+ explicit DeoptimizationSlowPathX86(HDeoptimize* instruction)
: instruction_(instruction) {}
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
- DCHECK(instruction_->IsDeoptimize());
CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, instruction_->GetLocations());
@@ -383,7 +382,7 @@ class DeoptimizationSlowPathX86 : public SlowPathCode {
const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86"; }
private:
- HInstruction* const instruction_;
+ HDeoptimize* const instruction_;
DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86);
};
@@ -892,7 +891,7 @@ void CodeGeneratorX86::UpdateBlockedPairRegisters() const {
}
InstructionCodeGeneratorX86::InstructionCodeGeneratorX86(HGraph* graph, CodeGeneratorX86* codegen)
- : HGraphVisitor(graph),
+ : InstructionCodeGenerator(graph, codegen),
assembler_(codegen->GetAssembler()),
codegen_(codegen) {}
@@ -1611,9 +1610,7 @@ void LocationsBuilderX86::VisitDeoptimize(HDeoptimize* deoptimize) {
}
void InstructionCodeGeneratorX86::VisitDeoptimize(HDeoptimize* deoptimize) {
- SlowPathCode* slow_path = new (GetGraph()->GetArena())
- DeoptimizationSlowPathX86(deoptimize);
- codegen_->AddSlowPath(slow_path);
+ SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86>(deoptimize);
GenerateTestAndBranch(deoptimize,
/* condition_input_index */ 0,
slow_path->GetEntryLabel(),
@@ -1625,6 +1622,10 @@ void LocationsBuilderX86::VisitNativeDebugInfo(HNativeDebugInfo* info) {
}
void InstructionCodeGeneratorX86::VisitNativeDebugInfo(HNativeDebugInfo* info) {
+ if (codegen_->HasStackMapAtCurrentPc()) {
+ // Ensure that we do not collide with the stack map of the previous instruction.
+ __ nop();
+ }
codegen_->RecordPcInfo(info, info->GetDexPc());
}
@@ -3223,11 +3224,12 @@ void InstructionCodeGeneratorX86::DivByPowerOfTwo(HDiv* instruction) {
Register out_register = locations->Out().AsRegister<Register>();
Register input_register = locations->InAt(0).AsRegister<Register>();
int32_t imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+ DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
+ uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm));
- DCHECK(IsPowerOfTwo(std::abs(imm)));
Register num = locations->GetTemp(0).AsRegister<Register>();
- __ leal(num, Address(input_register, std::abs(imm) - 1));
+ __ leal(num, Address(input_register, abs_imm - 1));
__ testl(input_register, input_register);
__ cmovl(kGreaterEqual, num, input_register);
int shift = CTZ(imm);
@@ -3340,7 +3342,7 @@ void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instr
// Do not generate anything for 0. DivZeroCheck would prevent any code from being generated.
} else if (imm == 1 || imm == -1) {
DivRemOneOrMinusOne(instruction);
- } else if (is_div && IsPowerOfTwo(std::abs(imm))) {
+ } else if (is_div && IsPowerOfTwo(AbsOrMin(imm))) {
DivByPowerOfTwo(instruction->AsDiv());
} else {
DCHECK(imm <= -2 || imm >= 2);
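
A hedged sketch of the branch-free signed-division idiom behind the leal/testl/cmovl sequence above (the helper name is invented; divisors of ±1 are handled separately by DivRemOneOrMinusOne, and AbsOrMin keeps INT32_MIN as its own unsigned magnitude):

#include <cstdint>
#include <cstdlib>

int32_t DivByPowerOfTwoSketch(int32_t dividend, int32_t imm) {
  uint32_t abs_imm = (imm == INT32_MIN) ? 0x80000000u
                                        : static_cast<uint32_t>(std::abs(imm));
  // leal: bias the dividend so that negative values round toward zero
  // (computed in unsigned to get the same wrapping the hardware does).
  int32_t biased =
      static_cast<int32_t>(static_cast<uint32_t>(dividend) + abs_imm - 1u);
  // testl + cmovl: keep the unbiased value for non-negative dividends.
  int32_t num = (dividend >= 0) ? dividend : biased;
  int shift = __builtin_ctz(abs_imm);  // CTZ(imm)
  int32_t result = num >> shift;       // arithmetic shift, as sarl does
  return (imm < 0) ? -result : result; // negl when the divisor is negative
}
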
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index df7347658b..c65c423eae 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -178,7 +178,7 @@ class LocationsBuilderX86 : public HGraphVisitor {
DISALLOW_COPY_AND_ASSIGN(LocationsBuilderX86);
};
-class InstructionCodeGeneratorX86 : public HGraphVisitor {
+class InstructionCodeGeneratorX86 : public InstructionCodeGenerator {
public:
InstructionCodeGeneratorX86(HGraph* graph, CodeGeneratorX86* codegen);
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 76a4ce2e93..294b40e3d4 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -387,18 +387,16 @@ class TypeCheckSlowPathX86_64 : public SlowPathCode {
class DeoptimizationSlowPathX86_64 : public SlowPathCode {
public:
- explicit DeoptimizationSlowPathX86_64(HInstruction* instruction)
+ explicit DeoptimizationSlowPathX86_64(HDeoptimize* instruction)
: instruction_(instruction) {}
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, instruction_->GetLocations());
- DCHECK(instruction_->IsDeoptimize());
- HDeoptimize* deoptimize = instruction_->AsDeoptimize();
x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize),
- deoptimize,
- deoptimize->GetDexPc(),
+ instruction_,
+ instruction_->GetDexPc(),
this);
CheckEntrypointTypes<kQuickDeoptimize, void, void>();
}
@@ -406,7 +404,7 @@ class DeoptimizationSlowPathX86_64 : public SlowPathCode {
const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86_64"; }
private:
- HInstruction* const instruction_;
+ HDeoptimize* const instruction_;
DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86_64);
};
@@ -1000,7 +998,7 @@ CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph,
InstructionCodeGeneratorX86_64::InstructionCodeGeneratorX86_64(HGraph* graph,
CodeGeneratorX86_64* codegen)
- : HGraphVisitor(graph),
+ : InstructionCodeGenerator(graph, codegen),
assembler_(codegen->GetAssembler()),
codegen_(codegen) {}
@@ -1594,9 +1592,7 @@ void LocationsBuilderX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
}
void InstructionCodeGeneratorX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
- SlowPathCode* slow_path = new (GetGraph()->GetArena())
- DeoptimizationSlowPathX86_64(deoptimize);
- codegen_->AddSlowPath(slow_path);
+ SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86_64>(deoptimize);
GenerateTestAndBranch(deoptimize,
/* condition_input_index */ 0,
slow_path->GetEntryLabel(),
@@ -1608,6 +1604,10 @@ void LocationsBuilderX86_64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
}
void InstructionCodeGeneratorX86_64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
+ if (codegen_->HasStackMapAtCurrentPc()) {
+ // Ensure that we do not collide with the stack map of the previous instruction.
+ __ nop();
+ }
codegen_->RecordPcInfo(info, info->GetDexPc());
}
@@ -3350,13 +3350,13 @@ void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) {
CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>();
int64_t imm = Int64FromConstant(second.GetConstant());
-
- DCHECK(IsPowerOfTwo(std::abs(imm)));
+ DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
+ uint64_t abs_imm = AbsOrMin(imm);
CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
if (instruction->GetResultType() == Primitive::kPrimInt) {
- __ leal(tmp, Address(numerator, std::abs(imm) - 1));
+ __ leal(tmp, Address(numerator, abs_imm - 1));
__ testl(numerator, numerator);
__ cmov(kGreaterEqual, tmp, numerator);
int shift = CTZ(imm);
@@ -3371,7 +3371,7 @@ void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) {
DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>();
- codegen_->Load64BitValue(rdx, std::abs(imm) - 1);
+ codegen_->Load64BitValue(rdx, abs_imm - 1);
__ addq(rdx, numerator);
__ testq(numerator, numerator);
__ cmov(kGreaterEqual, rdx, numerator);
@@ -3529,7 +3529,7 @@ void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* in
// Do not generate anything. DivZeroCheck would prevent any code from being executed.
} else if (imm == 1 || imm == -1) {
DivRemOneOrMinusOne(instruction);
- } else if (instruction->IsDiv() && IsPowerOfTwo(std::abs(imm))) {
+ } else if (instruction->IsDiv() && IsPowerOfTwo(AbsOrMin(imm))) {
DivByPowerOfTwo(instruction->AsDiv());
} else {
DCHECK(imm <= -2 || imm >= 2);
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index c5e8a04da6..505c9dcdad 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -183,7 +183,7 @@ class LocationsBuilderX86_64 : public HGraphVisitor {
DISALLOW_COPY_AND_ASSIGN(LocationsBuilderX86_64);
};
-class InstructionCodeGeneratorX86_64 : public HGraphVisitor {
+class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator {
public:
InstructionCodeGeneratorX86_64(HGraph* graph, CodeGeneratorX86_64* codegen);
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index c504ded54c..b90afb1d73 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -211,19 +211,6 @@ bool InstructionSimplifierVisitor::ReplaceRotateWithRor(HBinaryOperation* op,
// Try to replace a binary operation flanked by one UShr and one Shl with a bitfield rotation.
bool InstructionSimplifierVisitor::TryReplaceWithRotate(HBinaryOperation* op) {
- // This simplification is currently supported on x86, x86_64, ARM and ARM64.
- // TODO: Implement it for MIPS/64.
- const InstructionSet instruction_set = GetGraph()->GetInstructionSet();
- switch (instruction_set) {
- case kArm:
- case kArm64:
- case kThumb2:
- case kX86:
- case kX86_64:
- break;
- default:
- return false;
- }
DCHECK(op->IsAdd() || op->IsXor() || op->IsOr());
HInstruction* left = op->GetLeft();
HInstruction* right = op->GetRight();
@@ -1261,19 +1248,6 @@ void InstructionSimplifierVisitor::SimplifyStringEquals(HInvoke* instruction) {
void InstructionSimplifierVisitor::SimplifyRotate(HInvoke* invoke, bool is_left) {
DCHECK(invoke->IsInvokeStaticOrDirect());
DCHECK_EQ(invoke->GetOriginalInvokeType(), InvokeType::kStatic);
- // This simplification is currently supported on x86, x86_64, ARM and ARM64.
- // TODO: Implement it for MIPS/64.
- const InstructionSet instruction_set = GetGraph()->GetInstructionSet();
- switch (instruction_set) {
- case kArm:
- case kArm64:
- case kThumb2:
- case kX86:
- case kX86_64:
- break;
- default:
- return;
- }
HInstruction* value = invoke->InputAt(0);
HInstruction* distance = invoke->InputAt(1);
// Replace the invoke with an HRor.
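
With the per-ISA gating removed, the rewrite applies wherever a code generator implements HRor. A minimal sketch of the bit pattern TryReplaceWithRotate recognizes (illustrative, 32-bit case):

#include <cstdint>

uint32_t RorPattern(uint32_t value, uint32_t dist) {
  dist &= 31;
  uint32_t ushr = value >> dist;                // HUShr(value, dist)
  uint32_t shl = value << ((32u - dist) & 31);  // HShl(value, 32 - dist)
  return ushr | shl;                            // HOr -> a single HRor(value, dist)
}
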
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 4683aee603..b1fbf28204 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -502,9 +502,6 @@ static void GenUnsafeGet(HInvoke* invoke,
bool is_volatile,
CodeGeneratorARM* codegen) {
LocationSummary* locations = invoke->GetLocations();
- DCHECK((type == Primitive::kPrimInt) ||
- (type == Primitive::kPrimLong) ||
- (type == Primitive::kPrimNot));
ArmAssembler* assembler = codegen->GetAssembler();
Location base_loc = locations->InAt(1);
Register base = base_loc.AsRegister<Register>(); // Object pointer.
@@ -512,30 +509,67 @@ static void GenUnsafeGet(HInvoke* invoke,
Register offset = offset_loc.AsRegisterPairLow<Register>(); // Long offset, lo part only.
Location trg_loc = locations->Out();
- if (type == Primitive::kPrimLong) {
- Register trg_lo = trg_loc.AsRegisterPairLow<Register>();
- __ add(IP, base, ShifterOperand(offset));
- if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) {
- Register trg_hi = trg_loc.AsRegisterPairHigh<Register>();
- __ ldrexd(trg_lo, trg_hi, IP);
- } else {
- __ ldrd(trg_lo, Address(IP));
+ switch (type) {
+ case Primitive::kPrimInt: {
+ Register trg = trg_loc.AsRegister<Register>();
+ __ ldr(trg, Address(base, offset));
+ if (is_volatile) {
+ __ dmb(ISH);
+ }
+ break;
}
- } else {
- Register trg = trg_loc.AsRegister<Register>();
- __ ldr(trg, Address(base, offset));
- }
- if (is_volatile) {
- __ dmb(ISH);
- }
+ case Primitive::kPrimNot: {
+ Register trg = trg_loc.AsRegister<Register>();
+ if (kEmitCompilerReadBarrier) {
+ if (kUseBakerReadBarrier) {
+ Location temp = locations->GetTemp(0);
+ codegen->GenerateArrayLoadWithBakerReadBarrier(
+ invoke, trg_loc, base, 0U, offset_loc, temp, /* needs_null_check */ false);
+ if (is_volatile) {
+ __ dmb(ISH);
+ }
+ } else {
+ __ ldr(trg, Address(base, offset));
+ if (is_volatile) {
+ __ dmb(ISH);
+ }
+ codegen->GenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc);
+ }
+ } else {
+ __ ldr(trg, Address(base, offset));
+ if (is_volatile) {
+ __ dmb(ISH);
+ }
+ __ MaybeUnpoisonHeapReference(trg);
+ }
+ break;
+ }
- if (type == Primitive::kPrimNot) {
- codegen->MaybeGenerateReadBarrier(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc);
+ case Primitive::kPrimLong: {
+ Register trg_lo = trg_loc.AsRegisterPairLow<Register>();
+ __ add(IP, base, ShifterOperand(offset));
+ if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) {
+ Register trg_hi = trg_loc.AsRegisterPairHigh<Register>();
+ __ ldrexd(trg_lo, trg_hi, IP);
+ } else {
+ __ ldrd(trg_lo, Address(IP));
+ }
+ if (is_volatile) {
+ __ dmb(ISH);
+ }
+ break;
+ }
+
+ default:
+ LOG(FATAL) << "Unexpected type " << type;
+ UNREACHABLE();
}
}
-static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
+ HInvoke* invoke,
+ Primitive::Type type) {
bool can_call = kEmitCompilerReadBarrier &&
(invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
@@ -548,25 +582,30 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke
locations->SetInAt(1, Location::RequiresRegister());
locations->SetInAt(2, Location::RequiresRegister());
locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // We need a temporary register for the read barrier marking slow
+ // path in InstructionCodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier.
+ locations->AddTemp(Location::RequiresRegister());
+ }
}
void IntrinsicLocationsBuilderARM::VisitUnsafeGet(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
}
void IntrinsicLocationsBuilderARM::VisitUnsafeGetVolatile(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
}
void IntrinsicLocationsBuilderARM::VisitUnsafeGetLong(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
}
void IntrinsicLocationsBuilderARM::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
}
void IntrinsicLocationsBuilderARM::VisitUnsafeGetObject(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
}
void IntrinsicLocationsBuilderARM::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
}
void IntrinsicCodeGeneratorARM::VisitUnsafeGet(HInvoke* invoke) {
@@ -808,6 +847,9 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat
}
// Prevent reordering with prior memory operations.
+ // Emit a DMB ISH instruction instead of an DMB ISHST one, as the
+ // latter allows a preceding load to be delayed past the STXR
+ // instruction below.
__ dmb(ISH);
__ add(tmp_ptr, base, ShifterOperand(offset));
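
A rough C++11 analogy for the barrier-strength argument in the comment above (the mapping to DMB variants is for illustration only, not an ART API): the exclusive-store loop needs a full fence that orders prior loads as well as prior stores.

#include <atomic>

void CasPrologueFence() {
  // DMB ISHST only orders earlier stores against later stores, so an earlier
  // load could still drift past the STREX. A full fence (~ DMB ISH, which is
  // what std::atomic_thread_fence(seq_cst) lowers to on ARM) orders both.
  std::atomic_thread_fence(std::memory_order_seq_cst);
}
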
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index f723940444..81cab86c83 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -1035,7 +1035,11 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat
__ Stlxr(tmp_32, value, MemOperand(tmp_ptr));
__ Cbnz(tmp_32, &loop_head);
} else {
- __ Dmb(InnerShareable, BarrierWrites);
+ // Emit a `Dmb(InnerShareable, BarrierAll)` (DMB ISH) instruction
+ // instead of a `Dmb(InnerShareable, BarrierWrites)` (DMB ISHST)
+ // one, as the latter allows a preceding load to be delayed past
+ // the STXR instruction below.
+ __ Dmb(InnerShareable, BarrierAll);
__ Bind(&loop_head);
// TODO: When `type == Primitive::kPrimNot`, add a read barrier for
// the reference stored in the object before attempting the CAS,
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index 06fab616ad..bc126a2716 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -43,14 +43,18 @@ ArenaAllocator* IntrinsicCodeGeneratorMIPS::GetAllocator() {
return codegen_->GetGraph()->GetArena();
}
-inline bool IntrinsicCodeGeneratorMIPS::IsR2OrNewer() {
+inline bool IntrinsicCodeGeneratorMIPS::IsR2OrNewer() const {
return codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2();
}
-inline bool IntrinsicCodeGeneratorMIPS::IsR6() {
+inline bool IntrinsicCodeGeneratorMIPS::IsR6() const {
return codegen_->GetInstructionSetFeatures().IsR6();
}
+inline bool IntrinsicCodeGeneratorMIPS::Is32BitFPU() const {
+ return codegen_->GetInstructionSetFeatures().Is32BitFloatingPoint();
+}
+
#define __ codegen->GetAssembler()->
static void MoveFromReturnRegister(Location trg,
@@ -162,7 +166,7 @@ static void MoveFPToInt(LocationSummary* locations, bool is64bit, MipsAssembler*
Register out_hi = locations->Out().AsRegisterPairHigh<Register>();
__ Mfc1(out_lo, in);
- __ Mfhc1(out_hi, in);
+ __ MoveFromFpuHigh(out_hi, in);
} else {
Register out = locations->Out().AsRegister<Register>();
@@ -204,7 +208,7 @@ static void MoveIntToFP(LocationSummary* locations, bool is64bit, MipsAssembler*
Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
__ Mtc1(in_lo, out);
- __ Mthc1(in_hi, out);
+ __ MoveToFpuHigh(in_hi, out);
} else {
Register in = locations->InAt(0).AsRegister<Register>();
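
MoveFromFpuHigh/MoveToFpuHigh abstract over the FPU register model, as the fpu_32bit branches in code_generator_mips.cc above suggest; a hedged sketch of the idea (the assembler calls mirror the diff, but the helper shape is assumed and relies on ART's MipsAssembler interface):

void MoveFromFpuHighSketch(MipsAssembler* assembler, Register dst, FRegister src,
                           bool fpu_32bit) {
  if (fpu_32bit) {
    // FR=0: the high half of a double lives in the odd register of the pair.
    assembler->Mfc1(dst, static_cast<FRegister>(src + 1));
  } else {
    // FR=1: read the high half of the 64-bit register directly.
    assembler->Mfhc1(dst, src);
  }
}
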
diff --git a/compiler/optimizing/intrinsics_mips.h b/compiler/optimizing/intrinsics_mips.h
index f86b0efe4a..575a7d0a23 100644
--- a/compiler/optimizing/intrinsics_mips.h
+++ b/compiler/optimizing/intrinsics_mips.h
@@ -67,8 +67,9 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
#undef INTRINSICS_LIST
#undef OPTIMIZING_INTRINSICS
- bool IsR2OrNewer(void);
- bool IsR6(void);
+ bool IsR2OrNewer() const;
+ bool IsR6() const;
+ bool Is32BitFPU() const;
private:
MipsAssembler* GetAssembler();
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index f5a7048b01..b80c6bde82 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -2183,10 +2183,7 @@ void HInvoke::SetIntrinsic(Intrinsics intrinsic,
IntrinsicExceptions exceptions) {
intrinsic_ = intrinsic;
IntrinsicOptimizations opt(this);
- if (needs_env_or_cache == kNoEnvironmentOrCache) {
- opt.SetDoesNotNeedDexCache();
- opt.SetDoesNotNeedEnvironment();
- }
+
// Adjust method's side effects from intrinsic table.
switch (side_effects) {
case kNoSideEffects: SetSideEffects(SideEffects::None()); break;
@@ -2194,6 +2191,14 @@ void HInvoke::SetIntrinsic(Intrinsics intrinsic,
case kWriteSideEffects: SetSideEffects(SideEffects::AllWrites()); break;
case kAllSideEffects: SetSideEffects(SideEffects::AllExceptGCDependency()); break;
}
+
+ if (needs_env_or_cache == kNoEnvironmentOrCache) {
+ opt.SetDoesNotNeedDexCache();
+ opt.SetDoesNotNeedEnvironment();
+ } else {
+ // If we need an environment, that means there will be a call, which can trigger GC.
+ SetSideEffects(GetSideEffects().Union(SideEffects::CanTriggerGC()));
+ }
// Adjust method's exception status from intrinsic table.
switch (exceptions) {
case kNoThrow: SetCanThrow(false); break;
@@ -2325,4 +2330,19 @@ HInstruction* HGraph::InsertOppositeCondition(HInstruction* cond, HInstruction*
}
}
+std::ostream& operator<<(std::ostream& os, const MoveOperands& rhs) {
+ os << "["
+ << " source=" << rhs.GetSource()
+ << " destination=" << rhs.GetDestination()
+ << " type=" << rhs.GetType()
+ << " instruction=";
+ if (rhs.GetInstruction() != nullptr) {
+ os << rhs.GetInstruction()->DebugName() << ' ' << rhs.GetInstruction()->GetId();
+ } else {
+ os << "null";
+ }
+ os << " ]";
+ return os;
+}
+
} // namespace art
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 59c07690b1..23132308f0 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -1881,6 +1881,10 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> {
return false;
}
+ virtual bool IsActualObject() const {
+ return GetType() == Primitive::kPrimNot;
+ }
+
void SetReferenceTypeInfo(ReferenceTypeInfo rti);
ReferenceTypeInfo GetReferenceTypeInfo() const {
@@ -2500,8 +2504,10 @@ class HTryBoundary : public HTemplateInstruction<0> {
// Deoptimize to interpreter, upon checking a condition.
class HDeoptimize : public HTemplateInstruction<1> {
public:
+ // We set CanTriggerGC to prevent any intermediate address from being live
+ // at the point of the `HDeoptimize`.
HDeoptimize(HInstruction* cond, uint32_t dex_pc)
- : HTemplateInstruction(SideEffects::None(), dex_pc) {
+ : HTemplateInstruction(SideEffects::CanTriggerGC(), dex_pc) {
SetRawInputAt(0, cond);
}
@@ -4017,8 +4023,10 @@ class HRem : public HBinaryOperation {
class HDivZeroCheck : public HExpression<1> {
public:
+ // `HDivZeroCheck` can trigger GC, as it may call the `ArithmeticException`
+ // constructor.
HDivZeroCheck(HInstruction* value, uint32_t dex_pc)
- : HExpression(value->GetType(), SideEffects::None(), dex_pc) {
+ : HExpression(value->GetType(), SideEffects::CanTriggerGC(), dex_pc) {
SetRawInputAt(0, value);
}
@@ -4539,8 +4547,10 @@ class HPhi : public HInstruction {
class HNullCheck : public HExpression<1> {
public:
+ // `HNullCheck` can trigger GC, as it may call the `NullPointerException`
+ // constructor.
HNullCheck(HInstruction* value, uint32_t dex_pc)
- : HExpression(value->GetType(), SideEffects::None(), dex_pc) {
+ : HExpression(value->GetType(), SideEffects::CanTriggerGC(), dex_pc) {
SetRawInputAt(0, value);
}
@@ -4861,8 +4871,10 @@ class HArrayLength : public HExpression<1> {
class HBoundsCheck : public HExpression<2> {
public:
+ // `HBoundsCheck` can trigger GC, as it may call the `IndexOutOfBoundsException`
+ // constructor.
HBoundsCheck(HInstruction* index, HInstruction* length, uint32_t dex_pc)
- : HExpression(index->GetType(), SideEffects::None(), dex_pc) {
+ : HExpression(index->GetType(), SideEffects::CanTriggerGC(), dex_pc) {
DCHECK(index->GetType() == Primitive::kPrimInt);
SetRawInputAt(0, index);
SetRawInputAt(1, length);
@@ -5626,8 +5638,8 @@ class MoveOperands : public ArenaObject<kArenaAllocMoveOperands> {
}
bool IsPending() const {
- DCHECK(!source_.IsInvalid() || destination_.IsInvalid());
- return destination_.IsInvalid() && !source_.IsInvalid();
+ DCHECK(source_.IsValid() || destination_.IsInvalid());
+ return destination_.IsInvalid() && source_.IsValid();
}
// True if this blocks a move from the given location.
@@ -5671,6 +5683,8 @@ class MoveOperands : public ArenaObject<kArenaAllocMoveOperands> {
HInstruction* instruction_;
};
+std::ostream& operator<<(std::ostream& os, const MoveOperands& rhs);
+
static constexpr size_t kDefaultNumberOfMoves = 4;
class HParallelMove : public HTemplateInstruction<0> {
diff --git a/compiler/optimizing/nodes_arm64.h b/compiler/optimizing/nodes_arm64.h
index 18405f2623..445cdab191 100644
--- a/compiler/optimizing/nodes_arm64.h
+++ b/compiler/optimizing/nodes_arm64.h
@@ -107,6 +107,7 @@ class HArm64IntermediateAddress : public HExpression<2> {
bool CanBeMoved() const OVERRIDE { return true; }
bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { return true; }
+ bool IsActualObject() const OVERRIDE { return false; }
HInstruction* GetBaseAddress() const { return InputAt(0); }
HInstruction* GetOffset() const { return InputAt(1); }
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index cafc6c5440..bb840eabdd 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -17,6 +17,7 @@
#include "optimizing_compiler.h"
#include <fstream>
+#include <memory>
#include <stdint.h>
#ifdef ART_ENABLE_CODEGEN_arm64
@@ -52,6 +53,8 @@
#include "driver/compiler_driver-inl.h"
#include "driver/compiler_options.h"
#include "driver/dex_compilation_unit.h"
+#include "dwarf/method_debug_info.h"
+#include "elf_writer_debug.h"
#include "elf_writer_quick.h"
#include "graph_checker.h"
#include "graph_visualizer.h"
@@ -60,6 +63,7 @@
#include "inliner.h"
#include "instruction_simplifier.h"
#include "intrinsics.h"
+#include "jit/debugger_interface.h"
#include "jit/jit_code_cache.h"
#include "licm.h"
#include "jni/quick/jni_compiler.h"
@@ -68,6 +72,7 @@
#include "prepare_for_register_allocation.h"
#include "reference_type_propagation.h"
#include "register_allocator.h"
+#include "oat_quick_method_header.h"
#include "sharpening.h"
#include "side_effects_analysis.h"
#include "ssa_builder.h"
@@ -965,6 +970,39 @@ bool OptimizingCompiler::JitCompile(Thread* self,
return false;
}
+ if (GetCompilerDriver()->GetCompilerOptions().GetGenerateDebugInfo()) {
+ const auto* method_header = reinterpret_cast<const OatQuickMethodHeader*>(code);
+ const uintptr_t code_address = reinterpret_cast<uintptr_t>(method_header->GetCode());
+ CompiledMethod compiled_method(
+ GetCompilerDriver(),
+ codegen->GetInstructionSet(),
+ ArrayRef<const uint8_t>(code_allocator.GetMemory()),
+ codegen->HasEmptyFrame() ? 0 : codegen->GetFrameSize(),
+ codegen->GetCoreSpillMask(),
+ codegen->GetFpuSpillMask(),
+ ArrayRef<const SrcMapElem>(),
+ ArrayRef<const uint8_t>(), // mapping_table.
+ ArrayRef<const uint8_t>(stack_map_data, stack_map_size),
+ ArrayRef<const uint8_t>(), // native_gc_map.
+ ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()),
+ ArrayRef<const LinkerPatch>());
+ dwarf::MethodDebugInfo method_debug_info {
+ dex_file,
+ class_def_idx,
+ method_idx,
+ access_flags,
+ code_item,
+ false, // deduped.
+ code_address,
+ code_address + code_allocator.GetSize(),
+ &compiled_method
+ };
+ ArrayRef<const uint8_t> elf_file = dwarf::WriteDebugElfFileForMethod(method_debug_info);
+ CreateJITCodeEntryForAddress(code_address,
+ std::unique_ptr<const uint8_t[]>(elf_file.data()),
+ elf_file.size());
+ }
+
return true;
}
diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc
index 176c50ce21..9d136f3ae6 100644
--- a/compiler/optimizing/parallel_move_resolver.cc
+++ b/compiler/optimizing/parallel_move_resolver.cc
@@ -13,7 +13,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-#include <iostream>
#include "parallel_move_resolver.h"
@@ -172,7 +171,7 @@ MoveOperands* ParallelMoveResolverWithSwap::PerformMove(size_t index) {
i = -1;
} else if (required_swap != nullptr) {
// A move is required to swap. We walk back the cycle to find the
- // move by just returning from this `PerforrmMove`.
+ // move by just returning from this `PerformMove`.
moves_[index]->ClearPending(destination);
return required_swap;
}
@@ -201,7 +200,7 @@ MoveOperands* ParallelMoveResolverWithSwap::PerformMove(size_t index) {
} else {
for (MoveOperands* other_move : moves_) {
if (other_move->Blocks(destination)) {
- DCHECK(other_move->IsPending());
+ DCHECK(other_move->IsPending()) << "move=" << *move << " other_move=" << *other_move;
if (!move->Is64BitMove() && other_move->Is64BitMove()) {
// We swap 64bits moves before swapping 32bits moves. Go back from the
// cycle by returning the move that must be swapped.
diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc
index d1770b75ab..63ef600756 100644
--- a/compiler/optimizing/prepare_for_register_allocation.cc
+++ b/compiler/optimizing/prepare_for_register_allocation.cc
@@ -96,7 +96,7 @@ void PrepareForRegisterAllocation::VisitClinitCheck(HClinitCheck* check) {
if (can_merge_with_load_class && !load_class->HasUses()) {
load_class->GetBlock()->RemoveInstruction(load_class);
}
- } else if (can_merge_with_load_class) {
+ } else if (can_merge_with_load_class && !load_class->NeedsAccessCheck()) {
// Pass the initialization duty to the `HLoadClass` instruction,
// and remove the instruction from the graph.
load_class->SetMustGenerateClinitCheck(true);
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index 5ab4547e22..2bae4bc5c8 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -1679,6 +1679,9 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) {
LocationSummary* locations = safepoint_position->GetLocations();
if ((current->GetType() == Primitive::kPrimNot) && current->GetParent()->HasSpillSlot()) {
+ DCHECK(interval->GetDefinedBy()->IsActualObject())
+ << interval->GetDefinedBy()->DebugName()
+ << "@" << safepoint_position->GetInstruction()->DebugName();
locations->SetStackBit(current->GetParent()->GetSpillSlot() / kVRegSize);
}
@@ -1691,6 +1694,9 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) {
maximum_number_of_live_fp_registers_);
}
if (current->GetType() == Primitive::kPrimNot) {
+ DCHECK(interval->GetDefinedBy()->IsActualObject())
+ << interval->GetDefinedBy()->DebugName()
+ << "@" << safepoint_position->GetInstruction()->DebugName();
locations->SetRegisterBit(source.reg());
}
break;
diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc
index c60a4eacaa..4784de1380 100644
--- a/compiler/optimizing/stack_map_stream.cc
+++ b/compiler/optimizing/stack_map_stream.cc
@@ -270,7 +270,7 @@ void StackMapStream::FillIn(MemoryRegion region) {
stack_map.SetStackMask(stack_map_encoding_, *entry.sp_mask);
}
- if (entry.num_dex_registers == 0) {
+ if (entry.num_dex_registers == 0 || (entry.live_dex_registers_mask->NumSetBits() == 0)) {
// No dex map available.
stack_map.SetDexRegisterMapOffset(stack_map_encoding_, StackMap::kNoDexRegisterMap);
} else {
diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc
index 560502fde6..604787fd92 100644
--- a/compiler/optimizing/stack_map_test.cc
+++ b/compiler/optimizing/stack_map_test.cc
@@ -614,6 +614,10 @@ TEST(StackMapTest, TestNoDexRegisterMap) {
stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
stream.EndStackMapEntry();
+ number_of_dex_registers = 1;
+ stream.BeginStackMapEntry(1, 67, 0x4, &sp_mask, number_of_dex_registers, 0);
+ stream.EndStackMapEntry();
+
size_t size = stream.PrepareForFillIn();
void* memory = arena.Alloc(size, kArenaAllocMisc);
MemoryRegion region(memory, size);
@@ -622,7 +626,7 @@ TEST(StackMapTest, TestNoDexRegisterMap) {
CodeInfo code_info(region);
StackMapEncoding encoding = code_info.ExtractEncoding();
ASSERT_EQ(0u, encoding.NumberOfBytesForStackMask());
- ASSERT_EQ(1u, code_info.GetNumberOfStackMaps());
+ ASSERT_EQ(2u, code_info.GetNumberOfStackMaps());
uint32_t number_of_location_catalog_entries = code_info.GetNumberOfLocationCatalogEntries();
ASSERT_EQ(0u, number_of_location_catalog_entries);
@@ -638,6 +642,16 @@ TEST(StackMapTest, TestNoDexRegisterMap) {
ASSERT_FALSE(stack_map.HasDexRegisterMap(encoding));
ASSERT_FALSE(stack_map.HasInlineInfo(encoding));
+
+ stack_map = code_info.GetStackMapAt(1, encoding);
+ ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(1, encoding)));
+ ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(67, encoding)));
+ ASSERT_EQ(1u, stack_map.GetDexPc(encoding));
+ ASSERT_EQ(67u, stack_map.GetNativePcOffset(encoding));
+ ASSERT_EQ(0x4u, stack_map.GetRegisterMask(encoding));
+
+ ASSERT_FALSE(stack_map.HasDexRegisterMap(encoding));
+ ASSERT_FALSE(stack_map.HasInlineInfo(encoding));
}
TEST(StackMapTest, InlineTest) {
diff --git a/compiler/profile_assistant.cc b/compiler/profile_assistant.cc
new file mode 100644
index 0000000000..81f2a5692d
--- /dev/null
+++ b/compiler/profile_assistant.cc
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "profile_assistant.h"
+
+namespace art {
+
+// Minimum number of new methods that profiles must contain to enable recompilation.
+static constexpr const uint32_t kMinNewMethodsForCompilation = 10;
+
+bool ProfileAssistant::ProcessProfiles(
+ const std::vector<std::string>& profile_files,
+ const std::vector<std::string>& reference_profile_files,
+ /*out*/ ProfileCompilationInfo** profile_compilation_info) {
+ DCHECK(!profile_files.empty());
+ DCHECK(reference_profile_files.empty() ||
+ (profile_files.size() == reference_profile_files.size()));
+
+ std::vector<ProfileCompilationInfo> new_info(profile_files.size());
+ bool should_compile = false;
+ // Read the main profile files.
+ for (size_t i = 0; i < profile_files.size(); i++) {
+ if (!new_info[i].Load(profile_files[i])) {
+ LOG(WARNING) << "Could not load profile file: " << profile_files[i];
+ return false;
+ }
+    // Do we have enough newly profiled methods to make compilation worthwhile?
+ should_compile |= (new_info[i].GetNumberOfMethods() > kMinNewMethodsForCompilation);
+ }
+ if (!should_compile) {
+ *profile_compilation_info = nullptr;
+ return true;
+ }
+
+ std::unique_ptr<ProfileCompilationInfo> result(new ProfileCompilationInfo());
+ for (size_t i = 0; i < new_info.size(); i++) {
+ // Merge all data into a single object.
+ result->Load(new_info[i]);
+    // If we have reference profiles, merge the current profile data into them
+    // and write the result back to the corresponding files on disk.
+ if (!reference_profile_files.empty()) {
+ if (!new_info[i].Load(reference_profile_files[i])) {
+ LOG(WARNING) << "Could not load reference profile file: " << reference_profile_files[i];
+ return false;
+ }
+ if (!new_info[i].Save(reference_profile_files[i])) {
+ LOG(WARNING) << "Could not save reference profile file: " << reference_profile_files[i];
+ return false;
+ }
+ }
+ }
+ *profile_compilation_info = result.release();
+ return true;
+}
+
+} // namespace art
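The should_compile flag above is the only gate on recompilation. A minimal stand-in (assumption: plain C++ mirroring the loop and threshold, detached from profile loading) showing the decision in isolation:

    #include <cstdint>
    #include <vector>

    static constexpr uint32_t kMinNewMethodsForCompilation = 10;

    static bool ShouldCompile(const std::vector<uint32_t>& methods_per_profile) {
      bool should_compile = false;
      for (uint32_t count : methods_per_profile) {
        // A single profile with enough new methods makes recompilation worthwhile.
        should_compile |= (count > kMinNewMethodsForCompilation);
      }
      return should_compile;
    }

With the threshold at 10, profiles containing 4, 7, and 9 methods leave should_compile false, so ProcessProfiles() sets *profile_compilation_info to nullptr and still returns true.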
diff --git a/compiler/profile_assistant.h b/compiler/profile_assistant.h
new file mode 100644
index 0000000000..088c8bd1c7
--- /dev/null
+++ b/compiler/profile_assistant.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_PROFILE_ASSISTANT_H_
+#define ART_COMPILER_PROFILE_ASSISTANT_H_
+
+#include <string>
+#include <vector>
+
+#include "jit/offline_profiling_info.cc"
+
+namespace art {
+
+class ProfileAssistant {
+ public:
+  // Process the profile information present in the given files. Returns true
+  // if the analysis completed successfully (i.e. no errors occurred while
+  // reading, merging, or writing the profile files).
+ //
+ // If the returned value is true and there is a significant difference between
+ // profile_files and reference_profile_files:
+  //   - profile_compilation_info is set to a non-null object that
+ // can be used to drive compilation. It will be the merge of all the data
+ // found in profile_files and reference_profile_files.
+ // - the data from profile_files[i] is merged into
+ // reference_profile_files[i] and the corresponding backing file is
+ // updated.
+ //
+ // If the returned value is false or the difference is insignificant,
+ // profile_compilation_info will be set to null.
+ //
+ // Additional notes:
+ // - as mentioned above, this function may update the content of the files
+  //   passed in reference_profile_files.
+ // - if reference_profile_files is not empty it must be the same size as
+ // profile_files.
+ static bool ProcessProfiles(
+ const std::vector<std::string>& profile_files,
+ const std::vector<std::string>& reference_profile_files,
+ /*out*/ ProfileCompilationInfo** profile_compilation_info);
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(ProfileAssistant);
+};
+
+} // namespace art
+
+#endif // ART_COMPILER_PROFILE_ASSISTANT_H_
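A hypothetical caller sketch (paths, error handling, and surrounding code invented; not part of this change) of how the contract above is meant to be consumed:

    std::vector<std::string> profiles = { "/path/to/current.prof" };
    std::vector<std::string> references = { "/path/to/reference.prof" };
    ProfileCompilationInfo* raw_info = nullptr;
    if (!ProfileAssistant::ProcessProfiles(profiles, references, &raw_info)) {
      return;  // Reading, merging, or writing failed; change nothing.
    }
    std::unique_ptr<ProfileCompilationInfo> info(raw_info);  // May be null.
    if (info != nullptr) {
      // Enough new methods were found: use the merged data to drive compilation.
      // Note that the reference profile files have already been updated on disk.
    }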
diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h
index b79c2f0f4e..f96376d9fe 100644
--- a/compiler/utils/arm/assembler_arm.h
+++ b/compiler/utils/arm/assembler_arm.h
@@ -501,6 +501,8 @@ class ArmAssembler : public Assembler {
virtual void cmp(Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
+  // Note: CMN updates the flags based on the addition of its operands (rn + so);
+  // do not confuse its "N" suffix with the bitwise inversion performed by MVN.
virtual void cmn(Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
virtual void orr(Register rd, Register rn, const ShifterOperand& so,
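Concretely, CMN rn, #imm sets the same flags as CMP rn, #-imm. An illustrative pair (not from the patch; it matches the cmn.w r1, #255 emitted for CmpConstant(R1, -255) in the new test expectations below):

    @ Desired comparison: flags of (r0 - (-255)). CMP cannot encode #-255 as a
    @ Thumb2 modified immediate, but the equivalent addition uses CMN directly:
    cmn r0, #255    @ flags from r0 + 255 == r0 - (-255)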
diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc
index f341030c15..52023a67ee 100644
--- a/compiler/utils/arm/assembler_thumb2.cc
+++ b/compiler/utils/arm/assembler_thumb2.cc
@@ -3428,10 +3428,10 @@ void Thumb2Assembler::AddConstant(Register rd, Register rn, int32_t value,
CHECK(rn != IP);
  // If rd != rn, use rd as temp. This allows 16-bit ADD/SUB in more situations than using IP.
Register temp = (rd != rn) ? rd : IP;
- if (ShifterOperandCanHold(temp, kNoRegister, MVN, ~value, set_cc, &shifter_op)) {
+ if (ShifterOperandCanHold(temp, kNoRegister, MVN, ~value, kCcKeep, &shifter_op)) {
mvn(temp, shifter_op, cond, kCcKeep);
add(rd, rn, ShifterOperand(temp), cond, set_cc);
- } else if (ShifterOperandCanHold(temp, kNoRegister, MVN, ~(-value), set_cc, &shifter_op)) {
+ } else if (ShifterOperandCanHold(temp, kNoRegister, MVN, ~(-value), kCcKeep, &shifter_op)) {
mvn(temp, shifter_op, cond, kCcKeep);
sub(rd, rn, ShifterOperand(temp), cond, set_cc);
} else if (High16Bits(-value) == 0) {
@@ -3449,22 +3449,32 @@ void Thumb2Assembler::AddConstant(Register rd, Register rn, int32_t value,
}
void Thumb2Assembler::CmpConstant(Register rn, int32_t value, Condition cond) {
- // We prefer to select the shorter code sequence rather than selecting add for
- // positive values and sub for negatives ones, which would slightly improve
- // the readability of generated code for some constants.
+  // We prefer to select the shorter code sequence rather than plain CMP and CMN,
+  // even though the latter would slightly improve readability for some constants.
ShifterOperand shifter_op;
if (ShifterOperandCanHold(kNoRegister, rn, CMP, value, kCcSet, &shifter_op)) {
cmp(rn, shifter_op, cond);
- } else if (ShifterOperandCanHold(kNoRegister, rn, CMN, ~value, kCcSet, &shifter_op)) {
+ } else if (ShifterOperandCanHold(kNoRegister, rn, CMN, -value, kCcSet, &shifter_op)) {
cmn(rn, shifter_op, cond);
} else {
CHECK(rn != IP);
- movw(IP, Low16Bits(value), cond);
- uint16_t value_high = High16Bits(value);
- if (value_high != 0) {
- movt(IP, value_high, cond);
+ if (ShifterOperandCanHold(IP, kNoRegister, MVN, ~value, kCcKeep, &shifter_op)) {
+ mvn(IP, shifter_op, cond, kCcKeep);
+ cmp(rn, ShifterOperand(IP), cond);
+ } else if (ShifterOperandCanHold(IP, kNoRegister, MVN, ~(-value), kCcKeep, &shifter_op)) {
+ mvn(IP, shifter_op, cond, kCcKeep);
+ cmn(rn, ShifterOperand(IP), cond);
+ } else if (High16Bits(-value) == 0) {
+ movw(IP, Low16Bits(-value), cond);
+ cmn(rn, ShifterOperand(IP), cond);
+ } else {
+ movw(IP, Low16Bits(value), cond);
+ uint16_t value_high = High16Bits(value);
+ if (value_high != 0) {
+ movt(IP, value_high, cond);
+ }
+ cmp(rn, ShifterOperand(IP), cond);
}
- cmp(rn, ShifterOperand(IP), cond);
}
}
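A worked example of the new ladder (cross-checked against the CmpConstantResults expectations added below): for CmpConstant(R0, 257), neither 257 nor -257 encodes as a Thumb2 modified immediate, ~257 (0xfffffefe) does not either, but ~(-257) == 256 does, so the second MVN probe wins and two instructions suffice:

    mvn.w ip, #256   @ ip = ~256 = -257
    cmn.w r0, ip     @ flags from r0 + (-257), i.e. r0 - 257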
diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc
index 0ef0dc19e6..2df9b177bf 100644
--- a/compiler/utils/assembler_thumb_test.cc
+++ b/compiler/utils/assembler_thumb_test.cc
@@ -1626,6 +1626,76 @@ TEST(Thumb2AssemblerTest, AddConstant) {
EmitAndCheck(&assembler, "AddConstant");
}
+TEST(Thumb2AssemblerTest, CmpConstant) {
+ arm::Thumb2Assembler assembler;
+
+ __ CmpConstant(R0, 0); // 16-bit CMP.
+ __ CmpConstant(R1, 1); // 16-bit CMP.
+ __ CmpConstant(R0, 7); // 16-bit CMP.
+ __ CmpConstant(R1, 8); // 16-bit CMP.
+ __ CmpConstant(R0, 255); // 16-bit CMP.
+ __ CmpConstant(R1, 256); // 32-bit CMP.
+  __ CmpConstant(R0, 257);            // MVN+CMN.
+ __ CmpConstant(R1, 0xfff); // MOVW+CMP.
+ __ CmpConstant(R0, 0x1000); // 32-bit CMP.
+  __ CmpConstant(R1, 0x1001);         // MVN+CMN.
+ __ CmpConstant(R0, 0x1002); // MOVW+CMP.
+ __ CmpConstant(R1, 0xffff); // MOVW+CMP.
+ __ CmpConstant(R0, 0x10000); // 32-bit CMP.
+ __ CmpConstant(R1, 0x10001); // 32-bit CMP.
+ __ CmpConstant(R0, 0x10002); // MVN+CMN.
+ __ CmpConstant(R1, 0x10003); // MOVW+MOVT+CMP.
+ __ CmpConstant(R0, -1); // 32-bit CMP.
+ __ CmpConstant(R1, -7); // CMN.
+ __ CmpConstant(R0, -8); // CMN.
+ __ CmpConstant(R1, -255); // CMN.
+ __ CmpConstant(R0, -256); // CMN.
+  __ CmpConstant(R1, -257);           // MVN+CMP.
+ __ CmpConstant(R0, -0xfff); // MOVW+CMN.
+ __ CmpConstant(R1, -0x1000); // CMN.
+  __ CmpConstant(R0, -0x1001);        // MVN+CMP.
+ __ CmpConstant(R1, -0x1002); // MOVW+CMN.
+ __ CmpConstant(R0, -0xffff); // MOVW+CMN.
+ __ CmpConstant(R1, -0x10000); // CMN.
+ __ CmpConstant(R0, -0x10001); // CMN.
+ __ CmpConstant(R1, -0x10002); // MVN+CMP.
+ __ CmpConstant(R0, -0x10003); // MOVW+MOVT+CMP.
+
+ __ CmpConstant(R8, 0); // 32-bit CMP.
+ __ CmpConstant(R9, 1); // 32-bit CMP.
+ __ CmpConstant(R8, 7); // 32-bit CMP.
+ __ CmpConstant(R9, 8); // 32-bit CMP.
+ __ CmpConstant(R8, 255); // 32-bit CMP.
+ __ CmpConstant(R9, 256); // 32-bit CMP.
+  __ CmpConstant(R8, 257);            // MVN+CMN.
+ __ CmpConstant(R9, 0xfff); // MOVW+CMP.
+ __ CmpConstant(R8, 0x1000); // 32-bit CMP.
+ __ CmpConstant(R9, 0x1001); // MVN+CMN.
+ __ CmpConstant(R8, 0x1002); // MOVW+CMP.
+ __ CmpConstant(R9, 0xffff); // MOVW+CMP.
+ __ CmpConstant(R8, 0x10000); // 32-bit CMP.
+ __ CmpConstant(R9, 0x10001); // 32-bit CMP.
+ __ CmpConstant(R8, 0x10002); // MVN+CMN.
+ __ CmpConstant(R9, 0x10003); // MOVW+MOVT+CMP.
+  __ CmpConstant(R8, -1);             // 32-bit CMP.
+ __ CmpConstant(R9, -7); // CMN.
+ __ CmpConstant(R8, -8); // CMN.
+ __ CmpConstant(R9, -255); // CMN.
+ __ CmpConstant(R8, -256); // CMN.
+  __ CmpConstant(R9, -257);           // MVN+CMP.
+ __ CmpConstant(R8, -0xfff); // MOVW+CMN.
+ __ CmpConstant(R9, -0x1000); // CMN.
+ __ CmpConstant(R8, -0x1001); // MVN+CMP.
+ __ CmpConstant(R9, -0x1002); // MOVW+CMN.
+ __ CmpConstant(R8, -0xffff); // MOVW+CMN.
+ __ CmpConstant(R9, -0x10000); // CMN.
+ __ CmpConstant(R8, -0x10001); // CMN.
+ __ CmpConstant(R9, -0x10002); // MVN+CMP.
+ __ CmpConstant(R8, -0x10003); // MOVW+MOVT+CMP.
+
+ EmitAndCheck(&assembler, "CmpConstant");
+}
+
#undef __
} // namespace arm
} // namespace art
diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc
index f07f8c74d7..6736015bf1 100644
--- a/compiler/utils/assembler_thumb_test_expected.cc.inc
+++ b/compiler/utils/assembler_thumb_test_expected.cc.inc
@@ -1,4 +1,4 @@
-const char* SimpleMovResults[] = {
+const char* const SimpleMovResults[] = {
" 0: 0008 movs r0, r1\n",
" 2: 4608 mov r0, r1\n",
" 4: 46c8 mov r8, r9\n",
@@ -6,18 +6,18 @@ const char* SimpleMovResults[] = {
" 8: f04f 0809 mov.w r8, #9\n",
nullptr
};
-const char* SimpleMov32Results[] = {
+const char* const SimpleMov32Results[] = {
" 0: ea4f 0001 mov.w r0, r1\n",
" 4: ea4f 0809 mov.w r8, r9\n",
nullptr
};
-const char* SimpleMovAddResults[] = {
+const char* const SimpleMovAddResults[] = {
" 0: 4608 mov r0, r1\n",
" 2: 1888 adds r0, r1, r2\n",
" 4: 1c08 adds r0, r1, #0\n",
nullptr
};
-const char* DataProcessingRegisterResults[] = {
+const char* const DataProcessingRegisterResults[] = {
" 0: ea6f 0001 mvn.w r0, r1\n",
" 4: eb01 0002 add.w r0, r1, r2\n",
" 8: eba1 0002 sub.w r0, r1, r2\n",
@@ -129,7 +129,7 @@ const char* DataProcessingRegisterResults[] = {
" 120: eb01 0c00 add.w ip, r1, r0\n",
nullptr
};
-const char* DataProcessingImmediateResults[] = {
+const char* const DataProcessingImmediateResults[] = {
" 0: 2055 movs r0, #85 ; 0x55\n",
" 2: f06f 0055 mvn.w r0, #85 ; 0x55\n",
" 6: f101 0055 add.w r0, r1, #85 ; 0x55\n",
@@ -154,7 +154,7 @@ const char* DataProcessingImmediateResults[] = {
" 48: 1f48 subs r0, r1, #5\n",
nullptr
};
-const char* DataProcessingModifiedImmediateResults[] = {
+const char* const DataProcessingModifiedImmediateResults[] = {
" 0: f04f 1055 mov.w r0, #5570645 ; 0x550055\n",
" 4: f06f 1055 mvn.w r0, #5570645 ; 0x550055\n",
" 8: f101 1055 add.w r0, r1, #5570645 ; 0x550055\n",
@@ -173,7 +173,7 @@ const char* DataProcessingModifiedImmediateResults[] = {
" 3c: f110 1f55 cmn.w r0, #5570645 ; 0x550055\n",
nullptr
};
-const char* DataProcessingModifiedImmediatesResults[] = {
+const char* const DataProcessingModifiedImmediatesResults[] = {
" 0: f04f 1055 mov.w r0, #5570645 ; 0x550055\n",
" 4: f04f 2055 mov.w r0, #1426085120 ; 0x55005500\n",
" 8: f04f 3055 mov.w r0, #1431655765 ; 0x55555555\n",
@@ -183,7 +183,7 @@ const char* DataProcessingModifiedImmediatesResults[] = {
" 18: f44f 70d4 mov.w r0, #424 ; 0x1a8\n",
nullptr
};
-const char* DataProcessingShiftedRegisterResults[] = {
+const char* const DataProcessingShiftedRegisterResults[] = {
" 0: 0123 lsls r3, r4, #4\n",
" 2: 0963 lsrs r3, r4, #5\n",
" 4: 11a3 asrs r3, r4, #6\n",
@@ -201,7 +201,7 @@ const char* DataProcessingShiftedRegisterResults[] = {
" 32: ea5f 0834 movs.w r8, r4, rrx\n",
nullptr
};
-const char* ShiftImmediateResults[] = {
+const char* const ShiftImmediateResults[] = {
" 0: 0123 lsls r3, r4, #4\n",
" 2: 0963 lsrs r3, r4, #5\n",
" 4: 11a3 asrs r3, r4, #6\n",
@@ -219,7 +219,7 @@ const char* ShiftImmediateResults[] = {
" 32: ea5f 0834 movs.w r8, r4, rrx\n",
nullptr
};
-const char* BasicLoadResults[] = {
+const char* const BasicLoadResults[] = {
" 0: 69a3 ldr r3, [r4, #24]\n",
" 2: 7e23 ldrb r3, [r4, #24]\n",
" 4: 8b23 ldrh r3, [r4, #24]\n",
@@ -233,7 +233,7 @@ const char* BasicLoadResults[] = {
" 20: f9b4 8018 ldrsh.w r8, [r4, #24]\n",
nullptr
};
-const char* BasicStoreResults[] = {
+const char* const BasicStoreResults[] = {
" 0: 61a3 str r3, [r4, #24]\n",
" 2: 7623 strb r3, [r4, #24]\n",
" 4: 8323 strh r3, [r4, #24]\n",
@@ -243,7 +243,7 @@ const char* BasicStoreResults[] = {
" 10: f8a4 8018 strh.w r8, [r4, #24]\n",
nullptr
};
-const char* ComplexLoadResults[] = {
+const char* const ComplexLoadResults[] = {
" 0: 69a3 ldr r3, [r4, #24]\n",
" 2: f854 3f18 ldr.w r3, [r4, #24]!\n",
" 6: f854 3b18 ldr.w r3, [r4], #24\n",
@@ -276,7 +276,7 @@ const char* ComplexLoadResults[] = {
" 6e: f934 3918 ldrsh.w r3, [r4], #-24\n",
nullptr
};
-const char* ComplexStoreResults[] = {
+const char* const ComplexStoreResults[] = {
" 0: 61a3 str r3, [r4, #24]\n",
" 2: f844 3f18 str.w r3, [r4, #24]!\n",
" 6: f844 3b18 str.w r3, [r4], #24\n",
@@ -297,7 +297,7 @@ const char* ComplexStoreResults[] = {
" 3e: f824 3918 strh.w r3, [r4], #-24\n",
nullptr
};
-const char* NegativeLoadStoreResults[] = {
+const char* const NegativeLoadStoreResults[] = {
" 0: f854 3c18 ldr.w r3, [r4, #-24]\n",
" 4: f854 3d18 ldr.w r3, [r4, #-24]!\n",
" 8: f854 3918 ldr.w r3, [r4], #-24\n",
@@ -348,12 +348,12 @@ const char* NegativeLoadStoreResults[] = {
" bc: f824 3b18 strh.w r3, [r4], #24\n",
nullptr
};
-const char* SimpleLoadStoreDualResults[] = {
+const char* const SimpleLoadStoreDualResults[] = {
" 0: e9c0 2306 strd r2, r3, [r0, #24]\n",
" 4: e9d0 2306 ldrd r2, r3, [r0, #24]\n",
nullptr
};
-const char* ComplexLoadStoreDualResults[] = {
+const char* const ComplexLoadStoreDualResults[] = {
" 0: e9c0 2306 strd r2, r3, [r0, #24]\n",
" 4: e9e0 2306 strd r2, r3, [r0, #24]!\n",
" 8: e8e0 2306 strd r2, r3, [r0], #24\n",
@@ -368,7 +368,7 @@ const char* ComplexLoadStoreDualResults[] = {
" 2c: e870 2306 ldrd r2, r3, [r0], #-24\n",
nullptr
};
-const char* NegativeLoadStoreDualResults[] = {
+const char* const NegativeLoadStoreDualResults[] = {
" 0: e940 2306 strd r2, r3, [r0, #-24]\n",
" 4: e960 2306 strd r2, r3, [r0, #-24]!\n",
" 8: e860 2306 strd r2, r3, [r0], #-24\n",
@@ -383,7 +383,7 @@ const char* NegativeLoadStoreDualResults[] = {
" 2c: e8f0 2306 ldrd r2, r3, [r0], #24\n",
nullptr
};
-const char* SimpleBranchResults[] = {
+const char* const SimpleBranchResults[] = {
" 0: 2002 movs r0, #2\n",
" 2: 2101 movs r1, #1\n",
" 4: e7fd b.n 2 <SimpleBranch+0x2>\n",
@@ -403,7 +403,7 @@ const char* SimpleBranchResults[] = {
" 20: 2006 movs r0, #6\n",
nullptr
};
-const char* LongBranchResults[] = {
+const char* const LongBranchResults[] = {
" 0: f04f 0002 mov.w r0, #2\n",
" 4: f04f 0101 mov.w r1, #1\n",
" 8: f7ff bffc b.w 4 <LongBranch+0x4>\n",
@@ -423,14 +423,14 @@ const char* LongBranchResults[] = {
" 40: f04f 0006 mov.w r0, #6\n",
nullptr
};
-const char* LoadMultipleResults[] = {
+const char* const LoadMultipleResults[] = {
" 0: cc09 ldmia r4!, {r0, r3}\n",
" 2: e934 4800 ldmdb r4!, {fp, lr}\n",
" 6: e914 4800 ldmdb r4, {fp, lr}\n",
" a: f854 5b04 ldr.w r5, [r4], #4\n",
nullptr
};
-const char* StoreMultipleResults[] = {
+const char* const StoreMultipleResults[] = {
" 0: c409 stmia r4!, {r0, r3}\n",
" 2: e8a4 4800 stmia.w r4!, {fp, lr}\n",
" 6: e884 4800 stmia.w r4, {fp, lr}\n",
@@ -438,7 +438,7 @@ const char* StoreMultipleResults[] = {
" e: f844 5d04 str.w r5, [r4, #-4]!\n",
nullptr
};
-const char* MovWMovTResults[] = {
+const char* const MovWMovTResults[] = {
" 0: f240 0400 movw r4, #0\n",
" 4: f240 0434 movw r4, #52 ; 0x34\n",
" 8: f240 0934 movw r9, #52 ; 0x34\n",
@@ -449,7 +449,7 @@ const char* MovWMovTResults[] = {
" 1c: f6cf 71ff movt r1, #65535 ; 0xffff\n",
nullptr
};
-const char* SpecialAddSubResults[] = {
+const char* const SpecialAddSubResults[] = {
" 0: aa14 add r2, sp, #80 ; 0x50\n",
" 2: b014 add sp, #80 ; 0x50\n",
" 4: f10d 0850 add.w r8, sp, #80 ; 0x50\n",
@@ -463,7 +463,7 @@ const char* SpecialAddSubResults[] = {
" 22: f6ad 7dfc subw sp, sp, #4092 ; 0xffc\n",
nullptr
};
-const char* LoadFromOffsetResults[] = {
+const char* const LoadFromOffsetResults[] = {
" 0: 68e2 ldr r2, [r4, #12]\n",
" 2: f8d4 2fff ldr.w r2, [r4, #4095] ; 0xfff\n",
" 6: f504 5280 add.w r2, r4, #4096 ; 0x1000\n",
@@ -514,7 +514,7 @@ const char* LoadFromOffsetResults[] = {
" 9e: f9b4 200c ldrsh.w r2, [r4, #12]\n",
nullptr
};
-const char* StoreToOffsetResults[] = {
+const char* const StoreToOffsetResults[] = {
" 0: 60e2 str r2, [r4, #12]\n",
" 2: f8c4 2fff str.w r2, [r4, #4095] ; 0xfff\n",
" 6: f504 5c80 add.w ip, r4, #4096 ; 0x1000\n",
@@ -563,7 +563,7 @@ const char* StoreToOffsetResults[] = {
" a4: 7322 strb r2, [r4, #12]\n",
nullptr
};
-const char* IfThenResults[] = {
+const char* const IfThenResults[] = {
" 0: bf08 it eq\n",
" 2: 2101 moveq r1, #1\n",
" 4: bf04 itt eq\n",
@@ -587,7 +587,7 @@ const char* IfThenResults[] = {
" 28: 2404 movne r4, #4\n",
nullptr
};
-const char* CbzCbnzResults[] = {
+const char* const CbzCbnzResults[] = {
" 0: b10a cbz r2, 6 <CbzCbnz+0x6>\n",
" 2: 2103 movs r1, #3\n",
" 4: 2203 movs r2, #3\n",
@@ -598,7 +598,7 @@ const char* CbzCbnzResults[] = {
" 10: 2204 movs r2, #4\n",
nullptr
};
-const char* MultiplyResults[] = {
+const char* const MultiplyResults[] = {
" 0: 4348 muls r0, r1\n",
" 2: fb01 f002 mul.w r0, r1, r2\n",
" 6: fb09 f808 mul.w r8, r9, r8\n",
@@ -611,21 +611,21 @@ const char* MultiplyResults[] = {
" 22: fbaa 890b umull r8, r9, sl, fp\n",
nullptr
};
-const char* DivideResults[] = {
+const char* const DivideResults[] = {
" 0: fb91 f0f2 sdiv r0, r1, r2\n",
" 4: fb99 f8fa sdiv r8, r9, sl\n",
" 8: fbb1 f0f2 udiv r0, r1, r2\n",
" c: fbb9 f8fa udiv r8, r9, sl\n",
nullptr
};
-const char* VMovResults[] = {
+const char* const VMovResults[] = {
" 0: eef7 0a00 vmov.f32 s1, #112 ; 0x70\n",
" 4: eeb7 1b00 vmov.f64 d1, #112 ; 0x70\n",
" 8: eef0 0a41 vmov.f32 s1, s2\n",
" c: eeb0 1b42 vmov.f64 d1, d2\n",
nullptr
};
-const char* BasicFloatingPointResults[] = {
+const char* const BasicFloatingPointResults[] = {
" 0: ee30 0a81 vadd.f32 s0, s1, s2\n",
" 4: ee30 0ac1 vsub.f32 s0, s1, s2\n",
" 8: ee20 0a81 vmul.f32 s0, s1, s2\n",
@@ -646,7 +646,7 @@ const char* BasicFloatingPointResults[] = {
" 44: eeb1 0bc1 vsqrt.f64 d0, d1\n",
nullptr
};
-const char* FloatingPointConversionsResults[] = {
+const char* const FloatingPointConversionsResults[] = {
" 0: eeb7 1bc2 vcvt.f32.f64 s2, d2\n",
" 4: eeb7 2ac1 vcvt.f64.f32 d2, s2\n",
" 8: eefd 0ac1 vcvt.s32.f32 s1, s2\n",
@@ -659,35 +659,35 @@ const char* FloatingPointConversionsResults[] = {
" 24: eeb8 1b41 vcvt.f64.u32 d1, s2\n",
nullptr
};
-const char* FloatingPointComparisonsResults[] = {
+const char* const FloatingPointComparisonsResults[] = {
" 0: eeb4 0a60 vcmp.f32 s0, s1\n",
" 4: eeb4 0b41 vcmp.f64 d0, d1\n",
" 8: eeb5 1a40 vcmp.f32 s2, #0.0\n",
" c: eeb5 2b40 vcmp.f64 d2, #0.0\n",
nullptr
};
-const char* CallsResults[] = {
+const char* const CallsResults[] = {
" 0: 47f0 blx lr\n",
" 2: 4770 bx lr\n",
nullptr
};
-const char* BreakpointResults[] = {
+const char* const BreakpointResults[] = {
" 0: be00 bkpt 0x0000\n",
nullptr
};
-const char* StrR1Results[] = {
+const char* const StrR1Results[] = {
" 0: 9111 str r1, [sp, #68] ; 0x44\n",
" 2: f8cd 142c str.w r1, [sp, #1068] ; 0x42c\n",
nullptr
};
-const char* VPushPopResults[] = {
+const char* const VPushPopResults[] = {
" 0: ed2d 1a04 vpush {s2-s5}\n",
" 4: ed2d 2b08 vpush {d2-d5}\n",
" 8: ecbd 1a04 vpop {s2-s5}\n",
" c: ecbd 2b08 vpop {d2-d5}\n",
nullptr
};
-const char* Max16BitBranchResults[] = {
+const char* const Max16BitBranchResults[] = {
" 0: e3ff b.n 802 <Max16BitBranch+0x802>\n",
" 2: 2300 movs r3, #0\n",
" 4: 2302 movs r3, #2\n",
@@ -1716,7 +1716,7 @@ const char* Max16BitBranchResults[] = {
" 802: 4611 mov r1, r2\n",
nullptr
};
-const char* Branch32Results[] = {
+const char* const Branch32Results[] = {
" 0: f000 bc01 b.w 806 <Branch32+0x806>\n",
" 4: 2300 movs r3, #0\n",
" 6: 2302 movs r3, #2\n",
@@ -2746,7 +2746,7 @@ const char* Branch32Results[] = {
" 806: 4611 mov r1, r2\n",
nullptr
};
-const char* CompareAndBranchMaxResults[] = {
+const char* const CompareAndBranchMaxResults[] = {
" 0: b3fc cbz r4, 82 <CompareAndBranchMax+0x82>\n",
" 2: 2300 movs r3, #0\n",
" 4: 2302 movs r3, #2\n",
@@ -2815,7 +2815,7 @@ const char* CompareAndBranchMaxResults[] = {
" 82: 4611 mov r1, r2\n",
nullptr
};
-const char* CompareAndBranchRelocation16Results[] = {
+const char* const CompareAndBranchRelocation16Results[] = {
" 0: 2c00 cmp r4, #0\n",
" 2: d040 beq.n 86 <CompareAndBranchRelocation16+0x86>\n",
" 4: 2300 movs r3, #0\n",
@@ -2886,7 +2886,7 @@ const char* CompareAndBranchRelocation16Results[] = {
" 86: 4611 mov r1, r2\n",
nullptr
};
-const char* CompareAndBranchRelocation32Results[] = {
+const char* const CompareAndBranchRelocation32Results[] = {
" 0: 2c00 cmp r4, #0\n",
" 2: f000 8401 beq.w 808 <CompareAndBranchRelocation32+0x808>\n",
" 6: 2300 movs r3, #0\n",
@@ -3917,7 +3917,7 @@ const char* CompareAndBranchRelocation32Results[] = {
" 808: 4611 mov r1, r2\n",
nullptr
};
-const char* MixedBranch32Results[] = {
+const char* const MixedBranch32Results[] = {
" 0: f000 bc03 b.w 80a <MixedBranch32+0x80a>\n",
" 4: 2300 movs r3, #0\n",
" 6: 2302 movs r3, #2\n",
@@ -4948,7 +4948,7 @@ const char* MixedBranch32Results[] = {
" 80a: 4611 mov r1, r2\n",
nullptr
};
-const char* ShiftsResults[] = {
+const char* const ShiftsResults[] = {
" 0: 0148 lsls r0, r1, #5\n",
" 2: 0948 lsrs r0, r1, #5\n",
" 4: 1148 asrs r0, r1, #5\n",
@@ -4997,7 +4997,7 @@ const char* ShiftsResults[] = {
" 98: fa51 f008 asrs.w r0, r1, r8\n",
nullptr
};
-const char* LoadStoreRegOffsetResults[] = {
+const char* const LoadStoreRegOffsetResults[] = {
" 0: 5888 ldr r0, [r1, r2]\n",
" 2: 5088 str r0, [r1, r2]\n",
" 4: f851 0012 ldr.w r0, [r1, r2, lsl #1]\n",
@@ -5012,7 +5012,7 @@ const char* LoadStoreRegOffsetResults[] = {
" 28: f841 0008 str.w r0, [r1, r8]\n",
nullptr
};
-const char* LoadStoreLiteralResults[] = {
+const char* const LoadStoreLiteralResults[] = {
" 0: 4801 ldr r0, [pc, #4] ; (8 <LoadStoreLiteral+0x8>)\n",
" 2: f8cf 0004 str.w r0, [pc, #4] ; 8 <LoadStoreLiteral+0x8>\n",
" 6: f85f 0008 ldr.w r0, [pc, #-8] ; 0 <LoadStoreLiteral>\n",
@@ -5023,7 +5023,7 @@ const char* LoadStoreLiteralResults[] = {
" 18: f8cf 07ff str.w r0, [pc, #2047] ; 81b <LoadStoreLiteral+0x81b>\n",
nullptr
};
-const char* LoadStoreLimitsResults[] = {
+const char* const LoadStoreLimitsResults[] = {
" 0: 6fe0 ldr r0, [r4, #124] ; 0x7c\n",
" 2: f8d4 0080 ldr.w r0, [r4, #128] ; 0x80\n",
" 6: 7fe0 ldrb r0, [r4, #31]\n",
@@ -5042,7 +5042,7 @@ const char* LoadStoreLimitsResults[] = {
" 30: f8a4 0040 strh.w r0, [r4, #64] ; 0x40\n",
nullptr
};
-const char* CompareAndBranchResults[] = {
+const char* const CompareAndBranchResults[] = {
" 0: b130 cbz r0, 10 <CompareAndBranch+0x10>\n",
" 2: f1bb 0f00 cmp.w fp, #0\n",
" 6: d003 beq.n 10 <CompareAndBranch+0x10>\n",
@@ -5052,7 +5052,7 @@ const char* CompareAndBranchResults[] = {
nullptr
};
-const char* AddConstantResults[] = {
+const char* const AddConstantResults[] = {
" 0: 4608 mov r0, r1\n",
" 2: 1c48 adds r0, r1, #1\n",
" 4: 1dc8 adds r0, r1, #7\n",
@@ -5370,6 +5370,104 @@ const char* AddConstantResults[] = {
nullptr
};
+const char* const CmpConstantResults[] = {
+ " 0: 2800 cmp r0, #0\n",
+ " 2: 2901 cmp r1, #1\n",
+ " 4: 2807 cmp r0, #7\n",
+ " 6: 2908 cmp r1, #8\n",
+ " 8: 28ff cmp r0, #255 ; 0xff\n",
+ " a: f5b1 7f80 cmp.w r1, #256 ; 0x100\n",
+ " e: f46f 7c80 mvn.w ip, #256 ; 0x100\n",
+ " 12: eb10 0f0c cmn.w r0, ip\n",
+ " 16: f640 7cff movw ip, #4095 ; 0xfff\n",
+ " 1a: 4561 cmp r1, ip\n",
+ " 1c: f5b0 5f80 cmp.w r0, #4096 ; 0x1000\n",
+ " 20: f46f 5c80 mvn.w ip, #4096 ; 0x1000\n",
+ " 24: eb11 0f0c cmn.w r1, ip\n",
+ " 28: f241 0c02 movw ip, #4098 ; 0x1002\n",
+ " 2c: 4560 cmp r0, ip\n",
+ " 2e: f64f 7cff movw ip, #65535 ; 0xffff\n",
+ " 32: 4561 cmp r1, ip\n",
+ " 34: f5b0 3f80 cmp.w r0, #65536 ; 0x10000\n",
+ " 38: f1b1 1f01 cmp.w r1, #65537 ; 0x10001\n",
+ " 3c: f06f 1c01 mvn.w ip, #65537 ; 0x10001\n",
+ " 40: eb10 0f0c cmn.w r0, ip\n",
+ " 44: f240 0c03 movw ip, #3\n",
+ " 48: f2c0 0c01 movt ip, #1\n",
+ " 4c: 4561 cmp r1, ip\n",
+ " 4e: f1b0 3fff cmp.w r0, #4294967295 ; 0xffffffff\n",
+ " 52: f111 0f07 cmn.w r1, #7\n",
+ " 56: f110 0f08 cmn.w r0, #8\n",
+ " 5a: f111 0fff cmn.w r1, #255 ; 0xff\n",
+ " 5e: f510 7f80 cmn.w r0, #256 ; 0x100\n",
+ " 62: f46f 7c80 mvn.w ip, #256 ; 0x100\n",
+ " 66: 4561 cmp r1, ip\n",
+ " 68: f640 7cff movw ip, #4095 ; 0xfff\n",
+ " 6c: eb10 0f0c cmn.w r0, ip\n",
+ " 70: f511 5f80 cmn.w r1, #4096 ; 0x1000\n",
+ " 74: f46f 5c80 mvn.w ip, #4096 ; 0x1000\n",
+ " 78: 4560 cmp r0, ip\n",
+ " 7a: f241 0c02 movw ip, #4098 ; 0x1002\n",
+ " 7e: eb11 0f0c cmn.w r1, ip\n",
+ " 82: f64f 7cff movw ip, #65535 ; 0xffff\n",
+ " 86: eb10 0f0c cmn.w r0, ip\n",
+ " 8a: f511 3f80 cmn.w r1, #65536 ; 0x10000\n",
+ " 8e: f110 1f01 cmn.w r0, #65537 ; 0x10001\n",
+ " 92: f06f 1c01 mvn.w ip, #65537 ; 0x10001\n",
+ " 96: 4561 cmp r1, ip\n",
+ " 98: f64f 7cfd movw ip, #65533 ; 0xfffd\n",
+ " 9c: f6cf 7cfe movt ip, #65534 ; 0xfffe\n",
+ " a0: 4560 cmp r0, ip\n",
+ " a2: f1b8 0f00 cmp.w r8, #0\n",
+ " a6: f1b9 0f01 cmp.w r9, #1\n",
+ " aa: f1b8 0f07 cmp.w r8, #7\n",
+ " ae: f1b9 0f08 cmp.w r9, #8\n",
+ " b2: f1b8 0fff cmp.w r8, #255 ; 0xff\n",
+ " b6: f5b9 7f80 cmp.w r9, #256 ; 0x100\n",
+ " ba: f46f 7c80 mvn.w ip, #256 ; 0x100\n",
+ " be: eb18 0f0c cmn.w r8, ip\n",
+ " c2: f640 7cff movw ip, #4095 ; 0xfff\n",
+ " c6: 45e1 cmp r9, ip\n",
+ " c8: f5b8 5f80 cmp.w r8, #4096 ; 0x1000\n",
+ " cc: f46f 5c80 mvn.w ip, #4096 ; 0x1000\n",
+ " d0: eb19 0f0c cmn.w r9, ip\n",
+ " d4: f241 0c02 movw ip, #4098 ; 0x1002\n",
+ " d8: 45e0 cmp r8, ip\n",
+ " da: f64f 7cff movw ip, #65535 ; 0xffff\n",
+ " de: 45e1 cmp r9, ip\n",
+ " e0: f5b8 3f80 cmp.w r8, #65536 ; 0x10000\n",
+ " e4: f1b9 1f01 cmp.w r9, #65537 ; 0x10001\n",
+ " e8: f06f 1c01 mvn.w ip, #65537 ; 0x10001\n",
+ " ec: eb18 0f0c cmn.w r8, ip\n",
+ " f0: f240 0c03 movw ip, #3\n",
+ " f4: f2c0 0c01 movt ip, #1\n",
+ " f8: 45e1 cmp r9, ip\n",
+ " fa: f1b8 3fff cmp.w r8, #4294967295 ; 0xffffffff\n",
+ " fe: f119 0f07 cmn.w r9, #7\n",
+ " 102: f118 0f08 cmn.w r8, #8\n",
+ " 106: f119 0fff cmn.w r9, #255 ; 0xff\n",
+ " 10a: f518 7f80 cmn.w r8, #256 ; 0x100\n",
+ " 10e: f46f 7c80 mvn.w ip, #256 ; 0x100\n",
+ " 112: 45e1 cmp r9, ip\n",
+ " 114: f640 7cff movw ip, #4095 ; 0xfff\n",
+ " 118: eb18 0f0c cmn.w r8, ip\n",
+ " 11c: f519 5f80 cmn.w r9, #4096 ; 0x1000\n",
+ " 120: f46f 5c80 mvn.w ip, #4096 ; 0x1000\n",
+ " 124: 45e0 cmp r8, ip\n",
+ " 126: f241 0c02 movw ip, #4098 ; 0x1002\n",
+ " 12a: eb19 0f0c cmn.w r9, ip\n",
+ " 12e: f64f 7cff movw ip, #65535 ; 0xffff\n",
+ " 132: eb18 0f0c cmn.w r8, ip\n",
+ " 136: f519 3f80 cmn.w r9, #65536 ; 0x10000\n",
+ " 13a: f118 1f01 cmn.w r8, #65537 ; 0x10001\n",
+ " 13e: f06f 1c01 mvn.w ip, #65537 ; 0x10001\n",
+ " 142: 45e1 cmp r9, ip\n",
+ " 144: f64f 7cfd movw ip, #65533 ; 0xfffd\n",
+ " 148: f6cf 7cfe movt ip, #65534 ; 0xfffe\n",
+ " 14c: 45e0 cmp r8, ip\n",
+ nullptr
+};
+
std::map<std::string, const char* const*> test_results;
void setup_results() {
test_results["SimpleMov"] = SimpleMovResults;
@@ -5421,4 +5519,5 @@ void setup_results() {
test_results["LoadStoreLimits"] = LoadStoreLimitsResults;
test_results["CompareAndBranch"] = CompareAndBranchResults;
test_results["AddConstant"] = AddConstantResults;
+ test_results["CmpConstant"] = CmpConstantResults;
}
diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc
index 0dc307c9ac..ac9c097892 100644
--- a/compiler/utils/mips/assembler_mips.cc
+++ b/compiler/utils/mips/assembler_mips.cc
@@ -1035,6 +1035,22 @@ void MipsAssembler::Movt(Register rd, Register rs, int cc) {
EmitR(0, rs, static_cast<Register>((cc << 2) | 1), rd, 0, 0x01);
}
+void MipsAssembler::TruncLS(FRegister fd, FRegister fs) {
+ EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x09);
+}
+
+void MipsAssembler::TruncLD(FRegister fd, FRegister fs) {
+ EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x09);
+}
+
+void MipsAssembler::TruncWS(FRegister fd, FRegister fs) {
+ EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x0D);
+}
+
+void MipsAssembler::TruncWD(FRegister fd, FRegister fs) {
+ EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x0D);
+}
+
void MipsAssembler::Cvtsw(FRegister fd, FRegister fs) {
EmitFR(0x11, 0x14, static_cast<FRegister>(0), fs, fd, 0x20);
}
@@ -1051,6 +1067,14 @@ void MipsAssembler::Cvtds(FRegister fd, FRegister fs) {
EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x21);
}
+void MipsAssembler::Cvtsl(FRegister fd, FRegister fs) {
+ EmitFR(0x11, 0x15, static_cast<FRegister>(0), fs, fd, 0x20);
+}
+
+void MipsAssembler::Cvtdl(FRegister fd, FRegister fs) {
+ EmitFR(0x11, 0x15, static_cast<FRegister>(0), fs, fd, 0x21);
+}
+
void MipsAssembler::Mfc1(Register rt, FRegister fs) {
EmitFR(0x11, 0x00, static_cast<FRegister>(rt), fs, static_cast<FRegister>(0), 0x0);
}
@@ -1067,6 +1091,24 @@ void MipsAssembler::Mthc1(Register rt, FRegister fs) {
EmitFR(0x11, 0x07, static_cast<FRegister>(rt), fs, static_cast<FRegister>(0), 0x0);
}
+void MipsAssembler::MoveFromFpuHigh(Register rt, FRegister fs) {
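+  // In FR=0 mode, doubles occupy even/odd register pairs and the high word
+  // lives in the odd register; in FR=1 mode, MFHC1 reads the upper 32 bits
+  // of the 64-bit register directly.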
+ if (Is32BitFPU()) {
+ CHECK_EQ(fs % 2, 0) << fs;
+ Mfc1(rt, static_cast<FRegister>(fs + 1));
+ } else {
+ Mfhc1(rt, fs);
+ }
+}
+
+void MipsAssembler::MoveToFpuHigh(Register rt, FRegister fs) {
+ if (Is32BitFPU()) {
+ CHECK_EQ(fs % 2, 0) << fs;
+ Mtc1(rt, static_cast<FRegister>(fs + 1));
+ } else {
+ Mthc1(rt, fs);
+ }
+}
+
void MipsAssembler::Lwc1(FRegister ft, Register rs, uint16_t imm16) {
EmitI(0x31, rs, static_cast<Register>(ft), imm16);
}
@@ -1213,10 +1255,10 @@ void MipsAssembler::LoadDConst64(FRegister rd, int64_t value, Register temp) {
Mtc1(temp, rd);
}
if (high == 0) {
- Mthc1(ZERO, rd);
+ MoveToFpuHigh(ZERO, rd);
} else {
LoadConst32(temp, high);
- Mthc1(temp, rd);
+ MoveToFpuHigh(temp, rd);
}
}
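A hypothetical codegen sketch (helper and register choices invented; not part of this change) showing how the new instructions combine for a double-to-long truncation on MIPS32R2+ with FR=1:

    static void GenerateDoubleToLong(MipsAssembler* assembler,
                                     FRegister in, FRegister tmp,
                                     Register out_lo, Register out_hi) {
      assembler->TruncLD(tmp, in);               // 64-bit integer result, rounded toward zero.
      assembler->Mfc1(out_lo, tmp);              // Low 32 bits of the result.
      assembler->MoveFromFpuHigh(out_hi, tmp);   // High 32 bits (MFHC1 under FR=1).
    }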
diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h
index 066e7b0014..01c6490f88 100644
--- a/compiler/utils/mips/assembler_mips.h
+++ b/compiler/utils/mips/assembler_mips.h
@@ -265,15 +265,23 @@ class MipsAssembler FINAL : public Assembler {
void Movf(Register rd, Register rs, int cc); // R2
void Movt(Register rd, Register rs, int cc); // R2
+ void TruncLS(FRegister fd, FRegister fs); // R2+, FR=1
+ void TruncLD(FRegister fd, FRegister fs); // R2+, FR=1
+ void TruncWS(FRegister fd, FRegister fs);
+ void TruncWD(FRegister fd, FRegister fs);
void Cvtsw(FRegister fd, FRegister fs);
void Cvtdw(FRegister fd, FRegister fs);
void Cvtsd(FRegister fd, FRegister fs);
void Cvtds(FRegister fd, FRegister fs);
+ void Cvtsl(FRegister fd, FRegister fs); // R2+, FR=1
+ void Cvtdl(FRegister fd, FRegister fs); // R2+, FR=1
void Mfc1(Register rt, FRegister fs);
void Mtc1(Register rt, FRegister fs);
void Mfhc1(Register rt, FRegister fs);
void Mthc1(Register rt, FRegister fs);
+ void MoveFromFpuHigh(Register rt, FRegister fs);
+ void MoveToFpuHigh(Register rt, FRegister fs);
void Lwc1(FRegister ft, Register rs, uint16_t imm16);
void Ldc1(FRegister ft, Register rs, uint16_t imm16);
void Swc1(FRegister ft, Register rs, uint16_t imm16);
diff --git a/compiler/utils/mips/assembler_mips_test.cc b/compiler/utils/mips/assembler_mips_test.cc
index 4361843c54..5fc3deebd3 100644
--- a/compiler/utils/mips/assembler_mips_test.cc
+++ b/compiler/utils/mips/assembler_mips_test.cc
@@ -599,6 +599,14 @@ TEST_F(AssemblerMIPSTest, CvtDW) {
DriverStr(RepeatFF(&mips::MipsAssembler::Cvtdw, "cvt.d.w ${reg1}, ${reg2}"), "CvtDW");
}
+TEST_F(AssemblerMIPSTest, CvtSL) {
+ DriverStr(RepeatFF(&mips::MipsAssembler::Cvtsl, "cvt.s.l ${reg1}, ${reg2}"), "CvtSL");
+}
+
+TEST_F(AssemblerMIPSTest, CvtDL) {
+ DriverStr(RepeatFF(&mips::MipsAssembler::Cvtdl, "cvt.d.l ${reg1}, ${reg2}"), "CvtDL");
+}
+
TEST_F(AssemblerMIPSTest, CvtSD) {
DriverStr(RepeatFF(&mips::MipsAssembler::Cvtsd, "cvt.s.d ${reg1}, ${reg2}"), "CvtSD");
}
@@ -607,6 +615,22 @@ TEST_F(AssemblerMIPSTest, CvtDS) {
DriverStr(RepeatFF(&mips::MipsAssembler::Cvtds, "cvt.d.s ${reg1}, ${reg2}"), "CvtDS");
}
+TEST_F(AssemblerMIPSTest, TruncWS) {
+ DriverStr(RepeatFF(&mips::MipsAssembler::TruncWS, "trunc.w.s ${reg1}, ${reg2}"), "TruncWS");
+}
+
+TEST_F(AssemblerMIPSTest, TruncWD) {
+ DriverStr(RepeatFF(&mips::MipsAssembler::TruncWD, "trunc.w.d ${reg1}, ${reg2}"), "TruncWD");
+}
+
+TEST_F(AssemblerMIPSTest, TruncLS) {
+ DriverStr(RepeatFF(&mips::MipsAssembler::TruncLS, "trunc.l.s ${reg1}, ${reg2}"), "TruncLS");
+}
+
+TEST_F(AssemblerMIPSTest, TruncLD) {
+ DriverStr(RepeatFF(&mips::MipsAssembler::TruncLD, "trunc.l.d ${reg1}, ${reg2}"), "TruncLD");
+}
+
TEST_F(AssemblerMIPSTest, Mfc1) {
DriverStr(RepeatRF(&mips::MipsAssembler::Mfc1, "mfc1 ${reg1}, ${reg2}"), "Mfc1");
}
diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc
index cfd8421e93..f9ff2df8bb 100644
--- a/compiler/utils/mips64/assembler_mips64.cc
+++ b/compiler/utils/mips64/assembler_mips64.cc
@@ -771,6 +771,22 @@ void Mips64Assembler::RoundWD(FpuRegister fd, FpuRegister fs) {
EmitFR(0x11, 0x11, static_cast<FpuRegister>(0), fs, fd, 0xc);
}
+void Mips64Assembler::TruncLS(FpuRegister fd, FpuRegister fs) {
+ EmitFR(0x11, 0x10, static_cast<FpuRegister>(0), fs, fd, 0x9);
+}
+
+void Mips64Assembler::TruncLD(FpuRegister fd, FpuRegister fs) {
+ EmitFR(0x11, 0x11, static_cast<FpuRegister>(0), fs, fd, 0x9);
+}
+
+void Mips64Assembler::TruncWS(FpuRegister fd, FpuRegister fs) {
+ EmitFR(0x11, 0x10, static_cast<FpuRegister>(0), fs, fd, 0xd);
+}
+
+void Mips64Assembler::TruncWD(FpuRegister fd, FpuRegister fs) {
+ EmitFR(0x11, 0x11, static_cast<FpuRegister>(0), fs, fd, 0xd);
+}
+
void Mips64Assembler::CeilLS(FpuRegister fd, FpuRegister fs) {
EmitFR(0x11, 0x10, static_cast<FpuRegister>(0), fs, fd, 0xa);
}
diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h
index 883f013f87..3262640ce7 100644
--- a/compiler/utils/mips64/assembler_mips64.h
+++ b/compiler/utils/mips64/assembler_mips64.h
@@ -250,6 +250,10 @@ class Mips64Assembler FINAL : public Assembler {
void RoundLD(FpuRegister fd, FpuRegister fs);
void RoundWS(FpuRegister fd, FpuRegister fs);
void RoundWD(FpuRegister fd, FpuRegister fs);
+ void TruncLS(FpuRegister fd, FpuRegister fs);
+ void TruncLD(FpuRegister fd, FpuRegister fs);
+ void TruncWS(FpuRegister fd, FpuRegister fs);
+ void TruncWD(FpuRegister fd, FpuRegister fs);
void CeilLS(FpuRegister fd, FpuRegister fs);
void CeilLD(FpuRegister fd, FpuRegister fs);
void CeilWS(FpuRegister fd, FpuRegister fs);
diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc
index bac4375b35..7d79be2731 100644
--- a/compiler/utils/mips64/assembler_mips64_test.cc
+++ b/compiler/utils/mips64/assembler_mips64_test.cc
@@ -527,6 +527,22 @@ TEST_F(AssemblerMIPS64Test, CvtSW) {
DriverStr(RepeatFF(&mips64::Mips64Assembler::Cvtsw, "cvt.s.w ${reg1}, ${reg2}"), "cvt.s.w");
}
+TEST_F(AssemblerMIPS64Test, TruncWS) {
+ DriverStr(RepeatFF(&mips64::Mips64Assembler::TruncWS, "trunc.w.s ${reg1}, ${reg2}"), "trunc.w.s");
+}
+
+TEST_F(AssemblerMIPS64Test, TruncWD) {
+ DriverStr(RepeatFF(&mips64::Mips64Assembler::TruncWD, "trunc.w.d ${reg1}, ${reg2}"), "trunc.w.d");
+}
+
+TEST_F(AssemblerMIPS64Test, TruncLS) {
+ DriverStr(RepeatFF(&mips64::Mips64Assembler::TruncLS, "trunc.l.s ${reg1}, ${reg2}"), "trunc.l.s");
+}
+
+TEST_F(AssemblerMIPS64Test, TruncLD) {
+ DriverStr(RepeatFF(&mips64::Mips64Assembler::TruncLD, "trunc.l.d ${reg1}, ${reg2}"), "trunc.l.d");
+}
+
////////////////
// CALL / JMP //
////////////////
diff --git a/compiler/utils/swap_space.cc b/compiler/utils/swap_space.cc
index 42ed8810f8..244a5fedbe 100644
--- a/compiler/utils/swap_space.cc
+++ b/compiler/utils/swap_space.cc
@@ -18,6 +18,7 @@
#include <algorithm>
#include <numeric>
+#include <sys/mman.h>
#include "base/logging.h"
#include "base/macros.h"
@@ -44,23 +45,17 @@ static void DumpFreeMap(const FreeBySizeSet& free_by_size) {
}
}
-template <typename FreeByStartSet, typename FreeBySizeSet>
-static void RemoveChunk(FreeByStartSet* free_by_start,
- FreeBySizeSet* free_by_size,
- typename FreeBySizeSet::const_iterator free_by_size_pos) {
+void SwapSpace::RemoveChunk(FreeBySizeSet::const_iterator free_by_size_pos) {
auto free_by_start_pos = free_by_size_pos->second;
- free_by_size->erase(free_by_size_pos);
- free_by_start->erase(free_by_start_pos);
+ free_by_size_.erase(free_by_size_pos);
+ free_by_start_.erase(free_by_start_pos);
}
-template <typename FreeByStartSet, typename FreeBySizeSet>
-static void InsertChunk(FreeByStartSet* free_by_start,
- FreeBySizeSet* free_by_size,
- const SpaceChunk& chunk) {
+inline void SwapSpace::InsertChunk(const SpaceChunk& chunk) {
DCHECK_NE(chunk.size, 0u);
- auto insert_result = free_by_start->insert(chunk);
+ auto insert_result = free_by_start_.insert(chunk);
DCHECK(insert_result.second);
- free_by_size->emplace(chunk.size, insert_result.first);
+ free_by_size_.emplace(chunk.size, insert_result.first);
}
SwapSpace::SwapSpace(int fd, size_t initial_size)
@@ -69,10 +64,18 @@ SwapSpace::SwapSpace(int fd, size_t initial_size)
lock_("SwapSpace lock", static_cast<LockLevel>(LockLevel::kDefaultMutexLevel - 1)) {
// Assume that the file is unlinked.
- InsertChunk(&free_by_start_, &free_by_size_, NewFileChunk(initial_size));
+ InsertChunk(NewFileChunk(initial_size));
}
SwapSpace::~SwapSpace() {
+ // Unmap all mmapped chunks. Nothing should be allocated anymore at
+  // this point, so free_by_start_ should contain only full-size chunks.
+ for (const SpaceChunk& chunk : free_by_start_) {
+ if (munmap(chunk.ptr, chunk.size) != 0) {
+ PLOG(ERROR) << "Failed to unmap swap space chunk at "
+ << static_cast<const void*>(chunk.ptr) << " size=" << chunk.size;
+ }
+ }
// All arenas are backed by the same file. Just close the descriptor.
close(fd_);
}
@@ -113,7 +116,7 @@ void* SwapSpace::Alloc(size_t size) {
: free_by_size_.lower_bound(FreeBySizeEntry { size, free_by_start_.begin() });
if (it != free_by_size_.end()) {
old_chunk = *it->second;
- RemoveChunk(&free_by_start_, &free_by_size_, it);
+ RemoveChunk(it);
} else {
// Not a big enough free chunk, need to increase file size.
old_chunk = NewFileChunk(size);
@@ -124,13 +127,13 @@ void* SwapSpace::Alloc(size_t size) {
if (old_chunk.size != size) {
// Insert the remainder.
SpaceChunk new_chunk = { old_chunk.ptr + size, old_chunk.size - size };
- InsertChunk(&free_by_start_, &free_by_size_, new_chunk);
+ InsertChunk(new_chunk);
}
return ret;
}
-SpaceChunk SwapSpace::NewFileChunk(size_t min_size) {
+SwapSpace::SpaceChunk SwapSpace::NewFileChunk(size_t min_size) {
#if !defined(__APPLE__)
size_t next_part = std::max(RoundUp(min_size, kPageSize), RoundUp(kMininumMapSize, kPageSize));
int result = TEMP_FAILURE_RETRY(ftruncate64(fd_, size_ + next_part));
@@ -159,7 +162,7 @@ SpaceChunk SwapSpace::NewFileChunk(size_t min_size) {
}
// TODO: Full coalescing.
-void SwapSpace::Free(void* ptrV, size_t size) {
+void SwapSpace::Free(void* ptr, size_t size) {
MutexLock lock(Thread::Current(), lock_);
size = RoundUp(size, 8U);
@@ -168,7 +171,7 @@ void SwapSpace::Free(void* ptrV, size_t size) {
free_before = CollectFree(free_by_start_, free_by_size_);
}
- SpaceChunk chunk = { reinterpret_cast<uint8_t*>(ptrV), size };
+ SpaceChunk chunk = { reinterpret_cast<uint8_t*>(ptr), size };
auto it = free_by_start_.lower_bound(chunk);
if (it != free_by_start_.begin()) {
auto prev = it;
@@ -180,7 +183,7 @@ void SwapSpace::Free(void* ptrV, size_t size) {
chunk.ptr -= prev->size;
auto erase_pos = free_by_size_.find(FreeBySizeEntry { prev->size, prev });
DCHECK(erase_pos != free_by_size_.end());
- RemoveChunk(&free_by_start_, &free_by_size_, erase_pos);
+ RemoveChunk(erase_pos);
// "prev" is invalidated but "it" remains valid.
}
}
@@ -191,11 +194,11 @@ void SwapSpace::Free(void* ptrV, size_t size) {
chunk.size += it->size;
auto erase_pos = free_by_size_.find(FreeBySizeEntry { it->size, it });
DCHECK(erase_pos != free_by_size_.end());
- RemoveChunk(&free_by_start_, &free_by_size_, erase_pos);
+ RemoveChunk(erase_pos);
// "it" is invalidated but we don't need it anymore.
}
}
- InsertChunk(&free_by_start_, &free_by_size_, chunk);
+ InsertChunk(chunk);
if (kCheckFreeMaps) {
size_t free_after = CollectFree(free_by_start_, free_by_size_);
diff --git a/compiler/utils/swap_space.h b/compiler/utils/swap_space.h
index 9127b6b096..b659f1d3c7 100644
--- a/compiler/utils/swap_space.h
+++ b/compiler/utils/swap_space.h
@@ -19,42 +19,17 @@
#include <cstdlib>
#include <list>
+#include <vector>
#include <set>
#include <stdint.h>
#include <stddef.h>
-#include "base/debug_stack.h"
#include "base/logging.h"
#include "base/macros.h"
#include "base/mutex.h"
-#include "mem_map.h"
namespace art {
-// Chunk of space.
-struct SpaceChunk {
- uint8_t* ptr;
- size_t size;
-
- uintptr_t Start() const {
- return reinterpret_cast<uintptr_t>(ptr);
- }
- uintptr_t End() const {
- return reinterpret_cast<uintptr_t>(ptr) + size;
- }
-};
-
-inline bool operator==(const SpaceChunk& lhs, const SpaceChunk& rhs) {
- return (lhs.size == rhs.size) && (lhs.ptr == rhs.ptr);
-}
-
-class SortChunkByPtr {
- public:
- bool operator()(const SpaceChunk& a, const SpaceChunk& b) const {
- return reinterpret_cast<uintptr_t>(a.ptr) < reinterpret_cast<uintptr_t>(b.ptr);
- }
-};
-
// An arena pool that creates arenas backed by an mmaped file.
class SwapSpace {
public:
@@ -68,17 +43,27 @@ class SwapSpace {
}
private:
- SpaceChunk NewFileChunk(size_t min_size) REQUIRES(lock_);
+ // Chunk of space.
+ struct SpaceChunk {
+ uint8_t* ptr;
+ size_t size;
- int fd_;
- size_t size_;
- std::list<SpaceChunk> maps_;
+ uintptr_t Start() const {
+ return reinterpret_cast<uintptr_t>(ptr);
+ }
+ uintptr_t End() const {
+ return reinterpret_cast<uintptr_t>(ptr) + size;
+ }
+ };
- // NOTE: Boost.Bimap would be useful for the two following members.
+ class SortChunkByPtr {
+ public:
+ bool operator()(const SpaceChunk& a, const SpaceChunk& b) const {
+ return reinterpret_cast<uintptr_t>(a.ptr) < reinterpret_cast<uintptr_t>(b.ptr);
+ }
+ };
- // Map start of a free chunk to its size.
typedef std::set<SpaceChunk, SortChunkByPtr> FreeByStartSet;
- FreeByStartSet free_by_start_ GUARDED_BY(lock_);
// Map size to an iterator to free_by_start_'s entry.
typedef std::pair<size_t, FreeByStartSet::const_iterator> FreeBySizeEntry;
@@ -92,6 +77,21 @@ class SwapSpace {
}
};
typedef std::set<FreeBySizeEntry, FreeBySizeComparator> FreeBySizeSet;
+
+ SpaceChunk NewFileChunk(size_t min_size) REQUIRES(lock_);
+
+ void RemoveChunk(FreeBySizeSet::const_iterator free_by_size_pos) REQUIRES(lock_);
+ void InsertChunk(const SpaceChunk& chunk) REQUIRES(lock_);
+
+ int fd_;
+ size_t size_;
+ std::list<SpaceChunk> maps_;
+
+  // NOTE: Boost.Bimap would be useful for the following two members.
+
+ // Map start of a free chunk to its size.
+ FreeByStartSet free_by_start_ GUARDED_BY(lock_);
+ // Free chunks ordered by size.
FreeBySizeSet free_by_size_ GUARDED_BY(lock_);
mutable Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
@@ -126,6 +126,9 @@ class SwapAllocator<void> {
template <typename U>
friend class SwapAllocator;
+
+ template <typename U>
+ friend bool operator==(const SwapAllocator<U>& lhs, const SwapAllocator<U>& rhs);
};
template <typename T>
@@ -201,9 +204,22 @@ class SwapAllocator {
template <typename U>
friend class SwapAllocator;
+
+ template <typename U>
+ friend bool operator==(const SwapAllocator<U>& lhs, const SwapAllocator<U>& rhs);
};
template <typename T>
+inline bool operator==(const SwapAllocator<T>& lhs, const SwapAllocator<T>& rhs) {
+ return lhs.swap_space_ == rhs.swap_space_;
+}
+
+template <typename T>
+inline bool operator!=(const SwapAllocator<T>& lhs, const SwapAllocator<T>& rhs) {
+ return !(lhs == rhs);
+}
+
+template <typename T>
using SwapVector = std::vector<T, SwapAllocator<T>>;
template <typename T, typename Comparator>
using SwapSet = std::set<T, Comparator, SwapAllocator<T>>;
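A hypothetical usage sketch (descriptor and size invented) of the new allocator equality: containers sharing one SwapSpace compare equal, which allocator-aware containers rely on when exchanging or splicing storage:

    SwapSpace swap_space(swap_fd, /* initial_size */ 1024 * 1024);  // swap_fd: an open, unlinked file.
    SwapAllocator<int32_t> allocator(&swap_space);
    SwapVector<int32_t> first(allocator);
    SwapVector<int32_t> second(allocator);
    CHECK(first.get_allocator() == second.get_allocator());  // Same backing SwapSpace.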