Diffstat (limited to 'compiler')
-rw-r--r-- compiler/Android.mk | 10
-rw-r--r-- compiler/cfi_test.h | 139
-rw-r--r-- compiler/common_compiler_test.cc | 1
-rw-r--r-- compiler/compiled_method.cc | 39
-rw-r--r-- compiler/compiled_method.h | 208
-rw-r--r-- compiler/compiler.h | 3
-rw-r--r-- compiler/dex/bb_optimizations.h | 7
-rw-r--r-- compiler/dex/compiler_enums.h | 18
-rw-r--r-- compiler/dex/gvn_dead_code_elimination.cc | 1
-rw-r--r-- compiler/dex/local_value_numbering.cc | 6
-rw-r--r-- compiler/dex/mir_dataflow.cc | 28
-rw-r--r-- compiler/dex/mir_field_info.cc | 1
-rw-r--r-- compiler/dex/mir_graph.cc | 13
-rw-r--r-- compiler/dex/mir_graph.h | 18
-rw-r--r-- compiler/dex/mir_method_info.cc | 24
-rw-r--r-- compiler/dex/mir_method_info.h | 14
-rw-r--r-- compiler/dex/mir_optimization.cc | 60
-rw-r--r-- compiler/dex/mir_optimization_test.cc | 1
-rw-r--r-- compiler/dex/quick/arm/assemble_arm.cc | 2
-rw-r--r-- compiler/dex/quick/arm/call_arm.cc | 165
-rw-r--r-- compiler/dex/quick/arm/codegen_arm.h | 21
-rw-r--r-- compiler/dex/quick/arm/int_arm.cc | 30
-rw-r--r-- compiler/dex/quick/arm/target_arm.cc | 30
-rw-r--r-- compiler/dex/quick/arm/utility_arm.cc | 36
-rw-r--r-- compiler/dex/quick/arm64/arm64_lir.h | 1
-rw-r--r-- compiler/dex/quick/arm64/assemble_arm64.cc | 6
-rw-r--r-- compiler/dex/quick/arm64/call_arm64.cc | 77
-rw-r--r-- compiler/dex/quick/arm64/codegen_arm64.h | 10
-rw-r--r-- compiler/dex/quick/arm64/int_arm64.cc | 65
-rw-r--r-- compiler/dex/quick/arm64/target_arm64.cc | 18
-rw-r--r-- compiler/dex/quick/arm64/utility_arm64.cc | 5
-rw-r--r-- compiler/dex/quick/codegen_util.cc | 68
-rw-r--r-- compiler/dex/quick/dex_file_method_inliner.cc | 11
-rw-r--r-- compiler/dex/quick/dex_file_method_inliner.h | 5
-rw-r--r-- compiler/dex/quick/gen_common.cc | 536
-rwxr-xr-x compiler/dex/quick/gen_invoke.cc | 11
-rw-r--r-- compiler/dex/quick/gen_loadstore.cc | 14
-rw-r--r-- compiler/dex/quick/lazy_debug_frame_opcode_writer.cc | 58
-rw-r--r-- compiler/dex/quick/lazy_debug_frame_opcode_writer.h | 69
-rw-r--r-- compiler/dex/quick/local_optimizations.cc | 9
-rw-r--r-- compiler/dex/quick/mips/call_mips.cc | 132
-rw-r--r-- compiler/dex/quick/mips/int_mips.cc | 46
-rw-r--r-- compiler/dex/quick/mips/target_mips.cc | 8
-rw-r--r-- compiler/dex/quick/mips/utility_mips.cc | 4
-rw-r--r-- compiler/dex/quick/mir_to_lir.cc | 7
-rw-r--r-- compiler/dex/quick/mir_to_lir.h | 80
-rw-r--r-- compiler/dex/quick/quick_cfi_test.cc | 139
-rw-r--r-- compiler/dex/quick/quick_cfi_test_expected.inc | 217
-rw-r--r-- compiler/dex/quick/quick_compiler.cc | 17
-rw-r--r-- compiler/dex/quick/quick_compiler.h | 2
-rw-r--r-- compiler/dex/quick/ralloc_util.cc | 164
-rw-r--r-- compiler/dex/quick/x86/assemble_x86.cc | 45
-rw-r--r-- compiler/dex/quick/x86/call_x86.cc | 102
-rw-r--r-- compiler/dex/quick/x86/codegen_x86.h | 99
-rwxr-xr-x compiler/dex/quick/x86/fp_x86.cc | 18
-rwxr-xr-x compiler/dex/quick/x86/int_x86.cc | 112
-rwxr-xr-x compiler/dex/quick/x86/target_x86.cc | 206
-rw-r--r-- compiler/dex/quick/x86/utility_x86.cc | 242
-rw-r--r-- compiler/dex/quick/x86/x86_lir.h | 3
-rw-r--r-- compiler/driver/compiler_driver.cc | 65
-rw-r--r-- compiler/driver/compiler_driver.h | 20
-rw-r--r-- compiler/driver/compiler_options.cc | 5
-rw-r--r-- compiler/driver/compiler_options.h | 1
-rw-r--r-- compiler/dwarf/debug_frame_opcode_writer.h | 333
-rw-r--r-- compiler/dwarf/debug_info_entry_writer.h | 248
-rw-r--r-- compiler/dwarf/debug_line_opcode_writer.h | 243
-rw-r--r-- compiler/dwarf/dwarf_test.cc | 281
-rw-r--r-- compiler/dwarf/dwarf_test.h | 220
-rw-r--r-- compiler/dwarf/headers.h | 167
-rw-r--r-- compiler/dwarf/register.h | 58
-rw-r--r-- compiler/dwarf/writer.h | 173
-rw-r--r-- compiler/elf_builder.h | 2
-rw-r--r-- compiler/elf_writer_debug.cc | 360
-rw-r--r-- compiler/elf_writer_debug.h | 39
-rw-r--r-- compiler/elf_writer_quick.cc | 693
-rw-r--r-- compiler/image_writer.cc | 62
-rw-r--r-- compiler/image_writer.h | 24
-rw-r--r-- compiler/jni/jni_cfi_test.cc | 93
-rw-r--r-- compiler/jni/jni_cfi_test_expected.inc | 505
-rw-r--r-- compiler/jni/quick/jni_compiler.cc | 32
-rw-r--r-- compiler/jni/quick/mips64/calling_convention_mips64.cc | 7
-rw-r--r-- compiler/linker/arm/relative_patcher_arm_base.cc | 182
-rw-r--r-- compiler/linker/arm/relative_patcher_arm_base.h | 69
-rw-r--r-- compiler/linker/arm/relative_patcher_thumb2.cc | 118
-rw-r--r-- compiler/linker/arm/relative_patcher_thumb2.h | 58
-rw-r--r-- compiler/linker/arm/relative_patcher_thumb2_test.cc | 351
-rw-r--r-- compiler/linker/arm64/relative_patcher_arm64.cc | 322
-rw-r--r-- compiler/linker/arm64/relative_patcher_arm64.h | 74
-rw-r--r-- compiler/linker/arm64/relative_patcher_arm64_test.cc | 582
-rw-r--r-- compiler/linker/relative_patcher.cc | 113
-rw-r--r-- compiler/linker/relative_patcher.h | 126
-rw-r--r-- compiler/linker/relative_patcher_test.h | 255
-rw-r--r-- compiler/linker/x86/relative_patcher_x86.cc | 59
-rw-r--r-- compiler/linker/x86/relative_patcher_x86.h | 36
-rw-r--r-- compiler/linker/x86/relative_patcher_x86_base.cc | 49
-rw-r--r-- compiler/linker/x86/relative_patcher_x86_base.h | 50
-rw-r--r-- compiler/linker/x86/relative_patcher_x86_test.cc | 135
-rw-r--r-- compiler/linker/x86_64/relative_patcher_x86_64.cc | 37
-rw-r--r-- compiler/linker/x86_64/relative_patcher_x86_64.h | 36
-rw-r--r-- compiler/linker/x86_64/relative_patcher_x86_64_test.cc | 136
-rw-r--r-- compiler/oat_test.cc | 5
-rw-r--r-- compiler/oat_writer.cc | 535
-rw-r--r-- compiler/oat_writer.h | 78
-rw-r--r-- compiler/optimizing/boolean_simplifier.cc | 14
-rw-r--r-- compiler/optimizing/bounds_check_elimination.cc | 124
-rw-r--r-- compiler/optimizing/bounds_check_elimination_test.cc | 32
-rw-r--r-- compiler/optimizing/builder.cc | 29
-rw-r--r-- compiler/optimizing/builder.h | 6
-rw-r--r-- compiler/optimizing/code_generator.cc | 29
-rw-r--r-- compiler/optimizing/code_generator.h | 5
-rw-r--r-- compiler/optimizing/code_generator_arm.cc | 132
-rw-r--r-- compiler/optimizing/code_generator_arm.h | 4
-rw-r--r-- compiler/optimizing/code_generator_arm64.cc | 174
-rw-r--r-- compiler/optimizing/code_generator_arm64.h | 10
-rw-r--r-- compiler/optimizing/code_generator_x86.cc | 495
-rw-r--r-- compiler/optimizing/code_generator_x86.h | 17
-rw-r--r-- compiler/optimizing/code_generator_x86_64.cc | 349
-rw-r--r-- compiler/optimizing/code_generator_x86_64.h | 28
-rw-r--r-- compiler/optimizing/codegen_test.cc | 18
-rw-r--r-- compiler/optimizing/common_arm64.h | 4
-rw-r--r-- compiler/optimizing/constant_folding_test.cc | 5
-rw-r--r-- compiler/optimizing/dead_code_elimination_test.cc | 5
-rw-r--r-- compiler/optimizing/graph_visualizer.cc | 10
-rw-r--r-- compiler/optimizing/graph_visualizer.h | 4
-rw-r--r-- compiler/optimizing/intrinsics.cc | 6
-rw-r--r-- compiler/optimizing/intrinsics_arm.cc | 4
-rw-r--r-- compiler/optimizing/intrinsics_arm64.cc | 4
-rw-r--r-- compiler/optimizing/intrinsics_x86.cc | 356
-rw-r--r-- compiler/optimizing/intrinsics_x86.h | 3
-rw-r--r-- compiler/optimizing/intrinsics_x86_64.cc | 441
-rw-r--r-- compiler/optimizing/intrinsics_x86_64.h | 3
-rw-r--r-- compiler/optimizing/linearize_test.cc | 5
-rw-r--r-- compiler/optimizing/live_ranges_test.cc | 25
-rw-r--r-- compiler/optimizing/liveness_test.cc | 5
-rw-r--r-- compiler/optimizing/nodes.cc | 4
-rw-r--r-- compiler/optimizing/nodes.h | 43
-rw-r--r-- compiler/optimizing/nodes_test.cc | 2
-rw-r--r-- compiler/optimizing/optimizing_cfi_test.cc | 127
-rw-r--r-- compiler/optimizing/optimizing_cfi_test_expected.inc | 141
-rw-r--r-- compiler/optimizing/optimizing_compiler.cc | 46
-rw-r--r-- compiler/optimizing/parallel_move_resolver.cc | 82
-rw-r--r-- compiler/optimizing/parallel_move_resolver.h | 7
-rw-r--r-- compiler/optimizing/parallel_move_test.cc | 27
-rw-r--r-- compiler/optimizing/prepare_for_register_allocation.cc | 4
-rw-r--r-- compiler/optimizing/register_allocator.cc | 54
-rw-r--r-- compiler/optimizing/register_allocator_test.cc | 65
-rw-r--r-- compiler/optimizing/ssa_builder.cc | 2
-rw-r--r-- compiler/optimizing/ssa_liveness_analysis.cc | 32
-rw-r--r-- compiler/optimizing/ssa_liveness_analysis.h | 25
-rw-r--r-- compiler/optimizing/stack_map_stream.h | 173
-rw-r--r-- compiler/optimizing/stack_map_test.cc | 333
-rw-r--r-- compiler/utils/arm/assembler_arm.cc | 90
-rw-r--r-- compiler/utils/arm/assembler_thumb2.cc | 35
-rw-r--r-- compiler/utils/arm/assembler_thumb2.h | 8
-rw-r--r-- compiler/utils/arm/assembler_thumb2_test.cc | 99
-rw-r--r-- compiler/utils/arm/managed_register_arm.h | 1
-rw-r--r-- compiler/utils/arm64/assembler_arm64.cc | 121
-rw-r--r-- compiler/utils/arm64/assembler_arm64.h | 6
-rw-r--r-- compiler/utils/arm64/managed_register_arm64.h | 1
-rw-r--r-- compiler/utils/array_ref.h | 2
-rw-r--r-- compiler/utils/assembler.cc | 3
-rw-r--r-- compiler/utils/assembler.h | 34
-rw-r--r-- compiler/utils/assembler_test.h | 94
-rw-r--r-- compiler/utils/dex_cache_arrays_layout-inl.h | 77
-rw-r--r-- compiler/utils/dex_cache_arrays_layout.h | 89
-rw-r--r-- compiler/utils/dwarf_cfi.cc | 156
-rw-r--r-- compiler/utils/dwarf_cfi.h | 95
-rw-r--r-- compiler/utils/mips/assembler_mips.cc | 15
-rw-r--r-- compiler/utils/mips/managed_register_mips.h | 1
-rw-r--r-- compiler/utils/mips64/assembler_mips64.cc | 17
-rw-r--r-- compiler/utils/mips64/managed_register_mips64.h | 1
-rw-r--r-- compiler/utils/x86/assembler_x86.cc | 182
-rw-r--r-- compiler/utils/x86/assembler_x86.h | 31
-rw-r--r-- compiler/utils/x86/assembler_x86_test.cc | 45
-rw-r--r-- compiler/utils/x86/managed_register_x86.h | 9
-rw-r--r-- compiler/utils/x86_64/assembler_x86_64.cc | 168
-rw-r--r-- compiler/utils/x86_64/assembler_x86_64.h | 102
-rw-r--r-- compiler/utils/x86_64/assembler_x86_64_test.cc | 133
-rw-r--r-- compiler/utils/x86_64/managed_register_x86_64.h | 16
179 files changed, 12157 insertions, 3505 deletions
diff --git a/compiler/Android.mk b/compiler/Android.mk
index c663fcbf89..ac95abdd8d 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -41,6 +41,7 @@ LIBART_COMPILER_SRC_FILES := \
dex/quick/gen_common.cc \
dex/quick/gen_invoke.cc \
dex/quick/gen_loadstore.cc \
+ dex/quick/lazy_debug_frame_opcode_writer.cc \
dex/quick/local_optimizations.cc \
dex/quick/mips/assemble_mips.cc \
dex/quick/mips/call_mips.cc \
@@ -79,6 +80,13 @@ LIBART_COMPILER_SRC_FILES := \
driver/compiler_driver.cc \
driver/compiler_options.cc \
driver/dex_compilation_unit.cc \
+ linker/relative_patcher.cc \
+ linker/arm/relative_patcher_arm_base.cc \
+ linker/arm/relative_patcher_thumb2.cc \
+ linker/arm64/relative_patcher_arm64.cc \
+ linker/x86/relative_patcher_x86_base.cc \
+ linker/x86/relative_patcher_x86.cc \
+ linker/x86_64/relative_patcher_x86_64.cc \
jit/jit_compiler.cc \
jni/quick/arm/calling_convention_arm.cc \
jni/quick/arm64/calling_convention_arm64.cc \
@@ -132,7 +140,6 @@ LIBART_COMPILER_SRC_FILES := \
utils/arm64/assembler_arm64.cc \
utils/arm64/managed_register_arm64.cc \
utils/assembler.cc \
- utils/dwarf_cfi.cc \
utils/mips/assembler_mips.cc \
utils/mips/managed_register_mips.cc \
utils/mips64/assembler_mips64.cc \
@@ -145,6 +152,7 @@ LIBART_COMPILER_SRC_FILES := \
buffered_output_stream.cc \
compiler.cc \
elf_writer.cc \
+ elf_writer_debug.cc \
elf_writer_quick.cc \
file_output_stream.cc \
image_writer.cc \
diff --git a/compiler/cfi_test.h b/compiler/cfi_test.h
new file mode 100644
index 0000000000..918179290b
--- /dev/null
+++ b/compiler/cfi_test.h
@@ -0,0 +1,139 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_CFI_TEST_H_
+#define ART_COMPILER_CFI_TEST_H_
+
+#include <vector>
+#include <memory>
+#include <sstream>
+
+#include "arch/instruction_set.h"
+#include "dwarf/dwarf_test.h"
+#include "dwarf/headers.h"
+#include "disassembler/disassembler.h"
+#include "gtest/gtest.h"
+
+namespace art {
+
+class CFITest : public dwarf::DwarfTest {
+ public:
+ void GenerateExpected(FILE* f, InstructionSet isa, const char* isa_str,
+ const std::vector<uint8_t>& actual_asm,
+ const std::vector<uint8_t>& actual_cfi) {
+ std::vector<std::string> lines;
+ // Print the raw bytes.
+ fprintf(f, "static constexpr uint8_t expected_asm_%s[] = {", isa_str);
+ HexDump(f, actual_asm);
+ fprintf(f, "\n};\n");
+ fprintf(f, "static constexpr uint8_t expected_cfi_%s[] = {", isa_str);
+ HexDump(f, actual_cfi);
+ fprintf(f, "\n};\n");
+ // Pretty-print CFI opcodes.
+ constexpr bool is64bit = false;
+ dwarf::DebugFrameOpCodeWriter<> initial_opcodes;
+ dwarf::WriteEhFrameCIE(is64bit, dwarf::Reg(8), initial_opcodes, &eh_frame_data_);
+ dwarf::WriteEhFrameFDE(is64bit, 0, 0, actual_asm.size(), &actual_cfi, &eh_frame_data_);
+ ReformatCfi(Objdump(false, "-W"), &lines);
+ // Pretty-print assembly.
+ auto* opts = new DisassemblerOptions(false, actual_asm.data(), true);
+ std::unique_ptr<Disassembler> disasm(Disassembler::Create(isa, opts));
+ std::stringstream stream;
+ const uint8_t* base = actual_asm.data() + (isa == kThumb2 ? 1 : 0);
+ disasm->Dump(stream, base, base + actual_asm.size());
+ ReformatAsm(&stream, &lines);
+ // Print CFI and assembly interleaved.
+ std::stable_sort(lines.begin(), lines.end(), CompareByAddress);
+ for (const std::string& line : lines) {
+ fprintf(f, "// %s\n", line.c_str());
+ }
+ fprintf(f, "\n");
+ }
+
+ private:
+ // Helper - get the offset just past the end of the given string.
+ static size_t FindEndOf(const std::string& str, const char* substr) {
+ size_t pos = str.find(substr);
+ CHECK_NE(std::string::npos, pos);
+ return pos + strlen(substr);
+ }
+
+ // Split into lines and remove the raw instruction bytes.
+ static void ReformatAsm(std::stringstream* stream,
+ std::vector<std::string>* output) {
+ std::string line;
+ while (std::getline(*stream, line)) {
+ line = line.substr(0, FindEndOf(line, ": ")) +
+ line.substr(FindEndOf(line, "\t"));
+ size_t pos;
+ while ((pos = line.find(" ")) != std::string::npos) {
+ line = line.replace(pos, 2, " ");
+ }
+ while (!line.empty() && line.back() == ' ') {
+ line.pop_back();
+ }
+ output->push_back(line);
+ }
+ }
+
+ // Find interesting parts of objdump output and prefix the lines with address.
+ static void ReformatCfi(const std::vector<std::string>& lines,
+ std::vector<std::string>* output) {
+ std::string address;
+ for (const std::string& line : lines) {
+ if (line.find("DW_CFA_nop") != std::string::npos) {
+ // Ignore.
+ } else if (line.find("DW_CFA_advance_loc") != std::string::npos) {
+ // The last 8 characters are the address.
+ address = "0x" + line.substr(line.size() - 8);
+ } else if (line.find("DW_CFA_") != std::string::npos) {
+ std::string new_line(line);
+ // "bad register" warning is caused by always using host (x86) objdump.
+ const char* bad_reg = "bad register: ";
+ size_t pos;
+ if ((pos = new_line.find(bad_reg)) != std::string::npos) {
+ new_line = new_line.replace(pos, strlen(bad_reg), "");
+ }
+ // Remove register names in parentheses since they have x86 names.
+ if ((pos = new_line.find(" (")) != std::string::npos) {
+ new_line = new_line.replace(pos, FindEndOf(new_line, ")") - pos, "");
+ }
+ // Use the .cfi_ prefix.
+ new_line = ".cfi_" + new_line.substr(FindEndOf(new_line, "DW_CFA_"));
+ output->push_back(address + ": " + new_line);
+ }
+ }
+ }
+
+ // Compare strings by the address prefix.
+ static bool CompareByAddress(const std::string& lhs, const std::string& rhs) {
+ EXPECT_EQ(lhs[10], ':');
+ EXPECT_EQ(rhs[10], ':');
+ return strncmp(lhs.c_str(), rhs.c_str(), 10) < 0;
+ }
+
+ // Pretty-print byte array. 12 bytes per line.
+ static void HexDump(FILE* f, const std::vector<uint8_t>& data) {
+ for (size_t i = 0; i < data.size(); i++) {
+ fprintf(f, i % 12 == 0 ? "\n " : " "); // Whitespace.
+ fprintf(f, "0x%02X,", data[i]);
+ }
+ }
+};
+
+} // namespace art
+
+#endif // ART_COMPILER_CFI_TEST_H_
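For context, this harness is consumed by the new per-backend CFI tests in the diffstat (quick_cfi_test.cc, jni_cfi_test.cc, optimizing_cfi_test.cc). A minimal sketch of such a test, assuming hypothetical CompileMethod()/GetMethodCfi() helpers and a kGenerateExpected switch for regenerating the *_expected.inc files:

    class ExampleCFITest : public CFITest {  // hypothetical fixture
     public:
      void TestImpl(InstructionSet isa, const char* isa_str,
                    const std::vector<uint8_t>& expected_asm,
                    const std::vector<uint8_t>& expected_cfi) {
        std::vector<uint8_t> actual_asm = CompileMethod(isa);  // assumed helper
        std::vector<uint8_t> actual_cfi = GetMethodCfi(isa);   // assumed helper
        if (kGenerateExpected) {  // assumed flag
          // Emit the byte arrays plus interleaved asm/CFI comments.
          GenerateExpected(stdout, isa, isa_str, actual_asm, actual_cfi);
        } else {
          EXPECT_EQ(expected_asm, actual_asm);
          EXPECT_EQ(expected_cfi, actual_cfi);
        }
      }
    };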
diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc
index 1d0aad5425..96d90bb443 100644
--- a/compiler/common_compiler_test.cc
+++ b/compiler/common_compiler_test.cc
@@ -24,6 +24,7 @@
#include "dex/quick/dex_file_to_method_inliner_map.h"
#include "dex/verification_results.h"
#include "driver/compiler_driver.h"
+#include "driver/compiler_options.h"
#include "interpreter/interpreter.h"
#include "mirror/art_method.h"
#include "mirror/dex_cache.h"
diff --git a/compiler/compiled_method.cc b/compiler/compiled_method.cc
index 1849e7ef64..4f7a970fdd 100644
--- a/compiler/compiled_method.cc
+++ b/compiler/compiled_method.cc
@@ -132,7 +132,7 @@ CompiledMethod::CompiledMethod(CompilerDriver* driver,
const ArrayRef<const uint8_t>& vmap_table,
const ArrayRef<const uint8_t>& native_gc_map,
const ArrayRef<const uint8_t>& cfi_info,
- const ArrayRef<LinkerPatch>& patches)
+ const ArrayRef<const LinkerPatch>& patches)
: CompiledCode(driver, instruction_set, quick_code, !driver->DedupeEnabled()),
owns_arrays_(!driver->DedupeEnabled()),
frame_size_in_bytes_(frame_size_in_bytes), core_spill_mask_(core_spill_mask),
@@ -142,7 +142,6 @@ CompiledMethod::CompiledMethod(CompilerDriver* driver,
if (src_mapping_table == nullptr) {
src_mapping_table_ = new SwapSrcMap(driver->GetSwapSpaceAllocator());
} else {
- src_mapping_table->Arrange();
src_mapping_table_ = new SwapSrcMap(src_mapping_table->begin(), src_mapping_table->end(),
driver->GetSwapSpaceAllocator());
}
@@ -159,7 +158,7 @@ CompiledMethod::CompiledMethod(CompilerDriver* driver,
} else {
src_mapping_table_ = src_mapping_table == nullptr ?
driver->DeduplicateSrcMappingTable(ArrayRef<SrcMapElem>()) :
- driver->DeduplicateSrcMappingTable(ArrayRef<SrcMapElem>(src_mapping_table->Arrange()));
+ driver->DeduplicateSrcMappingTable(ArrayRef<SrcMapElem>(*src_mapping_table));
mapping_table_ = mapping_table.empty() ?
nullptr : driver->DeduplicateMappingTable(mapping_table);
vmap_table_ = driver->DeduplicateVMapTable(vmap_table);
@@ -180,7 +179,7 @@ CompiledMethod* CompiledMethod::SwapAllocCompiledMethod(
const ArrayRef<const uint8_t>& vmap_table,
const ArrayRef<const uint8_t>& native_gc_map,
const ArrayRef<const uint8_t>& cfi_info,
- const ArrayRef<LinkerPatch>& patches) {
+ const ArrayRef<const LinkerPatch>& patches) {
SwapAllocator<CompiledMethod> alloc(driver->GetSwapSpaceAllocator());
CompiledMethod* ret = alloc.allocate(1);
alloc.construct(ret, driver, instruction_set, quick_code, frame_size_in_bytes, core_spill_mask,
@@ -189,38 +188,6 @@ CompiledMethod* CompiledMethod::SwapAllocCompiledMethod(
return ret;
}
-CompiledMethod* CompiledMethod::SwapAllocCompiledMethodStackMap(
- CompilerDriver* driver,
- InstructionSet instruction_set,
- const ArrayRef<const uint8_t>& quick_code,
- const size_t frame_size_in_bytes,
- const uint32_t core_spill_mask,
- const uint32_t fp_spill_mask,
- const ArrayRef<const uint8_t>& stack_map) {
- SwapAllocator<CompiledMethod> alloc(driver->GetSwapSpaceAllocator());
- CompiledMethod* ret = alloc.allocate(1);
- alloc.construct(ret, driver, instruction_set, quick_code, frame_size_in_bytes, core_spill_mask,
- fp_spill_mask, nullptr, ArrayRef<const uint8_t>(), stack_map,
- ArrayRef<const uint8_t>(), ArrayRef<const uint8_t>(), ArrayRef<LinkerPatch>());
- return ret;
-}
-
-CompiledMethod* CompiledMethod::SwapAllocCompiledMethodCFI(
- CompilerDriver* driver,
- InstructionSet instruction_set,
- const ArrayRef<const uint8_t>& quick_code,
- const size_t frame_size_in_bytes,
- const uint32_t core_spill_mask,
- const uint32_t fp_spill_mask,
- const ArrayRef<const uint8_t>& cfi_info) {
- SwapAllocator<CompiledMethod> alloc(driver->GetSwapSpaceAllocator());
- CompiledMethod* ret = alloc.allocate(1);
- alloc.construct(ret, driver, instruction_set, quick_code, frame_size_in_bytes, core_spill_mask,
- fp_spill_mask, nullptr, ArrayRef<const uint8_t>(),
- ArrayRef<const uint8_t>(), ArrayRef<const uint8_t>(),
- cfi_info, ArrayRef<LinkerPatch>());
- return ret;
-}
void CompiledMethod::ReleaseSwapAllocatedCompiledMethod(CompilerDriver* driver, CompiledMethod* m) {
diff --git a/compiler/compiled_method.h b/compiler/compiled_method.h
index d6a07f6226..480d021db0 100644
--- a/compiler/compiled_method.h
+++ b/compiler/compiled_method.h
@@ -94,20 +94,12 @@ class SrcMapElem {
uint32_t from_;
int32_t to_;
- explicit operator int64_t() const {
- return (static_cast<int64_t>(to_) << 32) | from_;
- }
-
- bool operator<(const SrcMapElem& sme) const {
- return int64_t(*this) < int64_t(sme);
- }
-
- bool operator==(const SrcMapElem& sme) const {
- return int64_t(*this) == int64_t(sme);
- }
-
- explicit operator uint8_t() const {
- return static_cast<uint8_t>(from_ + to_);
+ // Lexicographical compare.
+ bool operator<(const SrcMapElem& other) const {
+ if (from_ != other.from_) {
+ return from_ < other.from_;
+ }
+ return to_ < other.to_;
}
};
@@ -129,49 +121,33 @@ class SrcMap FINAL : public std::vector<SrcMapElem, Allocator> {
SrcMap(InputIt first, InputIt last, const Allocator& alloc)
: std::vector<SrcMapElem, Allocator>(first, last, alloc) {}
- void SortByFrom() {
- std::sort(begin(), end(), [] (const SrcMapElem& lhs, const SrcMapElem& rhs) -> bool {
- return lhs.from_ < rhs.from_;
- });
- }
-
- const_iterator FindByTo(int32_t to) const {
- return std::lower_bound(begin(), end(), SrcMapElem({0, to}));
- }
-
- SrcMap& Arrange() {
+ void push_back(const SrcMapElem& elem) {
if (!empty()) {
- std::sort(begin(), end());
- resize(std::unique(begin(), end()) - begin());
- shrink_to_fit();
+ // Check that the addresses are inserted in sorted order.
+ DCHECK_GE(elem.from_, this->back().from_);
+ // If two consecutive entries map to the same value, ignore the latter.
+ // E.g. for map {{0, 1}, {4, 1}, {8, 2}}, all values in [0,8) map to 1.
+ if (elem.to_ == this->back().to_) {
+ return;
+ }
}
- return *this;
+ std::vector<SrcMapElem, Allocator>::push_back(elem);
}
- void DeltaFormat(const SrcMapElem& start, uint32_t highest_pc) {
- // Convert from abs values to deltas.
- if (!empty()) {
- SortByFrom();
-
- // TODO: one PC can be mapped to several Java src lines.
- // do we want such a one-to-many correspondence?
-
- // get rid of the highest values
- size_t i = size() - 1;
- for (; i > 0 ; i--) {
- if ((*this)[i].from_ < highest_pc) {
- break;
- }
- }
- this->resize(i + 1);
-
- for (i = size(); --i >= 1; ) {
- (*this)[i].from_ -= (*this)[i-1].from_;
- (*this)[i].to_ -= (*this)[i-1].to_;
- }
- DCHECK((*this)[0].from_ >= start.from_);
- (*this)[0].from_ -= start.from_;
- (*this)[0].to_ -= start.to_;
+ // Returns true and the corresponding "to" value if the mapping is found.
+ // Otherwise returns false and 0.
+ std::pair<bool, int32_t> Find(uint32_t from) const {
+ // Finds first mapping such that lb.from_ >= from.
+ auto lb = std::lower_bound(begin(), end(), SrcMapElem {from, INT32_MIN});
+ if (lb != end() && lb->from_ == from) {
+ // Found exact match.
+ return std::make_pair(true, lb->to_);
+ } else if (lb != begin()) {
+ // The previous mapping is still in effect.
+ return std::make_pair(true, (--lb)->to_);
+ } else {
+ // Not found because 'from' is smaller than first entry in the map.
+ return std::make_pair(false, 0);
}
}
};
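To make the new push_back()/Find() contract concrete, a small usage sketch (a default-constructed SrcMap with the standard allocator is assumed; values are taken from the comment above):

    SrcMap<std::allocator<SrcMapElem>> map;
    map.push_back({0u, 1});
    map.push_back({4u, 1});  // dropped: maps to the same value as the previous entry
    map.push_back({8u, 2});
    // The map now holds {{0, 1}, {8, 2}}.
    std::pair<bool, int32_t> r1 = map.Find(5u);  // {true, 1}: entry {0, 1} still in effect
    std::pair<bool, int32_t> r2 = map.Find(8u);  // {true, 2}: exact match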
@@ -185,6 +161,7 @@ enum LinkerPatchType {
kLinkerPatchCall,
kLinkerPatchCallRelative, // NOTE: Actual patching is instruction_set-dependent.
kLinkerPatchType,
+ kLinkerPatchDexCacheArray, // NOTE: Actual patching is instruction_set-dependent.
};
class LinkerPatch {
@@ -192,28 +169,44 @@ class LinkerPatch {
static LinkerPatch MethodPatch(size_t literal_offset,
const DexFile* target_dex_file,
uint32_t target_method_idx) {
- return LinkerPatch(literal_offset, kLinkerPatchMethod,
- target_method_idx, target_dex_file);
+ LinkerPatch patch(literal_offset, kLinkerPatchMethod, target_dex_file);
+ patch.method_idx_ = target_method_idx;
+ return patch;
}
static LinkerPatch CodePatch(size_t literal_offset,
const DexFile* target_dex_file,
uint32_t target_method_idx) {
- return LinkerPatch(literal_offset, kLinkerPatchCall,
- target_method_idx, target_dex_file);
+ LinkerPatch patch(literal_offset, kLinkerPatchCall, target_dex_file);
+ patch.method_idx_ = target_method_idx;
+ return patch;
}
static LinkerPatch RelativeCodePatch(size_t literal_offset,
const DexFile* target_dex_file,
uint32_t target_method_idx) {
- return LinkerPatch(literal_offset, kLinkerPatchCallRelative,
- target_method_idx, target_dex_file);
+ LinkerPatch patch(literal_offset, kLinkerPatchCallRelative, target_dex_file);
+ patch.method_idx_ = target_method_idx;
+ return patch;
}
static LinkerPatch TypePatch(size_t literal_offset,
const DexFile* target_dex_file,
uint32_t target_type_idx) {
- return LinkerPatch(literal_offset, kLinkerPatchType, target_type_idx, target_dex_file);
+ LinkerPatch patch(literal_offset, kLinkerPatchType, target_dex_file);
+ patch.type_idx_ = target_type_idx;
+ return patch;
+ }
+
+ static LinkerPatch DexCacheArrayPatch(size_t literal_offset,
+ const DexFile* target_dex_file,
+ uint32_t pc_insn_offset,
+ size_t element_offset) {
+ DCHECK(IsUint<32>(element_offset));
+ LinkerPatch patch(literal_offset, kLinkerPatchDexCacheArray, target_dex_file);
+ patch.pc_insn_offset_ = pc_insn_offset;
+ patch.element_offset_ = element_offset;
+ return patch;
}
LinkerPatch(const LinkerPatch& other) = default;
@@ -227,10 +220,14 @@ class LinkerPatch {
return patch_type_;
}
+ bool IsPcRelative() const {
+ return Type() == kLinkerPatchCallRelative || Type() == kLinkerPatchDexCacheArray;
+ }
+
MethodReference TargetMethod() const {
DCHECK(patch_type_ == kLinkerPatchMethod ||
patch_type_ == kLinkerPatchCall || patch_type_ == kLinkerPatchCallRelative);
- return MethodReference(target_dex_file_, target_idx_);
+ return MethodReference(target_dex_file_, method_idx_);
}
const DexFile* TargetTypeDexFile() const {
@@ -240,22 +237,52 @@ class LinkerPatch {
uint32_t TargetTypeIndex() const {
DCHECK(patch_type_ == kLinkerPatchType);
- return target_idx_;
+ return type_idx_;
+ }
+
+ const DexFile* TargetDexCacheDexFile() const {
+ DCHECK(patch_type_ == kLinkerPatchDexCacheArray);
+ return target_dex_file_;
+ }
+
+ size_t TargetDexCacheElementOffset() const {
+ DCHECK(patch_type_ == kLinkerPatchDexCacheArray);
+ return element_offset_;
+ }
+
+ uint32_t PcInsnOffset() const {
+ DCHECK(patch_type_ == kLinkerPatchDexCacheArray);
+ return pc_insn_offset_;
}
private:
- LinkerPatch(size_t literal_offset, LinkerPatchType patch_type,
- uint32_t target_idx, const DexFile* target_dex_file)
- : literal_offset_(literal_offset),
- patch_type_(patch_type),
- target_idx_(target_idx),
- target_dex_file_(target_dex_file) {
+ LinkerPatch(size_t literal_offset, LinkerPatchType patch_type, const DexFile* target_dex_file)
+ : target_dex_file_(target_dex_file),
+ literal_offset_(literal_offset),
+ patch_type_(patch_type) {
+ cmp1_ = 0u;
+ cmp2_ = 0u;
+ // The compiler rejects methods that are too big, so the compiled code
+ // of a single method really shouldn't be anywhere close to 16MiB.
+ DCHECK(IsUint<24>(literal_offset));
}
- size_t literal_offset_;
- LinkerPatchType patch_type_;
- uint32_t target_idx_; // Method index (Call/Method patches) or type index (Type patches).
const DexFile* target_dex_file_;
+ uint32_t literal_offset_ : 24; // Method code size up to 16MiB.
+ LinkerPatchType patch_type_ : 8;
+ union {
+ uint32_t cmp1_; // Used for relational operators.
+ uint32_t method_idx_; // Method index for Call/Method patches.
+ uint32_t type_idx_; // Type index for Type patches.
+ uint32_t element_offset_; // Element offset in the dex cache arrays.
+ };
+ union {
+ uint32_t cmp2_; // Used for relational operators.
+ // Literal offset of the insn loading PC (same as literal_offset if it's the same insn,
+ // may be different if the PC-relative addressing needs multiple insns).
+ uint32_t pc_insn_offset_;
+ static_assert(sizeof(pc_insn_offset_) == sizeof(cmp2_), "needed by relational operators");
+ };
friend bool operator==(const LinkerPatch& lhs, const LinkerPatch& rhs);
friend bool operator<(const LinkerPatch& lhs, const LinkerPatch& rhs);
@@ -264,15 +291,17 @@ class LinkerPatch {
inline bool operator==(const LinkerPatch& lhs, const LinkerPatch& rhs) {
return lhs.literal_offset_ == rhs.literal_offset_ &&
lhs.patch_type_ == rhs.patch_type_ &&
- lhs.target_idx_ == rhs.target_idx_ &&
- lhs.target_dex_file_ == rhs.target_dex_file_;
+ lhs.target_dex_file_ == rhs.target_dex_file_ &&
+ lhs.cmp1_ == rhs.cmp1_ &&
+ lhs.cmp2_ == rhs.cmp2_;
}
inline bool operator<(const LinkerPatch& lhs, const LinkerPatch& rhs) {
return (lhs.literal_offset_ != rhs.literal_offset_) ? lhs.literal_offset_ < rhs.literal_offset_
: (lhs.patch_type_ != rhs.patch_type_) ? lhs.patch_type_ < rhs.patch_type_
- : (lhs.target_idx_ != rhs.target_idx_) ? lhs.target_idx_ < rhs.target_idx_
- : lhs.target_dex_file_ < rhs.target_dex_file_;
+ : (lhs.target_dex_file_ != rhs.target_dex_file_) ? lhs.target_dex_file_ < rhs.target_dex_file_
+ : (lhs.cmp1_ != rhs.cmp1_) ? lhs.cmp1_ < rhs.cmp1_
+ : lhs.cmp2_ < rhs.cmp2_;
}
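A sketch of the new factory/accessor pair in use, based only on the declarations above (dex_file and element_offset are assumed to be in scope; the offsets are made up):

    // The insn at literal offset 0x20 consumes a PC set by the insn at 0x1c.
    LinkerPatch patch =
        LinkerPatch::DexCacheArrayPatch(0x20u, dex_file, 0x1cu, element_offset);
    DCHECK(patch.IsPcRelative());
    DCHECK_EQ(patch.PcInsnOffset(), 0x1cu);
    DCHECK_EQ(patch.TargetDexCacheElementOffset(), element_offset);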
class CompiledMethod FINAL : public CompiledCode {
@@ -291,7 +320,7 @@ class CompiledMethod FINAL : public CompiledCode {
const ArrayRef<const uint8_t>& vmap_table,
const ArrayRef<const uint8_t>& native_gc_map,
const ArrayRef<const uint8_t>& cfi_info,
- const ArrayRef<LinkerPatch>& patches = ArrayRef<LinkerPatch>());
+ const ArrayRef<const LinkerPatch>& patches);
virtual ~CompiledMethod();
@@ -307,24 +336,7 @@ class CompiledMethod FINAL : public CompiledCode {
const ArrayRef<const uint8_t>& vmap_table,
const ArrayRef<const uint8_t>& native_gc_map,
const ArrayRef<const uint8_t>& cfi_info,
- const ArrayRef<LinkerPatch>& patches = ArrayRef<LinkerPatch>());
-
- static CompiledMethod* SwapAllocCompiledMethodStackMap(
- CompilerDriver* driver,
- InstructionSet instruction_set,
- const ArrayRef<const uint8_t>& quick_code,
- const size_t frame_size_in_bytes,
- const uint32_t core_spill_mask,
- const uint32_t fp_spill_mask,
- const ArrayRef<const uint8_t>& stack_map);
-
- static CompiledMethod* SwapAllocCompiledMethodCFI(CompilerDriver* driver,
- InstructionSet instruction_set,
- const ArrayRef<const uint8_t>& quick_code,
- const size_t frame_size_in_bytes,
- const uint32_t core_spill_mask,
- const uint32_t fp_spill_mask,
- const ArrayRef<const uint8_t>& cfi_info);
+ const ArrayRef<const LinkerPatch>& patches);
static void ReleaseSwapAllocatedCompiledMethod(CompilerDriver* driver, CompiledMethod* m);
@@ -362,8 +374,8 @@ class CompiledMethod FINAL : public CompiledCode {
return cfi_info_;
}
- const SwapVector<LinkerPatch>& GetPatches() const {
- return patches_;
+ ArrayRef<const LinkerPatch> GetPatches() const {
+ return ArrayRef<const LinkerPatch>(patches_);
}
private:
@@ -375,7 +387,7 @@ class CompiledMethod FINAL : public CompiledCode {
const uint32_t core_spill_mask_;
// For quick code, a bit mask describing spilled FPR callee-save registers.
const uint32_t fp_spill_mask_;
- // For quick code, a set of pairs (PC, Line) mapping from native PC offset to Java line
+ // For quick code, a set of pairs (PC, DEX) mapping from native PC offset to DEX offset.
SwapSrcMap* src_mapping_table_;
// For quick code, a uleb128 encoded map from native PC offset to dex PC as well as dex PC to
// native PC offset. Size prefixed.
@@ -388,7 +400,7 @@ class CompiledMethod FINAL : public CompiledCode {
// For quick code, a FDE entry for the debug_frame section.
SwapVector<uint8_t>* cfi_info_;
// For quick code, linker patches needed by the method.
- SwapVector<LinkerPatch> patches_;
+ const SwapVector<LinkerPatch> patches_;
};
} // namespace art
diff --git a/compiler/compiler.h b/compiler/compiler.h
index 6ec39f9605..a04641e3fa 100644
--- a/compiler/compiler.h
+++ b/compiler/compiler.h
@@ -107,6 +107,9 @@ class Compiler {
return driver_;
}
+ // Whether to produce 64-bit ELF files for 64-bit targets. Leave this off for now.
+ static constexpr bool kProduce64BitELFFiles = false;
+
private:
CompilerDriver* const driver_;
const uint64_t maximum_compilation_time_before_warning_;
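A hedged sketch of the kind of branch this constant enables in the ELF writer (Is64BitInstructionSet() is an existing ART helper; the surrounding writer code is assumed):

    const bool is64bit = Compiler::kProduce64BitELFFiles && Is64BitInstructionSet(isa);
    // With the flag off, 64-bit targets keep receiving ELFCLASS32 files for now.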
diff --git a/compiler/dex/bb_optimizations.h b/compiler/dex/bb_optimizations.h
index 93d83c6fd4..0850f42a9a 100644
--- a/compiler/dex/bb_optimizations.h
+++ b/compiler/dex/bb_optimizations.h
@@ -403,13 +403,6 @@ class SuspendCheckElimination : public PassME {
DCHECK(bb != nullptr);
return c_unit->mir_graph->EliminateSuspendChecks(bb);
}
-
- void End(PassDataHolder* data) const {
- DCHECK(data != nullptr);
- CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit;
- DCHECK(c_unit != nullptr);
- c_unit->mir_graph->EliminateSuspendChecksEnd();
- }
};
} // namespace art
diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h
index 39725dee38..0acdd422df 100644
--- a/compiler/dex/compiler_enums.h
+++ b/compiler/dex/compiler_enums.h
@@ -99,14 +99,16 @@ std::ostream& operator<<(std::ostream& os, const BBType& code);
// Shared pseudo opcodes - must be < 0.
enum LIRPseudoOpcode {
- kPseudoExportedPC = -16,
- kPseudoSafepointPC = -15,
- kPseudoIntrinsicRetry = -14,
- kPseudoSuspendTarget = -13,
- kPseudoThrowTarget = -12,
- kPseudoCaseLabel = -11,
- kPseudoMethodEntry = -10,
- kPseudoMethodExit = -9,
+ kPseudoPrologueBegin = -18,
+ kPseudoPrologueEnd = -17,
+ kPseudoEpilogueBegin = -16,
+ kPseudoEpilogueEnd = -15,
+ kPseudoExportedPC = -14,
+ kPseudoSafepointPC = -13,
+ kPseudoIntrinsicRetry = -12,
+ kPseudoSuspendTarget = -11,
+ kPseudoThrowTarget = -10,
+ kPseudoCaseLabel = -9,
kPseudoBarrier = -8,
kPseudoEntryBlock = -7,
kPseudoExitBlock = -6,
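The four new pseudo opcodes let the debug-info writer tell the prologue and epilogue apart from the method body, replacing kPseudoMethodEntry/kPseudoMethodExit. A hedged sketch of how a backend could emit them (NewLIR0() as used elsewhere in this diff; the elided steps are placeholders):

    NewLIR0(kPseudoPrologueBegin);
    // ... spill callee-saves, drop SP ...
    NewLIR0(kPseudoPrologueEnd);
    // ... method body ...
    NewLIR0(kPseudoEpilogueBegin);
    // ... pop the frame, return ...
    NewLIR0(kPseudoEpilogueEnd);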
diff --git a/compiler/dex/gvn_dead_code_elimination.cc b/compiler/dex/gvn_dead_code_elimination.cc
index 2d4c18ff49..ec12221f3c 100644
--- a/compiler/dex/gvn_dead_code_elimination.cc
+++ b/compiler/dex/gvn_dead_code_elimination.cc
@@ -1357,7 +1357,6 @@ bool GvnDeadCodeElimination::RecordMIR(MIR* mir) {
default:
LOG(FATAL) << "Unexpected opcode: " << opcode;
UNREACHABLE();
- break;
}
if (mir->ssa_rep->num_defs != 0) {
diff --git a/compiler/dex/local_value_numbering.cc b/compiler/dex/local_value_numbering.cc
index dc222b5211..cdf5e38a9c 100644
--- a/compiler/dex/local_value_numbering.cc
+++ b/compiler/dex/local_value_numbering.cc
@@ -166,9 +166,9 @@ class LocalValueNumbering::AliasingArrayVersions {
return gvn->LookupValue(kAliasingArrayOp, type, location, memory_version);
}
- static uint16_t LookupMergeValue(GlobalValueNumbering* gvn ATTRIBUTE_UNUSED,
+ static uint16_t LookupMergeValue(GlobalValueNumbering* gvn,
const LocalValueNumbering* lvn,
- uint16_t type ATTRIBUTE_UNUSED, uint16_t location) {
+ uint16_t type, uint16_t location) {
// If the location is non-aliasing in lvn, use the non-aliasing value.
uint16_t array = gvn->GetArrayLocationBase(location);
if (lvn->IsNonAliasingArray(array, type)) {
@@ -182,8 +182,6 @@ class LocalValueNumbering::AliasingArrayVersions {
static bool HasNewBaseVersion(GlobalValueNumbering* gvn ATTRIBUTE_UNUSED,
const LocalValueNumbering* lvn,
uint16_t type ATTRIBUTE_UNUSED) {
- UNUSED(gvn);
- UNUSED(type);
return lvn->global_memory_version_ == lvn->merge_new_memory_version_;
}
diff --git a/compiler/dex/mir_dataflow.cc b/compiler/dex/mir_dataflow.cc
index f638b0bf4d..2a920a4e29 100644
--- a/compiler/dex/mir_dataflow.cc
+++ b/compiler/dex/mir_dataflow.cc
@@ -1396,6 +1396,13 @@ void MIRGraph::CompilerInitializeSSAConversion() {
InitializeBasicBlockDataFlow();
}
+uint32_t MIRGraph::GetUseCountWeight(BasicBlock* bb) const {
+ // Each level of nesting adds 100 to the weight, capped at 3 levels deep.
+ uint32_t depth = std::min(3U, static_cast<uint32_t>(bb->nesting_depth));
+ uint32_t weight = std::max(1U, depth * 100);
+ return weight;
+}
+
/*
* Count uses, weighting by loop nesting depth. This code only
* counts explicitly used s_regs. A later phase will add implicit
@@ -1405,9 +1412,7 @@ void MIRGraph::CountUses(BasicBlock* bb) {
if (bb->block_type != kDalvikByteCode) {
return;
}
- // Each level of nesting adds *100 to count, up to 3 levels deep.
- uint32_t depth = std::min(3U, static_cast<uint32_t>(bb->nesting_depth));
- uint32_t weight = std::max(1U, depth * 100);
+ uint32_t weight = GetUseCountWeight(bb);
for (MIR* mir = bb->first_mir_insn; (mir != NULL); mir = mir->next) {
if (mir->ssa_rep == NULL) {
continue;
@@ -1417,23 +1422,6 @@ void MIRGraph::CountUses(BasicBlock* bb) {
raw_use_counts_[s_reg] += 1u;
use_counts_[s_reg] += weight;
}
- if (!(cu_->disable_opt & (1 << kPromoteCompilerTemps))) {
- uint64_t df_attributes = GetDataFlowAttributes(mir);
- // Implicit use of Method* ? */
- if (df_attributes & DF_UMS) {
- /*
- * Some invokes will not use Method* - need to perform test similar
- * to that found in GenInvoke() to decide whether to count refs
- * for Method* on invoke-class opcodes. This is a relatively expensive
- * operation, so should only be done once.
- * TODO: refactor InvokeUsesMethodStar() to perform check at parse time,
- * and save results for both here and GenInvoke. For now, go ahead
- * and assume all invokes use method*.
- */
- raw_use_counts_[method_sreg_] += 1u;
- use_counts_[method_sreg_] += weight;
- }
- }
}
}
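The factored-out weight is easy to tabulate; a comment-style worked example of GetUseCountWeight() for a few nesting depths:

    // depth 0  -> std::max(1U, 0 * 100) = 1
    // depth 1  -> std::max(1U, 1 * 100) = 100
    // depth 2  -> std::max(1U, 2 * 100) = 200
    // depth 3+ -> std::max(1U, 3 * 100) = 300  (depth is clamped to 3)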
diff --git a/compiler/dex/mir_field_info.cc b/compiler/dex/mir_field_info.cc
index d2079a254d..a9ab3bb0d4 100644
--- a/compiler/dex/mir_field_info.cc
+++ b/compiler/dex/mir_field_info.cc
@@ -19,6 +19,7 @@
#include <string.h>
#include "base/logging.h"
+#include "dex/verified_method.h"
#include "driver/compiler_driver.h"
#include "driver/compiler_driver-inl.h"
#include "mirror/class_loader.h" // Only to allow casts in Handle<ClassLoader>.
diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc
index 3103f96e4e..4d340387f2 100644
--- a/compiler/dex/mir_graph.cc
+++ b/compiler/dex/mir_graph.cc
@@ -688,7 +688,7 @@ BasicBlock* MIRGraph::ProcessCanThrow(BasicBlock* cur_block, MIR* insn, DexOffse
/* Parse a Dex method and insert it into the MIRGraph at the current insert point. */
void MIRGraph::InlineMethod(const DexFile::CodeItem* code_item, uint32_t access_flags,
- InvokeType invoke_type, uint16_t class_def_idx,
+ InvokeType invoke_type ATTRIBUTE_UNUSED, uint16_t class_def_idx,
uint32_t method_idx, jobject class_loader, const DexFile& dex_file) {
current_code_item_ = code_item;
method_stack_.push_back(std::make_pair(current_method_, current_offset_));
@@ -726,13 +726,6 @@ void MIRGraph::InlineMethod(const DexFile::CodeItem* code_item, uint32_t access_
null_block->hidden = true;
entry_block_ = CreateNewBB(kEntryBlock);
exit_block_ = CreateNewBB(kExitBlock);
- // TODO: deprecate all "cu->" fields; move what's left to wherever CompilationUnit is allocated.
- cu_->dex_file = &dex_file;
- cu_->class_def_idx = class_def_idx;
- cu_->method_idx = method_idx;
- cu_->access_flags = access_flags;
- cu_->invoke_type = invoke_type;
- cu_->shorty = dex_file.GetMethodShorty(dex_file.GetMethodId(method_idx));
} else {
UNIMPLEMENTED(FATAL) << "Nested inlining not implemented.";
/*
@@ -1616,8 +1609,8 @@ void MIRGraph::ReplaceSpecialChars(std::string& str) {
}
std::string MIRGraph::GetSSAName(int ssa_reg) {
- // TODO: This value is needed for LLVM and debugging. Currently, we compute this and then copy to
- // the arena. We should be smarter and just place straight into the arena, or compute the
+ // TODO: This value is needed for debugging. Currently, we compute this and then copy to the
+ // arena. We should be smarter and just place straight into the arena, or compute the
// value more lazily.
int vreg = SRegToVReg(ssa_reg);
if (vreg >= static_cast<int>(GetFirstTempVR())) {
diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h
index 3298af1162..85b13448da 100644
--- a/compiler/dex/mir_graph.h
+++ b/compiler/dex/mir_graph.h
@@ -960,6 +960,12 @@ class MIRGraph {
*/
CompilerTemp* GetNewCompilerTemp(CompilerTempType ct_type, bool wide);
+ /**
+ * @brief Used to remove the last created compiler temporary when it's not needed.
+ * @param temp the temporary to remove.
+ */
+ void RemoveLastCompilerTemp(CompilerTempType ct_type, bool wide, CompilerTemp* temp);
+
bool MethodIsLeaf() {
return attributes_ & METHOD_IS_LEAF;
}
@@ -1079,7 +1085,6 @@ class MIRGraph {
void EliminateDeadCodeEnd();
bool EliminateSuspendChecksGate();
bool EliminateSuspendChecks(BasicBlock* bb);
- void EliminateSuspendChecksEnd();
uint16_t GetGvnIFieldId(MIR* mir) const {
DCHECK(IsInstructionIGetOrIPut(mir->dalvikInsn.opcode));
@@ -1185,6 +1190,12 @@ class MIRGraph {
void DoConstantPropagation(BasicBlock* bb);
/**
+ * @brief Get use count weight for a given block.
+ * @param bb the BasicBlock.
+ */
+ uint32_t GetUseCountWeight(BasicBlock* bb) const;
+
+ /**
* @brief Count the uses in the BasicBlock
* @param bb the BasicBlock
*/
@@ -1396,10 +1407,6 @@ class MIRGraph {
uint16_t* sfield_ids; // Ditto.
GvnDeadCodeElimination* dce;
} gvn;
- // Suspend check elimination.
- struct {
- DexFileMethodInliner* inliner;
- } sce;
} temp_;
static const int kInvalidEntry = -1;
ArenaVector<BasicBlock*> block_list_;
@@ -1451,6 +1458,7 @@ class MIRGraph {
friend class GvnDeadCodeEliminationTest;
friend class LocalValueNumberingTest;
friend class TopologicalSortOrderTest;
+ friend class QuickCFITest;
};
} // namespace art
diff --git a/compiler/dex/mir_method_info.cc b/compiler/dex/mir_method_info.cc
index 34fb1bf0e0..0c84b82edd 100644
--- a/compiler/dex/mir_method_info.cc
+++ b/compiler/dex/mir_method_info.cc
@@ -16,9 +16,13 @@
# include "mir_method_info.h"
+#include "dex/quick/dex_file_method_inliner.h"
+#include "dex/quick/dex_file_to_method_inliner_map.h"
+#include "dex/verified_method.h"
#include "driver/compiler_driver.h"
#include "driver/dex_compilation_unit.h"
#include "driver/compiler_driver-inl.h"
+#include "driver/compiler_options.h"
#include "mirror/class_loader.h" // Only to allow casts in Handle<ClassLoader>.
#include "mirror/dex_cache.h" // Only to allow casts in Handle<DexCache>.
#include "scoped_thread_state_change.h"
@@ -62,6 +66,9 @@ void MirMethodLoweringInfo::Resolve(CompilerDriver* compiler_driver,
const DexFile* const dex_file = mUnit->GetDexFile();
const bool use_jit = runtime->UseJit();
const VerifiedMethod* const verified_method = mUnit->GetVerifiedMethod();
+ DexFileToMethodInlinerMap* inliner_map = compiler_driver->GetMethodInlinerMap();
+ DexFileMethodInliner* default_inliner =
+ (inliner_map != nullptr) ? inliner_map->GetMethodInliner(dex_file) : nullptr;
for (auto it = method_infos, end = method_infos + count; it != end; ++it) {
// For quickened invokes, the dex method idx is actually the mir offset.
@@ -120,6 +127,7 @@ void MirMethodLoweringInfo::Resolve(CompilerDriver* compiler_driver,
if (UNLIKELY(resolved_method == nullptr)) {
continue;
}
+
compiler_driver->GetResolvedMethodDexFileLocation(resolved_method,
&it->declaring_dex_file_, &it->declaring_class_idx_, &it->declaring_method_idx_);
if (!it->IsQuickened()) {
@@ -131,6 +139,7 @@ void MirMethodLoweringInfo::Resolve(CompilerDriver* compiler_driver,
it->vtable_idx_ =
compiler_driver->GetResolvedMethodVTableIndex(resolved_method, invoke_type);
}
+
MethodReference target_method(it->target_dex_file_, it->target_method_idx_);
int fast_path_flags = compiler_driver->IsFastInvoke(
soa, current_dex_cache, class_loader, mUnit, referrer_class.Get(), resolved_method,
@@ -138,10 +147,23 @@ void MirMethodLoweringInfo::Resolve(CompilerDriver* compiler_driver,
const bool is_referrers_class = referrer_class.Get() == resolved_method->GetDeclaringClass();
const bool is_class_initialized =
compiler_driver->IsMethodsClassInitialized(referrer_class.Get(), resolved_method);
+
+ // Check if the target method is intrinsic or special.
+ InlineMethodFlags is_intrinsic_or_special = kNoInlineMethodFlags;
+ if (inliner_map != nullptr) {
+ auto* inliner = (target_method.dex_file == dex_file)
+ ? default_inliner
+ : inliner_map->GetMethodInliner(target_method.dex_file);
+ is_intrinsic_or_special = inliner->IsIntrinsicOrSpecial(target_method.dex_method_index);
+ }
+
uint16_t other_flags = it->flags_ &
- ~(kFlagFastPath | kFlagClassIsInitialized | (kInvokeTypeMask << kBitSharpTypeBegin));
+ ~(kFlagFastPath | kFlagIsIntrinsic | kFlagIsSpecial | kFlagClassIsInitialized |
+ (kInvokeTypeMask << kBitSharpTypeBegin));
it->flags_ = other_flags |
(fast_path_flags != 0 ? kFlagFastPath : 0u) |
+ ((is_intrinsic_or_special & kInlineIntrinsic) != 0 ? kFlagIsIntrinsic : 0u) |
+ ((is_intrinsic_or_special & kInlineSpecial) != 0 ? kFlagIsSpecial : 0u) |
(static_cast<uint16_t>(invoke_type) << kBitSharpTypeBegin) |
(is_referrers_class ? kFlagIsReferrersClass : 0u) |
(is_class_initialized ? kFlagClassIsInitialized : 0u);
diff --git a/compiler/dex/mir_method_info.h b/compiler/dex/mir_method_info.h
index e131c96a81..7230c462cd 100644
--- a/compiler/dex/mir_method_info.h
+++ b/compiler/dex/mir_method_info.h
@@ -127,6 +127,14 @@ class MirMethodLoweringInfo : public MirMethodInfo {
return (flags_ & kFlagFastPath) != 0u;
}
+ bool IsIntrinsic() const {
+ return (flags_ & kFlagIsIntrinsic) != 0u;
+ }
+
+ bool IsSpecial() const {
+ return (flags_ & kFlagIsSpecial) != 0u;
+ }
+
bool IsReferrersClass() const {
return (flags_ & kFlagIsReferrersClass) != 0;
}
@@ -188,9 +196,11 @@ class MirMethodLoweringInfo : public MirMethodInfo {
private:
enum {
kBitFastPath = kMethodInfoBitEnd,
+ kBitIsIntrinsic,
+ kBitIsSpecial,
kBitInvokeTypeBegin,
kBitInvokeTypeEnd = kBitInvokeTypeBegin + 3, // 3 bits for invoke type.
- kBitSharpTypeBegin,
+ kBitSharpTypeBegin = kBitInvokeTypeEnd,
kBitSharpTypeEnd = kBitSharpTypeBegin + 3, // 3 bits for sharp type.
kBitIsReferrersClass = kBitSharpTypeEnd,
kBitClassIsInitialized,
@@ -199,6 +209,8 @@ class MirMethodLoweringInfo : public MirMethodInfo {
};
static_assert(kMethodLoweringInfoBitEnd <= 16, "Too many flags");
static constexpr uint16_t kFlagFastPath = 1u << kBitFastPath;
+ static constexpr uint16_t kFlagIsIntrinsic = 1u << kBitIsIntrinsic;
+ static constexpr uint16_t kFlagIsSpecial = 1u << kBitIsSpecial;
static constexpr uint16_t kFlagIsReferrersClass = 1u << kBitIsReferrersClass;
static constexpr uint16_t kFlagClassIsInitialized = 1u << kBitClassIsInitialized;
static constexpr uint16_t kFlagQuickened = 1u << kBitQuickened;
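A sketch of how the new flags are consumed when scanning MIRs inside MIRGraph, mirroring the two call sites added to mir_optimization.cc below:

    const MirMethodLoweringInfo& method_info = GetMethodLoweringInfo(mir);
    if (method_info.IsIntrinsic()) {
      // Intrinsics expand inline and need no suspend point of their own.
    }
    if (method_info.FastPath() && method_info.IsSpecial()) {
      // Candidate for InlineSpecialMethods().
    }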
diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc
index c85c3b6f21..9d7b4b4dfd 100644
--- a/compiler/dex/mir_optimization.cc
+++ b/compiler/dex/mir_optimization.cc
@@ -318,9 +318,11 @@ CompilerTemp* MIRGraph::GetNewCompilerTemp(CompilerTempType ct_type, bool wide)
// Since VR temps cannot be requested once the BE temps are requested, we
// allow reservation of VR temps as well for BE.
size_t available_temps = reserved_temps_for_backend_ + GetNumAvailableVRTemps();
- if (available_temps <= 0 || (available_temps <= 1 && wide)) {
+ size_t needed_temps = wide ? 2u : 1u;
+ if (available_temps < needed_temps) {
if (verbose) {
- LOG(INFO) << "CompilerTemps: Not enough temp(s) of type " << ct_type_str << " are available.";
+ LOG(INFO) << "CompilerTemps: Not enough temp(s) of type " << ct_type_str
+ << " are available.";
}
return nullptr;
}
@@ -328,12 +330,8 @@ CompilerTemp* MIRGraph::GetNewCompilerTemp(CompilerTempType ct_type, bool wide)
// Update the remaining reserved temps since we have now used them.
// Note that the code below is actually subtracting to remove them from reserve
// once they have been claimed. It is careful to not go below zero.
- if (reserved_temps_for_backend_ >= 1) {
- reserved_temps_for_backend_--;
- }
- if (wide && reserved_temps_for_backend_ >= 1) {
- reserved_temps_for_backend_--;
- }
+ reserved_temps_for_backend_ =
+ std::max(reserved_temps_for_backend_, needed_temps) - needed_temps;
// The new non-special compiler temp must receive a unique v_reg.
compiler_temp->v_reg = GetFirstNonSpecialTempVR() + num_non_special_compiler_temps_;
@@ -407,6 +405,36 @@ CompilerTemp* MIRGraph::GetNewCompilerTemp(CompilerTempType ct_type, bool wide)
return compiler_temp;
}
+void MIRGraph::RemoveLastCompilerTemp(CompilerTempType ct_type, bool wide, CompilerTemp* temp) {
+ // Once the compiler temps have been committed, it's too late for any modifications.
+ DCHECK_EQ(compiler_temps_committed_, false);
+
+ size_t used_temps = wide ? 2u : 1u;
+
+ if (ct_type == kCompilerTempBackend) {
+ DCHECK(requested_backend_temp_);
+
+ // Make the temps available to backend again.
+ reserved_temps_for_backend_ += used_temps;
+ } else if (ct_type == kCompilerTempVR) {
+ DCHECK(!requested_backend_temp_);
+ } else {
+ UNIMPLEMENTED(FATAL) << "No handling for compiler temp type " << static_cast<int>(ct_type);
+ }
+
+ // Reduce the number of non-special compiler temps.
+ DCHECK_LE(used_temps, num_non_special_compiler_temps_);
+ num_non_special_compiler_temps_ -= used_temps;
+
+ // Check that this was really the last temp.
+ DCHECK_EQ(static_cast<size_t>(temp->v_reg),
+ GetFirstNonSpecialTempVR() + num_non_special_compiler_temps_);
+
+ if (cu_->verbose) {
+ LOG(INFO) << "Last temporary has been removed.";
+ }
+}
+
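A sketch of the intended pairing with GetNewCompilerTemp(), for a backend that reserves a temporary speculatively (the predicate is hypothetical):

    CompilerTemp* temp = GetNewCompilerTemp(kCompilerTempBackend, /* wide */ false);
    if (temp != nullptr && !TempTurnedOutToBeNeeded()) {  // hypothetical predicate
      // Only the most recently created temp may be removed, and only before
      // the compiler temps are committed.
      RemoveLastCompilerTemp(kCompilerTempBackend, /* wide */ false, temp);
    }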
static bool EvaluateBranch(Instruction::Code opcode, int32_t src1, int32_t src2) {
bool is_taken;
switch (opcode) {
@@ -1489,7 +1517,7 @@ void MIRGraph::InlineSpecialMethods(BasicBlock* bb) {
continue;
}
const MirMethodLoweringInfo& method_info = GetMethodLoweringInfo(mir);
- if (!method_info.FastPath()) {
+ if (!method_info.FastPath() || !method_info.IsSpecial()) {
continue;
}
@@ -1631,10 +1659,6 @@ bool MIRGraph::EliminateSuspendChecksGate() {
!HasInvokes()) { // No invokes to actually eliminate any suspend checks.
return false;
}
- if (cu_->compiler_driver != nullptr && cu_->compiler_driver->GetMethodInlinerMap() != nullptr) {
- temp_.sce.inliner =
- cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(cu_->dex_file);
- }
suspend_checks_in_loops_ = arena_->AllocArray<uint32_t>(GetNumBlocks(), kArenaAllocMisc);
return true;
}
@@ -1652,9 +1676,9 @@ bool MIRGraph::EliminateSuspendChecks(BasicBlock* bb) {
uint32_t suspend_checks_in_loops = (1u << bb->nesting_depth) - 1u; // Start with all loop heads.
bool found_invoke = false;
for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) {
- if (IsInstructionInvoke(mir->dalvikInsn.opcode) &&
- (temp_.sce.inliner == nullptr ||
- !temp_.sce.inliner->IsIntrinsic(mir->dalvikInsn.vB, nullptr))) {
+ if ((IsInstructionInvoke(mir->dalvikInsn.opcode) ||
+ IsInstructionQuickInvoke(mir->dalvikInsn.opcode)) &&
+ !GetMethodLoweringInfo(mir).IsIntrinsic()) {
// Non-intrinsic invoke, rely on a suspend point in the invoked method.
found_invoke = true;
break;
@@ -1717,10 +1741,6 @@ bool MIRGraph::EliminateSuspendChecks(BasicBlock* bb) {
return true;
}
-void MIRGraph::EliminateSuspendChecksEnd() {
- temp_.sce.inliner = nullptr;
-}
-
bool MIRGraph::CanThrow(MIR* mir) const {
if ((mir->dalvikInsn.FlagsOf() & Instruction::kThrow) == 0) {
return false;
diff --git a/compiler/dex/mir_optimization_test.cc b/compiler/dex/mir_optimization_test.cc
index 9ce5ebbc1b..10a4337cf5 100644
--- a/compiler/dex/mir_optimization_test.cc
+++ b/compiler/dex/mir_optimization_test.cc
@@ -474,7 +474,6 @@ class SuspendCheckEliminationTest : public MirOptimizationTest {
for (BasicBlock* bb = iterator.Next(change); bb != nullptr; bb = iterator.Next(change)) {
change = cu_.mir_graph->EliminateSuspendChecks(bb);
}
- cu_.mir_graph->EliminateSuspendChecksEnd();
}
SuspendCheckEliminationTest()
diff --git a/compiler/dex/quick/arm/assemble_arm.cc b/compiler/dex/quick/arm/assemble_arm.cc
index 3e69878846..c5ac4c1508 100644
--- a/compiler/dex/quick/arm/assemble_arm.cc
+++ b/compiler/dex/quick/arm/assemble_arm.cc
@@ -1083,7 +1083,9 @@ void ArmMir2Lir::InsertFixupBefore(LIR* prev_lir, LIR* orig_lir, LIR* new_lir) {
#define PADDING_MOV_R5_R5 0x1C2D
uint8_t* ArmMir2Lir::EncodeLIRs(uint8_t* write_pos, LIR* lir) {
+ uint8_t* const write_buffer = write_pos;
for (; lir != NULL; lir = NEXT_LIR(lir)) {
+ lir->offset = (write_pos - write_buffer);
if (!lir->flags.is_nop) {
int opcode = lir->opcode;
if (IsPseudoLirOp(opcode)) {
diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc
index d46c25a8da..3d18af6169 100644
--- a/compiler/dex/quick/arm/call_arm.cc
+++ b/compiler/dex/quick/arm/call_arm.cc
@@ -23,11 +23,13 @@
#include "dex/mir_graph.h"
#include "dex/quick/mir_to_lir-inl.h"
#include "driver/compiler_driver.h"
+#include "driver/compiler_options.h"
#include "gc/accounting/card_table.h"
#include "mirror/art_method.h"
#include "mirror/object_array-inl.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "utils.h"
+#include "utils/dex_cache_arrays_layout-inl.h"
namespace art {
@@ -353,7 +355,16 @@ void ArmMir2Lir::UnconditionallyMarkGCCard(RegStorage tgt_addr_reg) {
FreeTemp(reg_card_no);
}
+static dwarf::Reg DwarfCoreReg(int num) {
+ return dwarf::Reg::ArmCore(num);
+}
+
+static dwarf::Reg DwarfFpReg(int num) {
+ return dwarf::Reg::ArmFp(num);
+}
+
void ArmMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) {
+ DCHECK_EQ(cfi_.GetCurrentCFAOffset(), 0); // empty stack.
int spill_count = num_core_spills_ + num_fp_spills_;
/*
* On entry, r0, r1, r2 & r3 are live. Let the register allocation
@@ -371,7 +382,6 @@ void ArmMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) {
* a leaf *and* our frame size < fudge factor.
*/
bool skip_overflow_check = mir_graph_->MethodIsLeaf() && !FrameNeedsStackCheck(frame_size_, kArm);
- NewLIR0(kPseudoMethodEntry);
const size_t kStackOverflowReservedUsableBytes = GetStackOverflowReservedBytes(kArm);
bool large_frame = (static_cast<size_t>(frame_size_) > kStackOverflowReservedUsableBytes);
bool generate_explicit_stack_overflow_check = large_frame ||
@@ -402,28 +412,32 @@ void ArmMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) {
}
}
/* Spill core callee saves */
- if (core_spill_mask_ == 0u) {
- // Nothing to spill.
- } else if ((core_spill_mask_ & ~(0xffu | (1u << rs_rARM_LR.GetRegNum()))) == 0u) {
- // Spilling only low regs and/or LR, use 16-bit PUSH.
- constexpr int lr_bit_shift = rs_rARM_LR.GetRegNum() - 8;
- NewLIR1(kThumbPush,
- (core_spill_mask_ & ~(1u << rs_rARM_LR.GetRegNum())) |
- ((core_spill_mask_ & (1u << rs_rARM_LR.GetRegNum())) >> lr_bit_shift));
- } else if (IsPowerOfTwo(core_spill_mask_)) {
- // kThumb2Push cannot be used to spill a single register.
- NewLIR1(kThumb2Push1, CTZ(core_spill_mask_));
- } else {
- NewLIR1(kThumb2Push, core_spill_mask_);
+ if (core_spill_mask_ != 0u) {
+ if ((core_spill_mask_ & ~(0xffu | (1u << rs_rARM_LR.GetRegNum()))) == 0u) {
+ // Spilling only low regs and/or LR, use 16-bit PUSH.
+ constexpr int lr_bit_shift = rs_rARM_LR.GetRegNum() - 8;
+ NewLIR1(kThumbPush,
+ (core_spill_mask_ & ~(1u << rs_rARM_LR.GetRegNum())) |
+ ((core_spill_mask_ & (1u << rs_rARM_LR.GetRegNum())) >> lr_bit_shift));
+ } else if (IsPowerOfTwo(core_spill_mask_)) {
+ // kThumb2Push cannot be used to spill a single register.
+ NewLIR1(kThumb2Push1, CTZ(core_spill_mask_));
+ } else {
+ NewLIR1(kThumb2Push, core_spill_mask_);
+ }
+ cfi_.AdjustCFAOffset(num_core_spills_ * kArmPointerSize);
+ cfi_.RelOffsetForMany(DwarfCoreReg(0), 0, core_spill_mask_, kArmPointerSize);
}
/* Need to spill any FP regs? */
- if (num_fp_spills_) {
+ if (num_fp_spills_ != 0u) {
/*
* NOTE: fp spills are a little different from core spills in that
* they are pushed as a contiguous block. When promoting from
* the fp set, we must allocate all singles from s16..highest-promoted
*/
NewLIR1(kThumb2VPushCS, num_fp_spills_);
+ cfi_.AdjustCFAOffset(num_fp_spills_ * kArmPointerSize);
+ cfi_.RelOffsetForMany(DwarfFpReg(0), 0, fp_spill_mask_, kArmPointerSize);
}
const int spill_size = spill_count * 4;
@@ -444,12 +458,14 @@ void ArmMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) {
m2l_->LoadWordDisp(rs_rARM_SP, sp_displace_ - 4, rs_rARM_LR);
}
m2l_->OpRegImm(kOpAdd, rs_rARM_SP, sp_displace_);
+ m2l_->cfi().AdjustCFAOffset(-sp_displace_);
m2l_->ClobberCallerSave();
ThreadOffset<4> func_offset = QUICK_ENTRYPOINT_OFFSET(4, pThrowStackOverflow);
// Load the entrypoint directly into the pc instead of doing a load + branch. Assumes
// codegen and target are in thumb2 mode.
// NOTE: native pointer.
m2l_->LoadWordDisp(rs_rARM_SELF, func_offset.Int32Value(), rs_rARM_PC);
+ m2l_->cfi().AdjustCFAOffset(sp_displace_);
}
private:
@@ -464,6 +480,7 @@ void ArmMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) {
// Need to restore LR since we used it as a temp.
AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, true, spill_size));
OpRegCopy(rs_rARM_SP, rs_rARM_LR); // Establish stack
+ cfi_.AdjustCFAOffset(frame_size_without_spills);
} else {
/*
* If the frame is small enough we are guaranteed to have enough space that remains to
@@ -474,6 +491,7 @@ void ArmMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) {
MarkTemp(rs_rARM_LR);
FreeTemp(rs_rARM_LR);
OpRegRegImm(kOpSub, rs_rARM_SP, rs_rARM_SP, frame_size_without_spills);
+ cfi_.AdjustCFAOffset(frame_size_without_spills);
Clobber(rs_rARM_LR);
UnmarkTemp(rs_rARM_LR);
LIR* branch = OpCmpBranch(kCondUlt, rs_rARM_SP, rs_r12, nullptr);
@@ -483,13 +501,23 @@ void ArmMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) {
// Implicit stack overflow check has already been done. Just make room on the
// stack for the frame now.
OpRegImm(kOpSub, rs_rARM_SP, frame_size_without_spills);
+ cfi_.AdjustCFAOffset(frame_size_without_spills);
}
} else {
OpRegImm(kOpSub, rs_rARM_SP, frame_size_without_spills);
+ cfi_.AdjustCFAOffset(frame_size_without_spills);
}
FlushIns(ArgLocs, rl_method);
+ // We can promote a PC-relative reference to dex cache arrays to a register
+ // if it's used at least twice. Without investigating where we should lazily
+ // load the reference, we conveniently load it after flushing inputs.
+ if (dex_cache_arrays_base_reg_.Valid()) {
+ OpPcRelDexCacheArrayAddr(cu_->dex_file, dex_cache_arrays_min_offset_,
+ dex_cache_arrays_base_reg_);
+ }
+
FreeTemp(rs_r0);
FreeTemp(rs_r1);
FreeTemp(rs_r2);
@@ -498,7 +526,9 @@ void ArmMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) {
}
void ArmMir2Lir::GenExitSequence() {
+ cfi_.RememberState();
int spill_count = num_core_spills_ + num_fp_spills_;
+
/*
* In the exit path, r0/r1 are live - make sure they aren't
* allocated by the register utilities as temps.
@@ -506,35 +536,47 @@ void ArmMir2Lir::GenExitSequence() {
LockTemp(rs_r0);
LockTemp(rs_r1);
- NewLIR0(kPseudoMethodExit);
- OpRegImm(kOpAdd, rs_rARM_SP, frame_size_ - (spill_count * 4));
+ int adjust = frame_size_ - (spill_count * kArmPointerSize);
+ OpRegImm(kOpAdd, rs_rARM_SP, adjust);
+ cfi_.AdjustCFAOffset(-adjust);
/* Need to restore any FP callee saves? */
if (num_fp_spills_) {
NewLIR1(kThumb2VPopCS, num_fp_spills_);
+ cfi_.AdjustCFAOffset(-num_fp_spills_ * kArmPointerSize);
+ cfi_.RestoreMany(DwarfFpReg(0), fp_spill_mask_);
}
- if ((core_spill_mask_ & (1 << rs_rARM_LR.GetRegNum())) != 0) {
- /* Unspill rARM_LR to rARM_PC */
+ bool unspill_LR_to_PC = (core_spill_mask_ & (1 << rs_rARM_LR.GetRegNum())) != 0;
+ if (unspill_LR_to_PC) {
core_spill_mask_ &= ~(1 << rs_rARM_LR.GetRegNum());
core_spill_mask_ |= (1 << rs_rARM_PC.GetRegNum());
}
- if (core_spill_mask_ == 0u) {
- // Nothing to unspill.
- } else if ((core_spill_mask_ & ~(0xffu | (1u << rs_rARM_PC.GetRegNum()))) == 0u) {
- // Unspilling only low regs and/or PC, use 16-bit POP.
- constexpr int pc_bit_shift = rs_rARM_PC.GetRegNum() - 8;
- NewLIR1(kThumbPop,
- (core_spill_mask_ & ~(1u << rs_rARM_PC.GetRegNum())) |
- ((core_spill_mask_ & (1u << rs_rARM_PC.GetRegNum())) >> pc_bit_shift));
- } else if (IsPowerOfTwo(core_spill_mask_)) {
- // kThumb2Pop cannot be used to unspill a single register.
- NewLIR1(kThumb2Pop1, CTZ(core_spill_mask_));
- } else {
- NewLIR1(kThumb2Pop, core_spill_mask_);
+ if (core_spill_mask_ != 0u) {
+ if ((core_spill_mask_ & ~(0xffu | (1u << rs_rARM_PC.GetRegNum()))) == 0u) {
+ // Unspilling only low regs and/or PC, use 16-bit POP.
+ constexpr int pc_bit_shift = rs_rARM_PC.GetRegNum() - 8;
+ NewLIR1(kThumbPop,
+ (core_spill_mask_ & ~(1u << rs_rARM_PC.GetRegNum())) |
+ ((core_spill_mask_ & (1u << rs_rARM_PC.GetRegNum())) >> pc_bit_shift));
+ } else if (IsPowerOfTwo(core_spill_mask_)) {
+ // kThumb2Pop cannot be used to unspill a single register.
+ NewLIR1(kThumb2Pop1, CTZ(core_spill_mask_));
+ } else {
+ NewLIR1(kThumb2Pop, core_spill_mask_);
+ }
+ // If we pop to PC, there is no further epilogue code.
+ if (!unspill_LR_to_PC) {
+ cfi_.AdjustCFAOffset(-num_core_spills_ * kArmPointerSize);
+ cfi_.RestoreMany(DwarfCoreReg(0), core_spill_mask_);
+ DCHECK_EQ(cfi_.GetCurrentCFAOffset(), 0); // empty stack.
+ }
}
- if ((core_spill_mask_ & (1 << rs_rARM_PC.GetRegNum())) == 0) {
+ if (!unspill_LR_to_PC) {
    /* We didn't pop to rARM_PC, so must do a bx rARM_LR */
NewLIR1(kThumbBx, rs_rARM_LR.GetReg());
}
+ // The CFI should be restored for any code that follows the exit block.
+ cfi_.RestoreState();
+ cfi_.DefCFAOffset(frame_size_);
}
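
The RememberState()/RestoreState() pair above corresponds to the DWARF DW_CFA_remember_state and DW_CFA_restore_state opcodes: the epilogue's unwind-rule changes are scoped, so any code emitted after the exit block still unwinds with the full frame. A simplified model, assuming the only tracked state is the CFA offset (the real writer also re-asserts the offset explicitly, hence the DefCFAOffset call):

#include <stack>

class ToyCfi {
 public:
  void AdjustCFAOffset(int delta) { offset_ += delta; }
  void RememberState() { saved_.push(offset_); }  // DW_CFA_remember_state
  void RestoreState() {                           // DW_CFA_restore_state
    offset_ = saved_.top();  // assumes a matching RememberState()
    saved_.pop();
  }
  void DefCFAOffset(int offset) { offset_ = offset; }  // DW_CFA_def_cfa_offset
  int GetCurrentCFAOffset() const { return offset_; }

 private:
  int offset_ = 0;
  std::stack<int> saved_;
};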
void ArmMir2Lir::GenSpecialExitSequence() {
@@ -556,11 +598,16 @@ void ArmMir2Lir::GenSpecialEntryForSuspend() {
NewLIR1(kThumbPush, (1u << rs_r0.GetRegNum()) | // ArtMethod*
(core_spill_mask_ & ~(1u << rs_rARM_LR.GetRegNum())) | // Spills other than LR.
(1u << 8)); // LR encoded for 16-bit push.
+ cfi_.AdjustCFAOffset(frame_size_);
+ // Do not generate CFI for scratch register r0.
+ cfi_.RelOffsetForMany(DwarfCoreReg(0), 4, core_spill_mask_, kArmPointerSize);
}
void ArmMir2Lir::GenSpecialExitForSuspend() {
// Pop the frame. (ArtMethod* no longer needed but restore it anyway.)
NewLIR1(kThumb2Pop, (1u << rs_r0.GetRegNum()) | core_spill_mask_); // 32-bit because of LR.
+ cfi_.AdjustCFAOffset(-frame_size_);
+ cfi_.RestoreMany(DwarfCoreReg(0), core_spill_mask_);
}
static bool ArmUseRelativeCall(CompilationUnit* cu, const MethodReference& target_method) {
@@ -572,12 +619,12 @@ static bool ArmUseRelativeCall(CompilationUnit* cu, const MethodReference& targe
* Bit of a hack here - in the absence of a real scheduling pass,
* emit the next instruction in static & direct invoke sequences.
*/
-static int ArmNextSDCallInsn(CompilationUnit* cu, CallInfo* info ATTRIBUTE_UNUSED,
- int state, const MethodReference& target_method,
- uint32_t unused_idx ATTRIBUTE_UNUSED,
- uintptr_t direct_code, uintptr_t direct_method,
- InvokeType type) {
- Mir2Lir* cg = static_cast<Mir2Lir*>(cu->cg.get());
+int ArmMir2Lir::ArmNextSDCallInsn(CompilationUnit* cu, CallInfo* info ATTRIBUTE_UNUSED,
+ int state, const MethodReference& target_method,
+ uint32_t unused_idx ATTRIBUTE_UNUSED,
+ uintptr_t direct_code, uintptr_t direct_method,
+ InvokeType type) {
+ ArmMir2Lir* cg = static_cast<ArmMir2Lir*>(cu->cg.get());
if (direct_code != 0 && direct_method != 0) {
switch (state) {
case 0: // Get the current Method* [sets kArg0]
@@ -598,17 +645,24 @@ static int ArmNextSDCallInsn(CompilationUnit* cu, CallInfo* info ATTRIBUTE_UNUSE
return -1;
}
} else {
+ bool use_pc_rel = cg->CanUseOpPcRelDexCacheArrayLoad();
RegStorage arg0_ref = cg->TargetReg(kArg0, kRef);
switch (state) {
case 0: // Get the current Method* [sets kArg0]
// TUNING: we can save a reg copy if Method* has been promoted.
- cg->LoadCurrMethodDirect(arg0_ref);
- break;
+ if (!use_pc_rel) {
+ cg->LoadCurrMethodDirect(arg0_ref);
+ break;
+ }
+ ++state;
+ FALLTHROUGH_INTENDED;
case 1: // Get method->dex_cache_resolved_methods_
- cg->LoadRefDisp(arg0_ref,
- mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value(),
- arg0_ref,
- kNotVolatile);
+ if (!use_pc_rel) {
+ cg->LoadRefDisp(arg0_ref,
+ mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value(),
+ arg0_ref,
+ kNotVolatile);
+ }
// Set up direct code if known.
if (direct_code != 0) {
if (direct_code != static_cast<uintptr_t>(-1)) {
@@ -620,14 +674,23 @@ static int ArmNextSDCallInsn(CompilationUnit* cu, CallInfo* info ATTRIBUTE_UNUSE
cg->LoadCodeAddress(target_method, type, kInvokeTgt);
}
}
- break;
+ if (!use_pc_rel || direct_code != 0) {
+ break;
+ }
+ ++state;
+ FALLTHROUGH_INTENDED;
case 2: // Grab target method*
CHECK_EQ(cu->dex_file, target_method.dex_file);
- cg->LoadRefDisp(arg0_ref,
- mirror::ObjectArray<mirror::Object>::OffsetOfElement(
- target_method.dex_method_index).Int32Value(),
- arg0_ref,
- kNotVolatile);
+ if (!use_pc_rel) {
+ cg->LoadRefDisp(arg0_ref,
+ mirror::ObjectArray<mirror::Object>::OffsetOfElement(
+ target_method.dex_method_index).Int32Value(),
+ arg0_ref,
+ kNotVolatile);
+ } else {
+ size_t offset = cg->dex_cache_arrays_layout_.MethodOffset(target_method.dex_method_index);
+ cg->OpPcRelDexCacheArrayLoad(cu->dex_file, offset, arg0_ref);
+ }
break;
case 3: // Grab the code from the method*
if (direct_code == 0) {
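
The state machine above emits at most one instruction per call slot; when the PC-relative path makes a step unnecessary, it advances the state and falls through instead of burning the slot. A minimal sketch of that idiom (emit steps reduced to comments; FALLTHROUGH_INTENDED stands in for ART's annotation macro):

#define FALLTHROUGH_INTENDED [[fallthrough]]  // stand-in for ART's macro

// Returns the next state, or -1 when the sequence is complete.
int NextCallInsnSketch(int state, bool use_pc_rel) {
  switch (state) {
    case 0:
      if (!use_pc_rel) {
        // emit: load current ArtMethod* into arg0
        break;
      }
      ++state;
      FALLTHROUGH_INTENDED;
    case 1:
      if (!use_pc_rel) {
        // emit: load method->dex_cache_resolved_methods_
        break;
      }
      ++state;
      FALLTHROUGH_INTENDED;
    case 2:
      // emit: the resolved target method, via an array element load or
      // a PC-relative dex cache array load
      break;
    default:
      return -1;  // sequence complete
  }
  return state + 1;
}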
diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h
index 4141bcfe98..83b27df939 100644
--- a/compiler/dex/quick/arm/codegen_arm.h
+++ b/compiler/dex/quick/arm/codegen_arm.h
@@ -82,6 +82,9 @@ class ArmMir2Lir FINAL : public Mir2Lir {
/// @copydoc Mir2Lir::UnconditionallyMarkGCCard(RegStorage)
void UnconditionallyMarkGCCard(RegStorage tgt_addr_reg) OVERRIDE;
+ bool CanUseOpPcRelDexCacheArrayLoad() const OVERRIDE;
+ void OpPcRelDexCacheArrayLoad(const DexFile* dex_file, int offset, RegStorage r_dest) OVERRIDE;
+
// Required for target - register utilities.
RegStorage TargetReg(SpecialTargetRegister reg) OVERRIDE;
RegStorage TargetReg(SpecialTargetRegister reg, WideKind wide_kind) OVERRIDE {
@@ -257,6 +260,9 @@ class ArmMir2Lir FINAL : public Mir2Lir {
*/
LIR* GenCallInsn(const MirMethodLoweringInfo& method_info) OVERRIDE;
+ void CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num_regs) OVERRIDE;
+ void DoPromotion() OVERRIDE;
+
/*
* @brief Handle ARM specific literals.
*/
@@ -300,6 +306,13 @@ class ArmMir2Lir FINAL : public Mir2Lir {
ArenaVector<LIR*> call_method_insns_;
+ // Instructions needing patching with PC relative code addresses.
+ ArenaVector<LIR*> dex_cache_access_insns_;
+
+ // Register with a reference to the dex cache arrays at dex_cache_arrays_min_offset_,
+ // if promoted.
+ RegStorage dex_cache_arrays_base_reg_;
+
/**
* @brief Given float register pair, returns Solo64 float register.
* @param reg #RegStorage containing a float register pair (e.g. @c s2 and @c s3).
@@ -329,6 +342,14 @@ class ArmMir2Lir FINAL : public Mir2Lir {
}
int GenDalvikArgsBulkCopy(CallInfo* info, int first, int count) OVERRIDE;
+
+ static int ArmNextSDCallInsn(CompilationUnit* cu, CallInfo* info ATTRIBUTE_UNUSED,
+ int state, const MethodReference& target_method,
+ uint32_t unused_idx ATTRIBUTE_UNUSED,
+ uintptr_t direct_code, uintptr_t direct_method,
+ InvokeType type);
+
+ void OpPcRelDexCacheArrayAddr(const DexFile* dex_file, int offset, RegStorage r_dest);
};
} // namespace art
diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc
index 9193e1b23c..47669db979 100644
--- a/compiler/dex/quick/arm/int_arm.cc
+++ b/compiler/dex/quick/arm/int_arm.cc
@@ -1087,6 +1087,36 @@ void ArmMir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
lir->target = target;
}
+bool ArmMir2Lir::CanUseOpPcRelDexCacheArrayLoad() const {
+ return dex_cache_arrays_layout_.Valid();
+}
+
+void ArmMir2Lir::OpPcRelDexCacheArrayAddr(const DexFile* dex_file, int offset, RegStorage r_dest) {
+ LIR* movw = NewLIR2(kThumb2MovImm16, r_dest.GetReg(), 0);
+ LIR* movt = NewLIR2(kThumb2MovImm16H, r_dest.GetReg(), 0);
+ ArmOpcode add_pc_opcode = (r_dest.GetRegNum() < 8) ? kThumbAddRRLH : kThumbAddRRHH;
+ LIR* add_pc = NewLIR2(add_pc_opcode, r_dest.GetReg(), rs_rARM_PC.GetReg());
+ add_pc->flags.fixup = kFixupLabel;
+ movw->operands[2] = WrapPointer(dex_file);
+ movw->operands[3] = offset;
+ movw->operands[4] = WrapPointer(add_pc);
+ movt->operands[2] = movw->operands[2];
+ movt->operands[3] = movw->operands[3];
+ movt->operands[4] = movw->operands[4];
+ dex_cache_access_insns_.push_back(movw);
+ dex_cache_access_insns_.push_back(movt);
+}
+
+void ArmMir2Lir::OpPcRelDexCacheArrayLoad(const DexFile* dex_file, int offset, RegStorage r_dest) {
+ if (dex_cache_arrays_base_reg_.Valid()) {
+ LoadRefDisp(dex_cache_arrays_base_reg_, offset - dex_cache_arrays_min_offset_,
+ r_dest, kNotVolatile);
+ } else {
+ OpPcRelDexCacheArrayAddr(dex_file, offset, r_dest);
+ LoadRefDisp(r_dest, 0, r_dest, kNotVolatile);
+ }
+}
+
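
For reference, the patching math implied by the movw/movt/add-pc sequence above, as a standalone sketch (hypothetical addresses; it assumes the ARM rule that the PC read by the ADD is that instruction's address plus 4 in Thumb state):

#include <cstdint>
#include <cstdio>

int main() {
  uint32_t add_pc_offset = 0x1000;    // hypothetical offset of the ADD insn
  uint32_t element_address = 0x8040;  // hypothetical dex cache array element
  uint32_t delta = element_address - (add_pc_offset + 4);
  uint16_t lo16 = delta & 0xffffu;    // immediate patched into movw
  uint16_t hi16 = delta >> 16;        // immediate patched into movt
  printf("movw #0x%x / movt #0x%x\n", lo16, hi16);
  return 0;
}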
LIR* ArmMir2Lir::OpVldm(RegStorage r_base, int count) {
return NewLIR3(kThumb2Vldms, r_base.GetReg(), rs_fr0.GetReg(), count);
}
diff --git a/compiler/dex/quick/arm/target_arm.cc b/compiler/dex/quick/arm/target_arm.cc
index 9812d9ff99..5f27338e6b 100644
--- a/compiler/dex/quick/arm/target_arm.cc
+++ b/compiler/dex/quick/arm/target_arm.cc
@@ -575,7 +575,9 @@ RegisterClass ArmMir2Lir::RegClassForFieldLoadStore(OpSize size, bool is_volatil
ArmMir2Lir::ArmMir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena)
: Mir2Lir(cu, mir_graph, arena),
- call_method_insns_(arena->Adapter()) {
+ call_method_insns_(arena->Adapter()),
+ dex_cache_access_insns_(arena->Adapter()),
+ dex_cache_arrays_base_reg_(RegStorage::InvalidReg()) {
call_method_insns_.reserve(100);
// Sanity check - make sure encoding map lines up.
for (int i = 0; i < kArmLast; i++) {
@@ -901,14 +903,28 @@ RegStorage ArmMir2Lir::AllocPreservedSingle(int s_reg) {
}
void ArmMir2Lir::InstallLiteralPools() {
+ patches_.reserve(call_method_insns_.size() + dex_cache_access_insns_.size());
+
// PC-relative calls to methods.
- patches_.reserve(call_method_insns_.size());
for (LIR* p : call_method_insns_) {
- DCHECK_EQ(p->opcode, kThumb2Bl);
- uint32_t target_method_idx = p->operands[1];
- const DexFile* target_dex_file = UnwrapPointer<DexFile>(p->operands[2]);
- patches_.push_back(LinkerPatch::RelativeCodePatch(p->offset,
- target_dex_file, target_method_idx));
+ DCHECK_EQ(p->opcode, kThumb2Bl);
+ uint32_t target_method_idx = p->operands[1];
+ const DexFile* target_dex_file = UnwrapPointer<DexFile>(p->operands[2]);
+ patches_.push_back(LinkerPatch::RelativeCodePatch(p->offset,
+ target_dex_file, target_method_idx));
+ }
+
+ // PC-relative dex cache array accesses.
+ for (LIR* p : dex_cache_access_insns_) {
+    DCHECK(p->opcode == kThumb2MovImm16 || p->opcode == kThumb2MovImm16H);
+ const LIR* add_pc = UnwrapPointer<LIR>(p->operands[4]);
+ DCHECK(add_pc->opcode == kThumbAddRRLH || add_pc->opcode == kThumbAddRRHH);
+ const DexFile* dex_file = UnwrapPointer<DexFile>(p->operands[2]);
+ uint32_t offset = p->operands[3];
+ DCHECK(!p->flags.is_nop);
+ DCHECK(!add_pc->flags.is_nop);
+ patches_.push_back(LinkerPatch::DexCacheArrayPatch(p->offset,
+ dex_file, add_pc->offset, offset));
}
// And do the normal processing.
diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc
index e4bd2a33ae..25ea6941c0 100644
--- a/compiler/dex/quick/arm/utility_arm.cc
+++ b/compiler/dex/quick/arm/utility_arm.cc
@@ -19,6 +19,7 @@
#include "arch/arm/instruction_set_features_arm.h"
#include "arm_lir.h"
#include "base/logging.h"
+#include "dex/mir_graph.h"
#include "dex/quick/mir_to_lir-inl.h"
#include "dex/reg_storage_eq.h"
#include "driver/compiler_driver.h"
@@ -1266,4 +1267,39 @@ size_t ArmMir2Lir::GetInstructionOffset(LIR* lir) {
return offset;
}
+void ArmMir2Lir::CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num_regs) {
+ // Start with the default counts.
+ Mir2Lir::CountRefs(core_counts, fp_counts, num_regs);
+
+ if (pc_rel_temp_ != nullptr) {
+ // Now, if the dex cache array base temp is used only once outside any loops (weight = 1),
+ // avoid the promotion, otherwise boost the weight by factor 3 because the full PC-relative
+ // load sequence is 4 instructions long and by promoting the PC base we save up to 3
+ // instructions per use.
+ int p_map_idx = SRegToPMap(pc_rel_temp_->s_reg_low);
+ if (core_counts[p_map_idx].count == 1) {
+ core_counts[p_map_idx].count = 0;
+ } else {
+ core_counts[p_map_idx].count *= 3;
+ }
+ }
+}
+
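
A toy restatement of the weighting rule in CountRefs above (assumed costs, per the comment: a 4-instruction materialization, up to 3 instructions saved per use):

// Returns the adjusted promotion weight for the dex cache arrays base temp.
int AdjustedDexCacheBaseWeightSketch(int uses_outside_loops) {
  // A single use cannot amortize the setup cost; two or more uses can.
  return (uses_outside_loops == 1) ? 0 : uses_outside_loops * 3;
}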
+void ArmMir2Lir::DoPromotion() {
+ if (CanUseOpPcRelDexCacheArrayLoad()) {
+ pc_rel_temp_ = mir_graph_->GetNewCompilerTemp(kCompilerTempBackend, false);
+ }
+
+ Mir2Lir::DoPromotion();
+
+ if (pc_rel_temp_ != nullptr) {
+ // Now, if the dex cache array base temp is promoted, remember the register but
+ // always remove the temp's stack location to avoid unnecessarily bloating the stack.
+ dex_cache_arrays_base_reg_ = mir_graph_->reg_location_[pc_rel_temp_->s_reg_low].reg;
+ DCHECK(!dex_cache_arrays_base_reg_.Valid() || !dex_cache_arrays_base_reg_.IsFloat());
+ mir_graph_->RemoveLastCompilerTemp(kCompilerTempBackend, false, pc_rel_temp_);
+ pc_rel_temp_ = nullptr;
+ }
+}
+
} // namespace art
diff --git a/compiler/dex/quick/arm64/arm64_lir.h b/compiler/dex/quick/arm64/arm64_lir.h
index d15412a1bd..f6fa9389d0 100644
--- a/compiler/dex/quick/arm64/arm64_lir.h
+++ b/compiler/dex/quick/arm64/arm64_lir.h
@@ -236,6 +236,7 @@ enum A64Opcode {
kA64Add4rrro, // add [00001011000] rm[20-16] imm_6[15-10] rn[9-5] rd[4-0].
kA64Add4RRre, // add [00001011001] rm[20-16] option[15-13] imm_3[12-10] rn[9-5] rd[4-0].
kA64Adr2xd, // adr [0] immlo[30-29] [10000] immhi[23-5] rd[4-0].
+ kA64Adrp2xd, // adrp [1] immlo[30-29] [10000] immhi[23-5] rd[4-0].
kA64And3Rrl, // and [00010010] N[22] imm_r[21-16] imm_s[15-10] rn[9-5] rd[4-0].
kA64And4rrro, // and [00001010] shift[23-22] [N=0] rm[20-16] imm_6[15-10] rn[9-5] rd[4-0].
kA64Asr3rrd, // asr [0001001100] immr[21-16] imms[15-10] rn[9-5] rd[4-0].
diff --git a/compiler/dex/quick/arm64/assemble_arm64.cc b/compiler/dex/quick/arm64/assemble_arm64.cc
index 329bb1e770..2f1ae66bfc 100644
--- a/compiler/dex/quick/arm64/assemble_arm64.cc
+++ b/compiler/dex/quick/arm64/assemble_arm64.cc
@@ -131,6 +131,10 @@ const A64EncodingMap Arm64Mir2Lir::EncodingMap[kA64Last] = {
kFmtRegX, 4, 0, kFmtImm21, -1, -1, kFmtUnused, -1, -1,
kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0 | NEEDS_FIXUP,
"adr", "!0x, #!1d", kFixupAdr),
+ ENCODING_MAP(kA64Adrp2xd, NO_VARIANTS(0x90000000),
+ kFmtRegX, 4, 0, kFmtImm21, -1, -1, kFmtUnused, -1, -1,
+ kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0 | NEEDS_FIXUP,
+ "adrp", "!0x, #!1d", kFixupLabel),
ENCODING_MAP(WIDE(kA64And3Rrl), SF_VARIANTS(0x12000000),
kFmtRegROrSp, 4, 0, kFmtRegR, 9, 5, kFmtBitBlt, 22, 10,
kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
@@ -682,7 +686,9 @@ void Arm64Mir2Lir::InsertFixupBefore(LIR* prev_lir, LIR* orig_lir, LIR* new_lir)
#define PADDING_NOP (UINT32_C(0xd503201f))
uint8_t* Arm64Mir2Lir::EncodeLIRs(uint8_t* write_pos, LIR* lir) {
+ uint8_t* const write_buffer = write_pos;
for (; lir != nullptr; lir = NEXT_LIR(lir)) {
+ lir->offset = (write_pos - write_buffer);
bool opcode_is_wide = IS_WIDE(lir->opcode);
A64Opcode opcode = UNWIDE(lir->opcode);
diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc
index 823cb60d97..4abbd77d88 100644
--- a/compiler/dex/quick/arm64/call_arm64.cc
+++ b/compiler/dex/quick/arm64/call_arm64.cc
@@ -23,10 +23,12 @@
#include "dex/mir_graph.h"
#include "dex/quick/mir_to_lir-inl.h"
#include "driver/compiler_driver.h"
+#include "driver/compiler_options.h"
#include "gc/accounting/card_table.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "mirror/art_method.h"
#include "mirror/object_array-inl.h"
+#include "utils/dex_cache_arrays_layout-inl.h"
namespace art {
@@ -280,7 +282,13 @@ void Arm64Mir2Lir::UnconditionallyMarkGCCard(RegStorage tgt_addr_reg) {
FreeTemp(reg_card_no);
}
+static dwarf::Reg DwarfCoreReg(int num) {
+ return dwarf::Reg::Arm64Core(num);
+}
+
void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) {
+ DCHECK_EQ(cfi_.GetCurrentCFAOffset(), 0); // empty stack.
+
/*
* On entry, x0 to x7 are live. Let the register allocation
* mechanism know so it doesn't try to use any of them when
@@ -310,8 +318,6 @@ void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method)
bool skip_overflow_check = mir_graph_->MethodIsLeaf() &&
!FrameNeedsStackCheck(frame_size_, kArm64);
- NewLIR0(kPseudoMethodEntry);
-
const size_t kStackOverflowReservedUsableBytes = GetStackOverflowReservedBytes(kArm64);
const bool large_frame = static_cast<size_t>(frame_size_) > kStackOverflowReservedUsableBytes;
bool generate_explicit_stack_overflow_check = large_frame ||
@@ -345,6 +351,7 @@ void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method)
if (spilled_already != frame_size_) {
OpRegImm(kOpSub, rs_sp, frame_size_without_spills);
+ cfi_.AdjustCFAOffset(frame_size_without_spills);
}
if (!skip_overflow_check) {
@@ -361,12 +368,14 @@ void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method)
GenerateTargetLabel(kPseudoThrowTarget);
// Unwinds stack.
m2l_->OpRegImm(kOpAdd, rs_sp, sp_displace_);
+ m2l_->cfi().AdjustCFAOffset(-sp_displace_);
m2l_->ClobberCallerSave();
ThreadOffset<8> func_offset = QUICK_ENTRYPOINT_OFFSET(8, pThrowStackOverflow);
m2l_->LockTemp(rs_xIP0);
m2l_->LoadWordDisp(rs_xSELF, func_offset.Int32Value(), rs_xIP0);
m2l_->NewLIR1(kA64Br1x, rs_xIP0.GetReg());
m2l_->FreeTemp(rs_xIP0);
+ m2l_->cfi().AdjustCFAOffset(sp_displace_);
}
private:
@@ -393,19 +402,20 @@ void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method)
}
void Arm64Mir2Lir::GenExitSequence() {
+ cfi_.RememberState();
/*
* In the exit path, r0/r1 are live - make sure they aren't
* allocated by the register utilities as temps.
*/
LockTemp(rs_x0);
LockTemp(rs_x1);
-
- NewLIR0(kPseudoMethodExit);
-
UnspillRegs(rs_sp, core_spill_mask_, fp_spill_mask_, frame_size_);
// Finally return.
NewLIR0(kA64Ret);
+ // The CFI should be restored for any code that follows the exit block.
+ cfi_.RestoreState();
+ cfi_.DefCFAOffset(frame_size_);
}
void Arm64Mir2Lir::GenSpecialExitSequence() {
@@ -422,11 +432,16 @@ void Arm64Mir2Lir::GenSpecialEntryForSuspend() {
core_vmap_table_.clear();
fp_vmap_table_.clear();
NewLIR4(WIDE(kA64StpPre4rrXD), rs_x0.GetReg(), rs_xLR.GetReg(), rs_sp.GetReg(), -frame_size_ / 8);
+ cfi_.AdjustCFAOffset(frame_size_);
+ // Do not generate CFI for scratch register x0.
+ cfi_.RelOffset(DwarfCoreReg(rxLR), 8);
}
void Arm64Mir2Lir::GenSpecialExitForSuspend() {
// Pop the frame. (ArtMethod* no longer needed but restore it anyway.)
NewLIR4(WIDE(kA64LdpPost4rrXD), rs_x0.GetReg(), rs_xLR.GetReg(), rs_sp.GetReg(), frame_size_ / 8);
+ cfi_.AdjustCFAOffset(-frame_size_);
+ cfi_.Restore(DwarfCoreReg(rxLR));
}
static bool Arm64UseRelativeCall(CompilationUnit* cu, const MethodReference& target_method) {
@@ -438,13 +453,13 @@ static bool Arm64UseRelativeCall(CompilationUnit* cu, const MethodReference& tar
* Bit of a hack here - in the absence of a real scheduling pass,
* emit the next instruction in static & direct invoke sequences.
*/
-static int Arm64NextSDCallInsn(CompilationUnit* cu, CallInfo* info,
- int state, const MethodReference& target_method,
- uint32_t unused_idx,
- uintptr_t direct_code, uintptr_t direct_method,
- InvokeType type) {
+int Arm64Mir2Lir::Arm64NextSDCallInsn(CompilationUnit* cu, CallInfo* info,
+ int state, const MethodReference& target_method,
+ uint32_t unused_idx,
+ uintptr_t direct_code, uintptr_t direct_method,
+ InvokeType type) {
UNUSED(info, unused_idx);
- Mir2Lir* cg = static_cast<Mir2Lir*>(cu->cg.get());
+ Arm64Mir2Lir* cg = static_cast<Arm64Mir2Lir*>(cu->cg.get());
if (direct_code != 0 && direct_method != 0) {
switch (state) {
case 0: // Get the current Method* [sets kArg0]
@@ -465,17 +480,24 @@ static int Arm64NextSDCallInsn(CompilationUnit* cu, CallInfo* info,
return -1;
}
} else {
+ bool use_pc_rel = cg->CanUseOpPcRelDexCacheArrayLoad();
RegStorage arg0_ref = cg->TargetReg(kArg0, kRef);
switch (state) {
case 0: // Get the current Method* [sets kArg0]
// TUNING: we can save a reg copy if Method* has been promoted.
- cg->LoadCurrMethodDirect(arg0_ref);
- break;
+ if (!use_pc_rel) {
+ cg->LoadCurrMethodDirect(arg0_ref);
+ break;
+ }
+ ++state;
+ FALLTHROUGH_INTENDED;
case 1: // Get method->dex_cache_resolved_methods_
- cg->LoadRefDisp(arg0_ref,
- mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value(),
- arg0_ref,
- kNotVolatile);
+ if (!use_pc_rel) {
+ cg->LoadRefDisp(arg0_ref,
+ mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value(),
+ arg0_ref,
+ kNotVolatile);
+ }
// Set up direct code if known.
if (direct_code != 0) {
if (direct_code != static_cast<uintptr_t>(-1)) {
@@ -487,14 +509,23 @@ static int Arm64NextSDCallInsn(CompilationUnit* cu, CallInfo* info,
cg->LoadCodeAddress(target_method, type, kInvokeTgt);
}
}
- break;
+ if (!use_pc_rel || direct_code != 0) {
+ break;
+ }
+ ++state;
+ FALLTHROUGH_INTENDED;
case 2: // Grab target method*
CHECK_EQ(cu->dex_file, target_method.dex_file);
- cg->LoadRefDisp(arg0_ref,
- mirror::ObjectArray<mirror::Object>::OffsetOfElement(
- target_method.dex_method_index).Int32Value(),
- arg0_ref,
- kNotVolatile);
+ if (!use_pc_rel) {
+ cg->LoadRefDisp(arg0_ref,
+ mirror::ObjectArray<mirror::Object>::OffsetOfElement(
+ target_method.dex_method_index).Int32Value(),
+ arg0_ref,
+ kNotVolatile);
+ } else {
+ size_t offset = cg->dex_cache_arrays_layout_.MethodOffset(target_method.dex_method_index);
+ cg->OpPcRelDexCacheArrayLoad(cu->dex_file, offset, arg0_ref);
+ }
break;
case 3: // Grab the code from the method*
if (direct_code == 0) {
diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h
index 54fd46de0e..8184f02287 100644
--- a/compiler/dex/quick/arm64/codegen_arm64.h
+++ b/compiler/dex/quick/arm64/codegen_arm64.h
@@ -78,6 +78,9 @@ class Arm64Mir2Lir FINAL : public Mir2Lir {
/// @copydoc Mir2Lir::UnconditionallyMarkGCCard(RegStorage)
void UnconditionallyMarkGCCard(RegStorage tgt_addr_reg) OVERRIDE;
+ bool CanUseOpPcRelDexCacheArrayLoad() const OVERRIDE;
+ void OpPcRelDexCacheArrayLoad(const DexFile* dex_file, int offset, RegStorage r_dest) OVERRIDE;
+
LIR* OpCmpMemImmBranch(ConditionCode cond, RegStorage temp_reg, RegStorage base_reg,
int offset, int check_value, LIR* target, LIR** compare) OVERRIDE;
@@ -393,9 +396,16 @@ class Arm64Mir2Lir FINAL : public Mir2Lir {
void GenDivRemLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
RegLocation rl_src2, bool is_div, int flags);
+ static int Arm64NextSDCallInsn(CompilationUnit* cu, CallInfo* info,
+ int state, const MethodReference& target_method,
+ uint32_t unused_idx,
+ uintptr_t direct_code, uintptr_t direct_method,
+ InvokeType type);
+
static const A64EncodingMap EncodingMap[kA64Last];
ArenaVector<LIR*> call_method_insns_;
+ ArenaVector<LIR*> dex_cache_access_insns_;
int GenDalvikArgsBulkCopy(CallInfo* info, int first, int count) OVERRIDE;
};
diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc
index 2372ccc527..20f61f2261 100644
--- a/compiler/dex/quick/arm64/int_arm64.cc
+++ b/compiler/dex/quick/arm64/int_arm64.cc
@@ -943,6 +943,23 @@ void Arm64Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
lir->target = target;
}
+bool Arm64Mir2Lir::CanUseOpPcRelDexCacheArrayLoad() const {
+ return dex_cache_arrays_layout_.Valid();
+}
+
+void Arm64Mir2Lir::OpPcRelDexCacheArrayLoad(const DexFile* dex_file, int offset,
+ RegStorage r_dest) {
+ LIR* adrp = NewLIR2(kA64Adrp2xd, r_dest.GetReg(), 0);
+ adrp->operands[2] = WrapPointer(dex_file);
+ adrp->operands[3] = offset;
+ adrp->operands[4] = WrapPointer(adrp);
+ dex_cache_access_insns_.push_back(adrp);
+ LIR* ldr = LoadBaseDisp(r_dest, 0, r_dest, kReference, kNotVolatile);
+ ldr->operands[4] = adrp->operands[4];
+ ldr->flags.fixup = kFixupLabel;
+ dex_cache_access_insns_.push_back(ldr);
+}
+
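
The ADRP+LDR pair above is resolved at link time into a 4 KiB-page delta plus a low-12-bit offset; a sketch of that split with hypothetical addresses:

#include <cstdint>
#include <cstdio>

int main() {
  uint64_t adrp_pc = 0x104321;  // hypothetical address of the ADRP
  uint64_t target = 0x20abc;    // hypothetical dex cache array element
  int64_t page_delta =
      static_cast<int64_t>(target & ~UINT64_C(0xfff)) -
      static_cast<int64_t>(adrp_pc & ~UINT64_C(0xfff));
  uint32_t low12 = static_cast<uint32_t>(target & 0xfff);  // LDR offset
  printf("adrp: %+lld pages, ldr: #0x%x\n",
         static_cast<long long>(page_delta >> 12), low12);
  return 0;
}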
LIR* Arm64Mir2Lir::OpVldm(RegStorage r_base, int count) {
UNUSED(r_base, count);
LOG(FATAL) << "Unexpected use of OpVldm for Arm64";
@@ -1441,6 +1458,14 @@ static uint32_t GenPairWise(uint32_t reg_mask, int* reg1, int* reg2) {
return reg_mask;
}
+static dwarf::Reg DwarfCoreReg(int num) {
+ return dwarf::Reg::Arm64Core(num);
+}
+
+static dwarf::Reg DwarfFpReg(int num) {
+ return dwarf::Reg::Arm64Fp(num);
+}
+
static void SpillCoreRegs(Arm64Mir2Lir* m2l, RegStorage base, int offset, uint32_t reg_mask) {
int reg1 = -1, reg2 = -1;
const int reg_log2_size = 3;
@@ -1449,9 +1474,12 @@ static void SpillCoreRegs(Arm64Mir2Lir* m2l, RegStorage base, int offset, uint32
reg_mask = GenPairWise(reg_mask, & reg1, & reg2);
if (UNLIKELY(reg2 < 0)) {
m2l->NewLIR3(WIDE(kA64Str3rXD), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset);
+ m2l->cfi().RelOffset(DwarfCoreReg(reg1), offset << reg_log2_size);
} else {
m2l->NewLIR4(WIDE(kA64Stp4rrXD), RegStorage::Solo64(reg2).GetReg(),
RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset);
+ m2l->cfi().RelOffset(DwarfCoreReg(reg2), offset << reg_log2_size);
+ m2l->cfi().RelOffset(DwarfCoreReg(reg1), (offset + 1) << reg_log2_size);
}
}
}
@@ -1466,9 +1494,12 @@ static void SpillFPRegs(Arm64Mir2Lir* m2l, RegStorage base, int offset, uint32_t
if (UNLIKELY(reg2 < 0)) {
m2l->NewLIR3(WIDE(kA64Str3fXD), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(),
offset);
+ m2l->cfi().RelOffset(DwarfFpReg(reg1), offset << reg_log2_size);
} else {
m2l->NewLIR4(WIDE(kA64Stp4ffXD), RegStorage::FloatSolo64(reg2).GetReg(),
RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), offset);
+ m2l->cfi().RelOffset(DwarfFpReg(reg2), offset << reg_log2_size);
+ m2l->cfi().RelOffset(DwarfFpReg(reg1), (offset + 1) << reg_log2_size);
}
}
}
@@ -1476,6 +1507,7 @@ static void SpillFPRegs(Arm64Mir2Lir* m2l, RegStorage base, int offset, uint32_t
static int SpillRegsPreSub(Arm64Mir2Lir* m2l, uint32_t core_reg_mask, uint32_t fp_reg_mask,
int frame_size) {
m2l->OpRegRegImm(kOpSub, rs_sp, rs_sp, frame_size);
+ m2l->cfi().AdjustCFAOffset(frame_size);
int core_count = POPCOUNT(core_reg_mask);
@@ -1535,11 +1567,15 @@ static int SpillRegsPreIndexed(Arm64Mir2Lir* m2l, RegStorage base, uint32_t core
RegStorage::FloatSolo64(reg1).GetReg(),
RegStorage::FloatSolo64(reg1).GetReg(),
base.GetReg(), -all_offset);
+ m2l->cfi().AdjustCFAOffset(all_offset * kArm64PointerSize);
+ m2l->cfi().RelOffset(DwarfFpReg(reg1), kArm64PointerSize);
} else {
m2l->NewLIR4(WIDE(kA64StpPre4ffXD),
RegStorage::FloatSolo64(reg1).GetReg(),
RegStorage::FloatSolo64(reg1).GetReg(),
base.GetReg(), -all_offset);
+ m2l->cfi().AdjustCFAOffset(all_offset * kArm64PointerSize);
+ m2l->cfi().RelOffset(DwarfFpReg(reg1), 0);
cur_offset = 0; // That core reg needs to go into the upper half.
}
} else {
@@ -1547,10 +1583,15 @@ static int SpillRegsPreIndexed(Arm64Mir2Lir* m2l, RegStorage base, uint32_t core
fp_reg_mask = GenPairWise(fp_reg_mask, &reg1, &reg2);
m2l->NewLIR4(WIDE(kA64StpPre4ffXD), RegStorage::FloatSolo64(reg2).GetReg(),
RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), -all_offset);
+ m2l->cfi().AdjustCFAOffset(all_offset * kArm64PointerSize);
+ m2l->cfi().RelOffset(DwarfFpReg(reg2), 0);
+ m2l->cfi().RelOffset(DwarfFpReg(reg1), kArm64PointerSize);
} else {
fp_reg_mask = ExtractReg(fp_reg_mask, &reg1);
m2l->NewLIR4(WIDE(kA64StpPre4ffXD), rs_d0.GetReg(), RegStorage::FloatSolo64(reg1).GetReg(),
base.GetReg(), -all_offset);
+ m2l->cfi().AdjustCFAOffset(all_offset * kArm64PointerSize);
+ m2l->cfi().RelOffset(DwarfFpReg(reg1), kArm64PointerSize);
}
}
} else {
@@ -1563,12 +1604,19 @@ static int SpillRegsPreIndexed(Arm64Mir2Lir* m2l, RegStorage base, uint32_t core
core_reg_mask = ExtractReg(core_reg_mask, &reg1);
m2l->NewLIR4(WIDE(kA64StpPre4rrXD), rs_xzr.GetReg(),
RegStorage::Solo64(reg1).GetReg(), base.GetReg(), -all_offset);
+ m2l->cfi().AdjustCFAOffset(all_offset * kArm64PointerSize);
+ m2l->cfi().RelOffset(DwarfCoreReg(reg1), kArm64PointerSize);
} else {
core_reg_mask = GenPairWise(core_reg_mask, &reg1, &reg2);
m2l->NewLIR4(WIDE(kA64StpPre4rrXD), RegStorage::Solo64(reg2).GetReg(),
RegStorage::Solo64(reg1).GetReg(), base.GetReg(), -all_offset);
+ m2l->cfi().AdjustCFAOffset(all_offset * kArm64PointerSize);
+ m2l->cfi().RelOffset(DwarfCoreReg(reg2), 0);
+ m2l->cfi().RelOffset(DwarfCoreReg(reg1), kArm64PointerSize);
}
}
+ DCHECK_EQ(m2l->cfi().GetCurrentCFAOffset(),
+ static_cast<int>(all_offset * kArm64PointerSize));
if (fp_count != 0) {
for (; fp_reg_mask != 0;) {
@@ -1577,10 +1625,13 @@ static int SpillRegsPreIndexed(Arm64Mir2Lir* m2l, RegStorage base, uint32_t core
if (UNLIKELY(reg2 < 0)) {
m2l->NewLIR3(WIDE(kA64Str3fXD), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(),
cur_offset);
+ m2l->cfi().RelOffset(DwarfFpReg(reg1), cur_offset * kArm64PointerSize);
// Do not increment offset here, as the second half will be filled by a core reg.
} else {
m2l->NewLIR4(WIDE(kA64Stp4ffXD), RegStorage::FloatSolo64(reg2).GetReg(),
RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), cur_offset);
+ m2l->cfi().RelOffset(DwarfFpReg(reg2), cur_offset * kArm64PointerSize);
+ m2l->cfi().RelOffset(DwarfFpReg(reg1), (cur_offset + 1) * kArm64PointerSize);
cur_offset += 2;
}
}
@@ -1593,6 +1644,7 @@ static int SpillRegsPreIndexed(Arm64Mir2Lir* m2l, RegStorage base, uint32_t core
core_reg_mask = ExtractReg(core_reg_mask, &reg1);
m2l->NewLIR3(WIDE(kA64Str3rXD), RegStorage::Solo64(reg1).GetReg(), base.GetReg(),
cur_offset + 1);
+ m2l->cfi().RelOffset(DwarfCoreReg(reg1), (cur_offset + 1) * kArm64PointerSize);
cur_offset += 2; // Half-slot filled now.
}
}
@@ -1603,6 +1655,8 @@ static int SpillRegsPreIndexed(Arm64Mir2Lir* m2l, RegStorage base, uint32_t core
core_reg_mask = GenPairWise(core_reg_mask, &reg1, &reg2);
m2l->NewLIR4(WIDE(kA64Stp4rrXD), RegStorage::Solo64(reg2).GetReg(),
RegStorage::Solo64(reg1).GetReg(), base.GetReg(), cur_offset);
+ m2l->cfi().RelOffset(DwarfCoreReg(reg2), cur_offset * kArm64PointerSize);
+ m2l->cfi().RelOffset(DwarfCoreReg(reg1), (cur_offset + 1) * kArm64PointerSize);
}
DCHECK_EQ(cur_offset, all_offset);
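
A simplified model of the CFA bookkeeping for the pre-indexed store pairs above (assumptions: the CFA is tracked as an offset from SP, and "stp <reg2>, <reg1>, [sp, #-N]!" drops SP by N with the first operand at the lower address):

struct ToyCfa {
  int cfa_offset = 0;  // distance from SP up to the CFA
};

// Tracks "stp <reg2>, <reg1>, [sp, #-frame_bytes]!".
void TrackPreIndexedStp(ToyCfa* cfa, int frame_bytes,
                        int* reg2_rel, int* reg1_rel) {
  cfa->cfa_offset += frame_bytes;  // SP moved down, so the CFA offset grows
  *reg2_rel = 0;                   // first operand lands at [sp, #0]
  *reg1_rel = 8;                   // second operand lands at [sp, #8]
}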
@@ -1633,10 +1687,13 @@ static void UnSpillCoreRegs(Arm64Mir2Lir* m2l, RegStorage base, int offset, uint
reg_mask = GenPairWise(reg_mask, & reg1, & reg2);
if (UNLIKELY(reg2 < 0)) {
m2l->NewLIR3(WIDE(kA64Ldr3rXD), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset);
+ m2l->cfi().Restore(DwarfCoreReg(reg1));
} else {
DCHECK_LE(offset, 63);
m2l->NewLIR4(WIDE(kA64Ldp4rrXD), RegStorage::Solo64(reg2).GetReg(),
RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset);
+ m2l->cfi().Restore(DwarfCoreReg(reg2));
+ m2l->cfi().Restore(DwarfCoreReg(reg1));
}
}
}
@@ -1650,9 +1707,12 @@ static void UnSpillFPRegs(Arm64Mir2Lir* m2l, RegStorage base, int offset, uint32
if (UNLIKELY(reg2 < 0)) {
m2l->NewLIR3(WIDE(kA64Ldr3fXD), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(),
offset);
+ m2l->cfi().Restore(DwarfFpReg(reg1));
} else {
m2l->NewLIR4(WIDE(kA64Ldp4ffXD), RegStorage::FloatSolo64(reg2).GetReg(),
RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), offset);
+ m2l->cfi().Restore(DwarfFpReg(reg2));
+ m2l->cfi().Restore(DwarfFpReg(reg1));
}
}
}
@@ -1694,6 +1754,7 @@ void Arm64Mir2Lir::UnspillRegs(RegStorage base, uint32_t core_reg_mask, uint32_t
early_drop = RoundDown(early_drop, 16);
OpRegImm64(kOpAdd, rs_sp, early_drop);
+ cfi_.AdjustCFAOffset(-early_drop);
}
// Unspill.
@@ -1707,7 +1768,9 @@ void Arm64Mir2Lir::UnspillRegs(RegStorage base, uint32_t core_reg_mask, uint32_t
}
// Drop the (rest of) the frame.
- OpRegImm64(kOpAdd, rs_sp, frame_size - early_drop);
+ int adjust = frame_size - early_drop;
+ OpRegImm64(kOpAdd, rs_sp, adjust);
+ cfi_.AdjustCFAOffset(-adjust);
}
bool Arm64Mir2Lir::GenInlinedReverseBits(CallInfo* info, OpSize size) {
diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc
index 09a34bf022..c5c0dc5447 100644
--- a/compiler/dex/quick/arm64/target_arm64.cc
+++ b/compiler/dex/quick/arm64/target_arm64.cc
@@ -606,7 +606,8 @@ RegisterClass Arm64Mir2Lir::RegClassForFieldLoadStore(OpSize size, bool is_volat
Arm64Mir2Lir::Arm64Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena)
: Mir2Lir(cu, mir_graph, arena),
- call_method_insns_(arena->Adapter()) {
+ call_method_insns_(arena->Adapter()),
+ dex_cache_access_insns_(arena->Adapter()) {
// Sanity check - make sure encoding map lines up.
for (int i = 0; i < kA64Last; i++) {
DCHECK_EQ(UNWIDE(Arm64Mir2Lir::EncodingMap[i].opcode), i)
@@ -846,8 +847,9 @@ RegStorage Arm64Mir2Lir::InToRegStorageArm64Mapper::GetNextReg(ShortyArg arg) {
}
void Arm64Mir2Lir::InstallLiteralPools() {
+ patches_.reserve(call_method_insns_.size() + dex_cache_access_insns_.size());
+
// PC-relative calls to methods.
- patches_.reserve(call_method_insns_.size());
for (LIR* p : call_method_insns_) {
DCHECK_EQ(p->opcode, kA64Bl1t);
uint32_t target_method_idx = p->operands[1];
@@ -856,6 +858,18 @@ void Arm64Mir2Lir::InstallLiteralPools() {
target_dex_file, target_method_idx));
}
+ // PC-relative references to dex cache arrays.
+ for (LIR* p : dex_cache_access_insns_) {
+ DCHECK(p->opcode == kA64Adrp2xd || p->opcode == kA64Ldr3rXD);
+ const LIR* adrp = UnwrapPointer<LIR>(p->operands[4]);
+ DCHECK_EQ(adrp->opcode, kA64Adrp2xd);
+ const DexFile* dex_file = UnwrapPointer<DexFile>(adrp->operands[2]);
+ uint32_t offset = adrp->operands[3];
+ DCHECK(!p->flags.is_nop);
+ DCHECK(!adrp->flags.is_nop);
+ patches_.push_back(LinkerPatch::DexCacheArrayPatch(p->offset, dex_file, adrp->offset, offset));
+ }
+
// And do the normal processing.
Mir2Lir::InstallLiteralPools();
}
diff --git a/compiler/dex/quick/arm64/utility_arm64.cc b/compiler/dex/quick/arm64/utility_arm64.cc
index f48290d6f7..e9ad8ba175 100644
--- a/compiler/dex/quick/arm64/utility_arm64.cc
+++ b/compiler/dex/quick/arm64/utility_arm64.cc
@@ -589,13 +589,11 @@ LIR* Arm64Mir2Lir::OpRegRegShift(OpKind op, RegStorage r_dest_src1, RegStorage r
DCHECK_EQ(shift, 0);
// Binary, but rm is encoded twice.
return NewLIR2(kA64Rev2rr | wide, r_dest_src1.GetReg(), r_src2.GetReg());
- break;
case kOpRevsh:
// Binary, but rm is encoded twice.
NewLIR2(kA64Rev162rr | wide, r_dest_src1.GetReg(), r_src2.GetReg());
// "sxth r1, r2" is "sbfm r1, r2, #0, #15"
return NewLIR4(kA64Sbfm4rrdd | wide, r_dest_src1.GetReg(), r_dest_src1.GetReg(), 0, 15);
- break;
case kOp2Byte:
DCHECK_EQ(shift, ENCODE_NO_SHIFT);
// "sbfx r1, r2, #imm1, #imm2" is "sbfm r1, r2, #imm1, #(imm1 + imm2 - 1)".
@@ -645,10 +643,9 @@ LIR* Arm64Mir2Lir::OpRegRegExtend(OpKind op, RegStorage r_dest_src1, RegStorage
// Note: intentional fallthrough
case kOpSub:
return OpRegRegRegExtend(op, r_dest_src1, r_dest_src1, r_src2, ext, amount);
- break;
default:
LOG(FATAL) << "Bad Opcode: " << opcode;
- break;
+ UNREACHABLE();
}
DCHECK(!IsPseudoLirOp(opcode));
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index df72830801..5ea36c2769 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -29,6 +29,7 @@
#include "dex/quick/dex_file_to_method_inliner_map.h"
#include "dex/verification_results.h"
#include "dex/verified_method.h"
+#include "utils/dex_cache_arrays_layout-inl.h"
#include "verifier/dex_gc_map.h"
#include "verifier/method_verifier.h"
#include "vmap_table.h"
@@ -202,12 +203,17 @@ void Mir2Lir::DumpLIRInsn(LIR* lir, unsigned char* base_addr) {
/* Handle pseudo-ops individually, and all regular insns as a group */
switch (lir->opcode) {
- case kPseudoMethodEntry:
- LOG(INFO) << "-------- method entry "
- << PrettyMethod(cu_->method_idx, *cu_->dex_file);
+ case kPseudoPrologueBegin:
+ LOG(INFO) << "-------- PrologueBegin";
break;
- case kPseudoMethodExit:
- LOG(INFO) << "-------- Method_Exit";
+ case kPseudoPrologueEnd:
+ LOG(INFO) << "-------- PrologueEnd";
+ break;
+ case kPseudoEpilogueBegin:
+ LOG(INFO) << "-------- EpilogueBegin";
+ break;
+ case kPseudoEpilogueEnd:
+ LOG(INFO) << "-------- EpilogueEnd";
break;
case kPseudoBarrier:
LOG(INFO) << "-------- BARRIER";
@@ -266,8 +272,9 @@ void Mir2Lir::DumpLIRInsn(LIR* lir, unsigned char* base_addr) {
lir, base_addr));
std::string op_operands(BuildInsnString(GetTargetInstFmt(lir->opcode),
lir, base_addr));
- LOG(INFO) << StringPrintf("%5p: %-9s%s%s",
+ LOG(INFO) << StringPrintf("%5p|0x%02x: %-9s%s%s",
base_addr + offset,
+ lir->dalvik_offset,
op_name.c_str(), op_operands.c_str(),
lir->flags.is_nop ? "(nop)" : "");
}
@@ -534,13 +541,11 @@ void Mir2Lir::InstallSwitchTables() {
DCHECK(tab_rec->anchor->flags.fixup != kFixupNone);
bx_offset = tab_rec->anchor->offset + 4;
break;
- case kX86:
- bx_offset = 0;
- break;
case kX86_64:
// RIP relative to switch table.
bx_offset = tab_rec->offset;
break;
+ case kX86:
case kArm64:
case kMips:
case kMips64:
@@ -712,14 +717,17 @@ void Mir2Lir::CreateMappingTables() {
DCHECK_EQ(static_cast<size_t>(write_pos - &encoded_mapping_table_[0]), hdr_data_size);
uint8_t* write_pos2 = write_pos + pc2dex_data_size;
+ bool is_in_prologue_or_epilogue = false;
pc2dex_offset = 0u;
pc2dex_dalvik_offset = 0u;
dex2pc_offset = 0u;
dex2pc_dalvik_offset = 0u;
for (LIR* tgt_lir = first_lir_insn_; tgt_lir != nullptr; tgt_lir = NEXT_LIR(tgt_lir)) {
- if (generate_src_map && !tgt_lir->flags.is_nop) {
- src_mapping_table_.push_back(SrcMapElem({tgt_lir->offset,
- static_cast<int32_t>(tgt_lir->dalvik_offset)}));
+ if (generate_src_map && !tgt_lir->flags.is_nop && tgt_lir->opcode >= 0) {
+ if (!is_in_prologue_or_epilogue) {
+ src_mapping_table_.push_back(SrcMapElem({tgt_lir->offset,
+ static_cast<int32_t>(tgt_lir->dalvik_offset)}));
+ }
}
if (!tgt_lir->flags.is_nop && (tgt_lir->opcode == kPseudoSafepointPC)) {
DCHECK(pc2dex_offset <= tgt_lir->offset);
@@ -737,6 +745,12 @@ void Mir2Lir::CreateMappingTables() {
dex2pc_offset = tgt_lir->offset;
dex2pc_dalvik_offset = tgt_lir->dalvik_offset;
}
+ if (tgt_lir->opcode == kPseudoPrologueBegin || tgt_lir->opcode == kPseudoEpilogueBegin) {
+ is_in_prologue_or_epilogue = true;
+ }
+ if (tgt_lir->opcode == kPseudoPrologueEnd || tgt_lir->opcode == kPseudoEpilogueEnd) {
+ is_in_prologue_or_epilogue = false;
+ }
}
DCHECK_EQ(static_cast<size_t>(write_pos - &encoded_mapping_table_[0]),
hdr_data_size + pc2dex_data_size);
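
The marker-driven filtering above can be modeled as a single pass with a flag set on Begin markers and cleared on End markers; only instructions outside a marked region are recorded in the source map. A sketch with assumed enum values:

enum SketchOp { kInsn, kProBegin, kProEnd, kEpiBegin, kEpiEnd };

template <typename Callback>
void ForEachMappedInsn(const SketchOp* ops, int n, Callback record) {
  bool in_prologue_or_epilogue = false;
  for (int i = 0; i < n; ++i) {
    if (ops[i] == kInsn && !in_prologue_or_epilogue) {
      record(i);  // would append a SrcMapElem in the real code
    }
    if (ops[i] == kProBegin || ops[i] == kEpiBegin) in_prologue_or_epilogue = true;
    if (ops[i] == kProEnd || ops[i] == kEpiEnd) in_prologue_or_epilogue = false;
  }
}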
@@ -1053,6 +1067,12 @@ Mir2Lir::Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena
mem_ref_type_(ResourceMask::kHeapRef),
mask_cache_(arena),
safepoints_(arena->Adapter()),
+ dex_cache_arrays_layout_(cu->compiler_driver->GetDexCacheArraysLayout(cu->dex_file)),
+ pc_rel_temp_(nullptr),
+ dex_cache_arrays_min_offset_(std::numeric_limits<uint32_t>::max()),
+ cfi_(&last_lir_insn_,
+ cu->compiler_driver->GetCompilerOptions().GetIncludeDebugSymbols(),
+ arena),
in_to_reg_storage_mapping_(arena) {
switch_tables_.reserve(4);
fill_array_data_.reserve(4);
@@ -1137,14 +1157,6 @@ CompiledMethod* Mir2Lir::GetCompiledMethod() {
return lhs.LiteralOffset() < rhs.LiteralOffset();
});
- std::unique_ptr<std::vector<uint8_t>> cfi_info(
- cu_->compiler_driver->GetCompilerOptions().GetGenerateGDBInformation() ?
- ReturnFrameDescriptionEntry() :
- nullptr);
- ArrayRef<const uint8_t> cfi_ref;
- if (cfi_info.get() != nullptr) {
- cfi_ref = ArrayRef<const uint8_t>(*cfi_info);
- }
return CompiledMethod::SwapAllocCompiledMethod(
cu_->compiler_driver, cu_->instruction_set,
ArrayRef<const uint8_t>(code_buffer_),
@@ -1153,8 +1165,8 @@ CompiledMethod* Mir2Lir::GetCompiledMethod() {
ArrayRef<const uint8_t>(encoded_mapping_table_),
ArrayRef<const uint8_t>(vmap_encoder.GetData()),
ArrayRef<const uint8_t>(native_gc_map_),
- cfi_ref,
- ArrayRef<LinkerPatch>(patches_));
+ ArrayRef<const uint8_t>(*cfi_.Patch(code_buffer_.size())),
+ ArrayRef<const LinkerPatch>(patches_));
}
size_t Mir2Lir::GetMaxPossibleCompilerTemps() const {
@@ -1304,9 +1316,15 @@ void Mir2Lir::LoadClassType(const DexFile& dex_file, uint32_t type_idx,
OpPcRelLoad(TargetReg(symbolic_reg, kRef), data_target);
}
-std::vector<uint8_t>* Mir2Lir::ReturnFrameDescriptionEntry() {
- // Default case is to do nothing.
- return nullptr;
+bool Mir2Lir::CanUseOpPcRelDexCacheArrayLoad() const {
+ return false;
+}
+
+void Mir2Lir::OpPcRelDexCacheArrayLoad(const DexFile* dex_file ATTRIBUTE_UNUSED,
+ int offset ATTRIBUTE_UNUSED,
+ RegStorage r_dest ATTRIBUTE_UNUSED) {
+ LOG(FATAL) << "No generic implementation.";
+ UNREACHABLE();
}
RegLocation Mir2Lir::NarrowRegLoc(RegLocation loc) {
diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc
index 8e3f4ef726..4ac6c0c5b5 100644
--- a/compiler/dex/quick/dex_file_method_inliner.cc
+++ b/compiler/dex/quick/dex_file_method_inliner.cc
@@ -413,6 +413,17 @@ bool DexFileMethodInliner::AnalyseMethodCode(verifier::MethodVerifier* verifier)
return success && AddInlineMethod(verifier->GetMethodReference().dex_method_index, method);
}
+InlineMethodFlags DexFileMethodInliner::IsIntrinsicOrSpecial(uint32_t method_index) {
+ ReaderMutexLock mu(Thread::Current(), lock_);
+ auto it = inline_methods_.find(method_index);
+ if (it != inline_methods_.end()) {
+ DCHECK_NE(it->second.flags & (kInlineIntrinsic | kInlineSpecial), 0);
+ return it->second.flags;
+ } else {
+ return kNoInlineMethodFlags;
+ }
+}
+
bool DexFileMethodInliner::IsIntrinsic(uint32_t method_index, InlineMethod* intrinsic) {
ReaderMutexLock mu(Thread::Current(), lock_);
auto it = inline_methods_.find(method_index);
diff --git a/compiler/dex/quick/dex_file_method_inliner.h b/compiler/dex/quick/dex_file_method_inliner.h
index cb521da9df..d1e562119c 100644
--- a/compiler/dex/quick/dex_file_method_inliner.h
+++ b/compiler/dex/quick/dex_file_method_inliner.h
@@ -65,6 +65,11 @@ class DexFileMethodInliner {
SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) LOCKS_EXCLUDED(lock_);
/**
+ * Check whether a particular method index corresponds to an intrinsic or special function.
+ */
+ InlineMethodFlags IsIntrinsicOrSpecial(uint32_t method_index) LOCKS_EXCLUDED(lock_);
+
+ /**
* Check whether a particular method index corresponds to an intrinsic function.
*/
bool IsIntrinsic(uint32_t method_index, InlineMethod* intrinsic) LOCKS_EXCLUDED(lock_);
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index 2bcaaca822..b132c4cc54 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -24,12 +24,14 @@
#include "dex/mir_graph.h"
#include "dex/quick/arm/arm_lir.h"
#include "driver/compiler_driver.h"
+#include "driver/compiler_options.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "mirror/array.h"
#include "mirror/object_array-inl.h"
#include "mirror/object-inl.h"
#include "mirror/object_reference.h"
#include "utils.h"
+#include "utils/dex_cache_arrays_layout-inl.h"
#include "verifier/method_verifier.h"
namespace art {
@@ -56,6 +58,133 @@ ALWAYS_INLINE static inline bool ForceSlowTypePath(CompilationUnit* cu) {
return (cu->enable_debug & (1 << kDebugSlowTypePath)) != 0;
}
+void Mir2Lir::GenIfNullUseHelperImmMethod(
+ RegStorage r_result, QuickEntrypointEnum trampoline, int imm, RegStorage r_method) {
+ class CallHelperImmMethodSlowPath : public LIRSlowPath {
+ public:
+ CallHelperImmMethodSlowPath(Mir2Lir* m2l, LIR* fromfast, LIR* cont,
+ QuickEntrypointEnum trampoline_in, int imm_in,
+ RegStorage r_method_in, RegStorage r_result_in)
+ : LIRSlowPath(m2l, fromfast, cont), trampoline_(trampoline_in),
+ imm_(imm_in), r_method_(r_method_in), r_result_(r_result_in) {
+ }
+
+ void Compile() {
+ GenerateTargetLabel();
+ if (r_method_.Valid()) {
+ m2l_->CallRuntimeHelperImmReg(trampoline_, imm_, r_method_, true);
+ } else {
+ m2l_->CallRuntimeHelperImmMethod(trampoline_, imm_, true);
+ }
+ m2l_->OpRegCopy(r_result_, m2l_->TargetReg(kRet0, kRef));
+ m2l_->OpUnconditionalBranch(cont_);
+ }
+
+ private:
+ QuickEntrypointEnum trampoline_;
+ const int imm_;
+ const RegStorage r_method_;
+ const RegStorage r_result_;
+ };
+
+  LIR* branch = OpCmpImmBranch(kCondEq, r_result, 0, nullptr);
+ LIR* cont = NewLIR0(kPseudoTargetLabel);
+
+ AddSlowPath(new (arena_) CallHelperImmMethodSlowPath(this, branch, cont, trampoline, imm,
+ r_method, r_result));
+}
+
+RegStorage Mir2Lir::GenGetOtherTypeForSgetSput(const MirSFieldLoweringInfo& field_info,
+ int opt_flags) {
+ DCHECK_NE(field_info.StorageIndex(), DexFile::kDexNoIndex);
+ // May do runtime call so everything to home locations.
+ FlushAllRegs();
+ RegStorage r_base = TargetReg(kArg0, kRef);
+ LockTemp(r_base);
+ RegStorage r_method = RegStorage::InvalidReg(); // Loaded lazily, maybe in the slow-path.
+ if (CanUseOpPcRelDexCacheArrayLoad()) {
+ uint32_t offset = dex_cache_arrays_layout_.TypeOffset(field_info.StorageIndex());
+ OpPcRelDexCacheArrayLoad(cu_->dex_file, offset, r_base);
+ } else {
+ // Using fixed register to sync with possible call to runtime support.
+ r_method = LoadCurrMethodWithHint(TargetReg(kArg1, kRef));
+ LoadRefDisp(r_method, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), r_base,
+ kNotVolatile);
+ int32_t offset_of_field = ObjArray::OffsetOfElement(field_info.StorageIndex()).Int32Value();
+ LoadRefDisp(r_base, offset_of_field, r_base, kNotVolatile);
+ }
+ // r_base now points at static storage (Class*) or nullptr if the type is not yet resolved.
+ LIR* unresolved_branch = nullptr;
+ if (!field_info.IsClassInDexCache() && (opt_flags & MIR_CLASS_IS_IN_DEX_CACHE) == 0) {
+ // Check if r_base is nullptr.
+ unresolved_branch = OpCmpImmBranch(kCondEq, r_base, 0, nullptr);
+ }
+ LIR* uninit_branch = nullptr;
+ if (!field_info.IsClassInitialized() && (opt_flags & MIR_CLASS_IS_INITIALIZED) == 0) {
+ // Check if r_base is not yet initialized class.
+ RegStorage r_tmp = TargetReg(kArg2, kNotWide);
+ LockTemp(r_tmp);
+ uninit_branch = OpCmpMemImmBranch(kCondLt, r_tmp, r_base,
+ mirror::Class::StatusOffset().Int32Value(),
+ mirror::Class::kStatusInitialized, nullptr, nullptr);
+ FreeTemp(r_tmp);
+ }
+ if (unresolved_branch != nullptr || uninit_branch != nullptr) {
+ //
+ // Slow path to ensure a class is initialized for sget/sput.
+ //
+ class StaticFieldSlowPath : public Mir2Lir::LIRSlowPath {
+ public:
+ // There are up to two branches to the static field slow path, the "unresolved" when the type
+ // entry in the dex cache is nullptr, and the "uninit" when the class is not yet initialized.
+ // At least one will be non-nullptr here, otherwise we wouldn't generate the slow path.
+ StaticFieldSlowPath(Mir2Lir* m2l, LIR* unresolved, LIR* uninit, LIR* cont, int storage_index,
+ RegStorage r_base_in, RegStorage r_method_in)
+ : LIRSlowPath(m2l, unresolved != nullptr ? unresolved : uninit, cont),
+ second_branch_(unresolved != nullptr ? uninit : nullptr),
+ storage_index_(storage_index), r_base_(r_base_in), r_method_(r_method_in) {
+ }
+
+ void Compile() {
+ LIR* target = GenerateTargetLabel();
+ if (second_branch_ != nullptr) {
+ second_branch_->target = target;
+ }
+ if (r_method_.Valid()) {
+ // ArtMethod* was loaded in normal path - use it.
+ m2l_->CallRuntimeHelperImmReg(kQuickInitializeStaticStorage, storage_index_, r_method_,
+ true);
+ } else {
+ // ArtMethod* wasn't loaded in normal path - use a helper that loads it.
+ m2l_->CallRuntimeHelperImmMethod(kQuickInitializeStaticStorage, storage_index_, true);
+ }
+ // Copy helper's result into r_base, a no-op on all but MIPS.
+ m2l_->OpRegCopy(r_base_, m2l_->TargetReg(kRet0, kRef));
+
+ m2l_->OpUnconditionalBranch(cont_);
+ }
+
+ private:
+ // Second branch to the slow path, or nullptr if there's only one branch.
+ LIR* const second_branch_;
+
+ const int storage_index_;
+ const RegStorage r_base_;
+ RegStorage r_method_;
+ };
+
+ // The slow path is invoked if the r_base is nullptr or the class pointed
+ // to by it is not initialized.
+ LIR* cont = NewLIR0(kPseudoTargetLabel);
+ AddSlowPath(new (arena_) StaticFieldSlowPath(this, unresolved_branch, uninit_branch, cont,
+ field_info.StorageIndex(), r_base, r_method));
+ }
+ if (IsTemp(r_method)) {
+ FreeTemp(r_method);
+ }
+ return r_base;
+}
+
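
A hedged restatement of the two fast-path guards above: the static-storage base can be used directly only when the class is resolved (r_base non-null) and its status has reached "initialized"; either failing check diverts to the shared slow path.

bool NeedsStaticFieldSlowPathSketch(const void* klass, int status,
                                    int status_initialized) {
  return klass == nullptr || status < status_initialized;
}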
/*
* Generate a kPseudoBarrier marker to indicate the boundary of special
* blocks.
@@ -571,41 +700,6 @@ void Mir2Lir::GenFillArrayData(MIR* mir, DexOffset table_offset, RegLocation rl_
CallRuntimeHelperImmRegLocation(kQuickHandleFillArrayData, table_offset_from_start, rl_src, true);
}
-//
-// Slow path to ensure a class is initialized for sget/sput.
-//
-class StaticFieldSlowPath : public Mir2Lir::LIRSlowPath {
- public:
- // There are up to two branches to the static field slow path, the "unresolved" when the type
- // entry in the dex cache is null, and the "uninit" when the class is not yet initialized.
- // At least one will be non-null here, otherwise we wouldn't generate the slow path.
- StaticFieldSlowPath(Mir2Lir* m2l, LIR* unresolved, LIR* uninit, LIR* cont, int storage_index,
- RegStorage r_base)
- : LIRSlowPath(m2l, unresolved != nullptr ? unresolved : uninit, cont),
- second_branch_(unresolved != nullptr ? uninit : nullptr),
- storage_index_(storage_index), r_base_(r_base) {
- }
-
- void Compile() {
- LIR* target = GenerateTargetLabel();
- if (second_branch_ != nullptr) {
- second_branch_->target = target;
- }
- m2l_->CallRuntimeHelperImm(kQuickInitializeStaticStorage, storage_index_, true);
- // Copy helper's result into r_base, a no-op on all but MIPS.
- m2l_->OpRegCopy(r_base_, m2l_->TargetReg(kRet0, kRef));
-
- m2l_->OpUnconditionalBranch(cont_);
- }
-
- private:
- // Second branch to the slow path, or null if there's only one branch.
- LIR* const second_branch_;
-
- const int storage_index_;
- const RegStorage r_base_;
-};
-
void Mir2Lir::GenSput(MIR* mir, RegLocation rl_src, OpSize size) {
const MirSFieldLoweringInfo& field_info = mir_graph_->GetSFieldLoweringInfo(mir);
DCHECK_EQ(SPutMemAccessType(mir->dalvikInsn.opcode), field_info.MemAccessType());
@@ -615,65 +709,23 @@ void Mir2Lir::GenSput(MIR* mir, RegLocation rl_src, OpSize size) {
RegStorage r_base;
if (field_info.IsReferrersClass()) {
// Fast path, static storage base is this method's class
- RegLocation rl_method = LoadCurrMethod();
r_base = AllocTempRef();
- LoadRefDisp(rl_method.reg, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), r_base,
+ RegStorage r_method = LoadCurrMethodWithHint(r_base);
+ LoadRefDisp(r_method, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), r_base,
kNotVolatile);
- if (IsTemp(rl_method.reg)) {
- FreeTemp(rl_method.reg);
- }
} else {
// Medium path, static storage base in a different class which requires checks that the other
// class is initialized.
- // TODO: remove initialized check now that we are initializing classes in the compiler driver.
- DCHECK_NE(field_info.StorageIndex(), DexFile::kDexNoIndex);
- // May do runtime call so everything to home locations.
- FlushAllRegs();
- // Using fixed register to sync with possible call to runtime support.
- RegStorage r_method = TargetReg(kArg1, kRef);
- LockTemp(r_method);
- LoadCurrMethodDirect(r_method);
- r_base = TargetReg(kArg0, kRef);
- LockTemp(r_base);
- LoadRefDisp(r_method, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), r_base,
- kNotVolatile);
- int32_t offset_of_field = ObjArray::OffsetOfElement(field_info.StorageIndex()).Int32Value();
- LoadRefDisp(r_base, offset_of_field, r_base, kNotVolatile);
- // r_base now points at static storage (Class*) or NULL if the type is not yet resolved.
- LIR* unresolved_branch = nullptr;
- if (!field_info.IsClassInDexCache() &&
- (mir->optimization_flags & MIR_CLASS_IS_IN_DEX_CACHE) == 0) {
- // Check if r_base is NULL.
- unresolved_branch = OpCmpImmBranch(kCondEq, r_base, 0, NULL);
- }
- LIR* uninit_branch = nullptr;
+ r_base = GenGetOtherTypeForSgetSput(field_info, mir->optimization_flags);
if (!field_info.IsClassInitialized() &&
(mir->optimization_flags & MIR_CLASS_IS_INITIALIZED) == 0) {
- // Check if r_base is not yet initialized class.
- RegStorage r_tmp = TargetReg(kArg2, kNotWide);
- LockTemp(r_tmp);
- uninit_branch = OpCmpMemImmBranch(kCondLt, r_tmp, r_base,
- mirror::Class::StatusOffset().Int32Value(),
- mirror::Class::kStatusInitialized, nullptr, nullptr);
- FreeTemp(r_tmp);
- }
- if (unresolved_branch != nullptr || uninit_branch != nullptr) {
- // The slow path is invoked if the r_base is NULL or the class pointed
- // to by it is not initialized.
- LIR* cont = NewLIR0(kPseudoTargetLabel);
- AddSlowPath(new (arena_) StaticFieldSlowPath(this, unresolved_branch, uninit_branch, cont,
- field_info.StorageIndex(), r_base));
-
- if (uninit_branch != nullptr) {
- // Ensure load of status and store of value don't re-order.
- // TODO: Presumably the actual value store is control-dependent on the status load,
- // and will thus not be reordered in any case, since stores are never speculated.
- // Does later code "know" that the class is now initialized? If so, we still
- // need the barrier to guard later static loads.
- GenMemBarrier(kLoadAny);
- }
+ // Ensure load of status and store of value don't re-order.
+ // TODO: Presumably the actual value store is control-dependent on the status load,
+ // and will thus not be reordered in any case, since stores are never speculated.
+ // Does later code "know" that the class is now initialized? If so, we still
+ // need the barrier to guard later static loads.
+ GenMemBarrier(kLoadAny);
}
- FreeTemp(r_method);
}
// rBase now holds static storage base
RegisterClass reg_class = RegClassForFieldLoadStore(size, field_info.IsVolatile());
@@ -735,57 +787,19 @@ void Mir2Lir::GenSget(MIR* mir, RegLocation rl_dest, OpSize size, Primitive::Typ
RegStorage r_base;
if (field_info.IsReferrersClass()) {
// Fast path, static storage base is this method's class
- RegLocation rl_method = LoadCurrMethod();
r_base = AllocTempRef();
- LoadRefDisp(rl_method.reg, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), r_base,
+ RegStorage r_method = LoadCurrMethodWithHint(r_base);
+ LoadRefDisp(r_method, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), r_base,
kNotVolatile);
} else {
// Medium path, static storage base in a different class which requires checks that the other
// class is initialized
- DCHECK_NE(field_info.StorageIndex(), DexFile::kDexNoIndex);
- // May do runtime call so everything to home locations.
- FlushAllRegs();
- // Using fixed register to sync with possible call to runtime support.
- RegStorage r_method = TargetReg(kArg1, kRef);
- LockTemp(r_method);
- LoadCurrMethodDirect(r_method);
- r_base = TargetReg(kArg0, kRef);
- LockTemp(r_base);
- LoadRefDisp(r_method, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), r_base,
- kNotVolatile);
- int32_t offset_of_field = ObjArray::OffsetOfElement(field_info.StorageIndex()).Int32Value();
- LoadRefDisp(r_base, offset_of_field, r_base, kNotVolatile);
- // r_base now points at static storage (Class*) or NULL if the type is not yet resolved.
- LIR* unresolved_branch = nullptr;
- if (!field_info.IsClassInDexCache() &&
- (mir->optimization_flags & MIR_CLASS_IS_IN_DEX_CACHE) == 0) {
- // Check if r_base is NULL.
- unresolved_branch = OpCmpImmBranch(kCondEq, r_base, 0, NULL);
- }
- LIR* uninit_branch = nullptr;
+ r_base = GenGetOtherTypeForSgetSput(field_info, mir->optimization_flags);
if (!field_info.IsClassInitialized() &&
(mir->optimization_flags & MIR_CLASS_IS_INITIALIZED) == 0) {
- // Check if r_base is not yet initialized class.
- RegStorage r_tmp = TargetReg(kArg2, kNotWide);
- LockTemp(r_tmp);
- uninit_branch = OpCmpMemImmBranch(kCondLt, r_tmp, r_base,
- mirror::Class::StatusOffset().Int32Value(),
- mirror::Class::kStatusInitialized, nullptr, nullptr);
- FreeTemp(r_tmp);
+ // Ensure load of status and load of value don't re-order.
+ GenMemBarrier(kLoadAny);
}
- if (unresolved_branch != nullptr || uninit_branch != nullptr) {
- // The slow path is invoked if the r_base is NULL or the class pointed
- // to by it is not initialized.
- LIR* cont = NewLIR0(kPseudoTargetLabel);
- AddSlowPath(new (arena_) StaticFieldSlowPath(this, unresolved_branch, uninit_branch, cont,
- field_info.StorageIndex(), r_base));
-
- if (uninit_branch != nullptr) {
- // Ensure load of status and load of value don't re-order.
- GenMemBarrier(kLoadAny);
- }
- }
- FreeTemp(r_method);
}
// r_base now holds static storage base
RegisterClass reg_class = RegClassForFieldLoadStore(size, field_info.IsVolatile());
@@ -1022,64 +1036,41 @@ void Mir2Lir::GenArrayObjPut(int opt_flags, RegLocation rl_array, RegLocation rl
}
void Mir2Lir::GenConstClass(uint32_t type_idx, RegLocation rl_dest) {
- RegLocation rl_method = LoadCurrMethod();
- CheckRegLocation(rl_method);
- RegStorage res_reg = AllocTempRef();
+ RegLocation rl_result;
if (!cu_->compiler_driver->CanAccessTypeWithoutChecks(cu_->method_idx,
*cu_->dex_file,
type_idx)) {
// Call out to helper which resolves type and verifies access.
// Resolved type returned in kRet0.
- CallRuntimeHelperImmReg(kQuickInitializeTypeAndVerifyAccess, type_idx, rl_method.reg, true);
- RegLocation rl_result = GetReturn(kRefReg);
- StoreValue(rl_dest, rl_result);
+ CallRuntimeHelperImmMethod(kQuickInitializeTypeAndVerifyAccess, type_idx, true);
+ rl_result = GetReturn(kRefReg);
} else {
- RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true);
- // We're don't need access checks, load type from dex cache
- int32_t dex_cache_offset =
- mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value();
- LoadRefDisp(rl_method.reg, dex_cache_offset, res_reg, kNotVolatile);
- int32_t offset_of_type = ClassArray::OffsetOfElement(type_idx).Int32Value();
- LoadRefDisp(res_reg, offset_of_type, rl_result.reg, kNotVolatile);
+ rl_result = EvalLoc(rl_dest, kRefReg, true);
+ // We don't need access checks, load type from dex cache
+ RegStorage r_method = RegStorage::InvalidReg();
+ if (CanUseOpPcRelDexCacheArrayLoad()) {
+ size_t offset = dex_cache_arrays_layout_.TypeOffset(type_idx);
+ OpPcRelDexCacheArrayLoad(cu_->dex_file, offset, rl_result.reg);
+ } else {
+ RegLocation rl_method = LoadCurrMethod();
+ CheckRegLocation(rl_method);
+ r_method = rl_method.reg;
+ int32_t dex_cache_offset =
+ mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value();
+ RegStorage res_reg = AllocTempRef();
+ LoadRefDisp(r_method, dex_cache_offset, res_reg, kNotVolatile);
+ int32_t offset_of_type = ClassArray::OffsetOfElement(type_idx).Int32Value();
+ LoadRefDisp(res_reg, offset_of_type, rl_result.reg, kNotVolatile);
+ FreeTemp(res_reg);
+ }
if (!cu_->compiler_driver->CanAssumeTypeIsPresentInDexCache(*cu_->dex_file,
type_idx) || ForceSlowTypePath(cu_)) {
// Slow path, at runtime test if type is null and if so initialize
FlushAllRegs();
- LIR* branch = OpCmpImmBranch(kCondEq, rl_result.reg, 0, NULL);
- LIR* cont = NewLIR0(kPseudoTargetLabel);
-
- // Object to generate the slow path for class resolution.
- class SlowPath : public LIRSlowPath {
- public:
- SlowPath(Mir2Lir* m2l, LIR* fromfast, LIR* cont_in, const int type_idx_in,
- const RegLocation& rl_method_in, const RegLocation& rl_result_in)
- : LIRSlowPath(m2l, fromfast, cont_in),
- type_idx_(type_idx_in), rl_method_(rl_method_in), rl_result_(rl_result_in) {
- }
-
- void Compile() {
- GenerateTargetLabel();
-
- m2l_->CallRuntimeHelperImmReg(kQuickInitializeType, type_idx_, rl_method_.reg, true);
- m2l_->OpRegCopy(rl_result_.reg, m2l_->TargetReg(kRet0, kRef));
- m2l_->OpUnconditionalBranch(cont_);
- }
-
- private:
- const int type_idx_;
- const RegLocation rl_method_;
- const RegLocation rl_result_;
- };
-
- // Add to list for future.
- AddSlowPath(new (arena_) SlowPath(this, branch, cont, type_idx, rl_method, rl_result));
-
- StoreValue(rl_dest, rl_result);
- } else {
- // Fast path, we're done - just store result
- StoreValue(rl_dest, rl_result);
+ GenIfNullUseHelperImmMethod(rl_result.reg, kQuickInitializeType, type_idx, r_method);
}
}
+ StoreValue(rl_dest, rl_result);
}
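// Editor's note: this two-way split (a PC-relative dex-cache-arrays load vs. a
// walk through the current Method*) is the pattern the patch applies to
// const-class, const-string, instance-of and check-cast alike. A condensed,
// illustrative sketch (dest/tmp/hint are placeholder registers, not names from
// the tree):
//   if (CanUseOpPcRelDexCacheArrayLoad()) {
//     size_t offset = dex_cache_arrays_layout_.TypeOffset(type_idx);
//     OpPcRelDexCacheArrayLoad(cu_->dex_file, offset, dest);  // one PC-relative load
//   } else {
//     RegStorage r_method = LoadCurrMethodWithHint(hint);     // promoted or freshly loaded
//     LoadRefDisp(r_method, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
//                 tmp, kNotVolatile);
//     LoadRefDisp(tmp, ClassArray::OffsetOfElement(type_idx).Int32Value(), dest, kNotVolatile);
//   }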
void Mir2Lir::GenConstString(uint32_t string_idx, RegLocation rl_dest) {
@@ -1092,64 +1083,42 @@ void Mir2Lir::GenConstString(uint32_t string_idx, RegLocation rl_dest) {
FlushAllRegs();
LockCallTemps(); // Using explicit registers
- // If the Method* is already in a register, we can save a copy.
- RegLocation rl_method = mir_graph_->GetMethodLoc();
- RegStorage r_method;
- if (rl_method.location == kLocPhysReg) {
- // A temp would conflict with register use below.
- DCHECK(!IsTemp(rl_method.reg));
- r_method = rl_method.reg;
- } else {
- r_method = TargetReg(kArg2, kRef);
- LoadCurrMethodDirect(r_method);
- }
- // Method to declaring class.
- LoadRefDisp(r_method, mirror::ArtMethod::DeclaringClassOffset().Int32Value(),
- TargetReg(kArg0, kRef), kNotVolatile);
- // Declaring class to dex cache strings.
- LoadRefDisp(TargetReg(kArg0, kRef), mirror::Class::DexCacheStringsOffset().Int32Value(),
- TargetReg(kArg0, kRef), kNotVolatile);
-
// Might call out to helper, which will return resolved string in kRet0
- LoadRefDisp(TargetReg(kArg0, kRef), offset_of_string, TargetReg(kRet0, kRef), kNotVolatile);
- LIR* fromfast = OpCmpImmBranch(kCondEq, TargetReg(kRet0, kRef), 0, NULL);
- LIR* cont = NewLIR0(kPseudoTargetLabel);
-
- {
- // Object to generate the slow path for string resolution.
- class SlowPath : public LIRSlowPath {
- public:
- SlowPath(Mir2Lir* m2l, LIR* fromfast_in, LIR* cont_in, RegStorage r_method_in,
- int32_t string_idx_in)
- : LIRSlowPath(m2l, fromfast_in, cont_in),
- r_method_(r_method_in), string_idx_(string_idx_in) {
- }
-
- void Compile() {
- GenerateTargetLabel();
- m2l_->CallRuntimeHelperImmReg(kQuickResolveString, string_idx_, r_method_, true);
- m2l_->OpUnconditionalBranch(cont_);
- }
-
- private:
- const RegStorage r_method_;
- const int32_t string_idx_;
- };
-
- AddSlowPath(new (arena_) SlowPath(this, fromfast, cont, r_method, string_idx));
+ RegStorage ret0 = TargetReg(kRet0, kRef);
+ RegStorage r_method = RegStorage::InvalidReg();
+ if (CanUseOpPcRelDexCacheArrayLoad()) {
+ size_t offset = dex_cache_arrays_layout_.StringOffset(string_idx);
+ OpPcRelDexCacheArrayLoad(cu_->dex_file, offset, ret0);
+ } else {
+ r_method = LoadCurrMethodWithHint(TargetReg(kArg1, kRef));
+ // Method to declaring class.
+ RegStorage arg0 = TargetReg(kArg0, kRef);
+ LoadRefDisp(r_method, mirror::ArtMethod::DeclaringClassOffset().Int32Value(),
+ arg0, kNotVolatile);
+ // Declaring class to dex cache strings.
+ LoadRefDisp(arg0, mirror::Class::DexCacheStringsOffset().Int32Value(), arg0, kNotVolatile);
+
+ LoadRefDisp(arg0, offset_of_string, ret0, kNotVolatile);
}
+ GenIfNullUseHelperImmMethod(ret0, kQuickResolveString, string_idx, r_method);
GenBarrier();
StoreValue(rl_dest, GetReturn(kRefReg));
} else {
- RegLocation rl_method = LoadCurrMethod();
- RegStorage res_reg = AllocTempRef();
RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true);
- LoadRefDisp(rl_method.reg, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), res_reg,
- kNotVolatile);
- LoadRefDisp(res_reg, mirror::Class::DexCacheStringsOffset().Int32Value(), res_reg,
- kNotVolatile);
- LoadRefDisp(res_reg, offset_of_string, rl_result.reg, kNotVolatile);
+ if (CanUseOpPcRelDexCacheArrayLoad()) {
+ size_t offset = dex_cache_arrays_layout_.StringOffset(string_idx);
+ OpPcRelDexCacheArrayLoad(cu_->dex_file, offset, rl_result.reg);
+ } else {
+ RegLocation rl_method = LoadCurrMethod();
+ RegStorage res_reg = AllocTempRef();
+ LoadRefDisp(rl_method.reg, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), res_reg,
+ kNotVolatile);
+ LoadRefDisp(res_reg, mirror::Class::DexCacheStringsOffset().Int32Value(), res_reg,
+ kNotVolatile);
+ LoadRefDisp(res_reg, offset_of_string, rl_result.reg, kNotVolatile);
+ FreeTemp(res_reg);
+ }
StoreValue(rl_dest, rl_result);
}
}
@@ -1224,14 +1193,20 @@ void Mir2Lir::GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx, Re
RegStorage check_class = AllocTypedTemp(false, kRefReg);
RegStorage object_class = AllocTypedTemp(false, kRefReg);
- LoadCurrMethodDirect(check_class);
if (use_declaring_class) {
- LoadRefDisp(check_class, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), check_class,
+ RegStorage r_method = LoadCurrMethodWithHint(check_class);
+ LoadRefDisp(r_method, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), check_class,
+ kNotVolatile);
+ LoadRefDisp(object.reg, mirror::Object::ClassOffset().Int32Value(), object_class,
kNotVolatile);
+ } else if (CanUseOpPcRelDexCacheArrayLoad()) {
+ size_t offset = dex_cache_arrays_layout_.TypeOffset(type_idx);
+ OpPcRelDexCacheArrayLoad(cu_->dex_file, offset, check_class);
LoadRefDisp(object.reg, mirror::Object::ClassOffset().Int32Value(), object_class,
kNotVolatile);
} else {
- LoadRefDisp(check_class, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
+ RegStorage r_method = LoadCurrMethodWithHint(check_class);
+ LoadRefDisp(r_method, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
check_class, kNotVolatile);
LoadRefDisp(object.reg, mirror::Object::ClassOffset().Int32Value(), object_class,
kNotVolatile);
@@ -1267,20 +1242,19 @@ void Mir2Lir::GenInstanceofCallingHelper(bool needs_access_check, bool type_know
FlushAllRegs();
// May generate a call - use explicit registers
LockCallTemps();
- RegStorage method_reg = TargetReg(kArg1, kRef);
- LoadCurrMethodDirect(method_reg); // kArg1 <= current Method*
RegStorage class_reg = TargetReg(kArg2, kRef); // kArg2 will hold the Class*
RegStorage ref_reg = TargetReg(kArg0, kRef); // kArg0 will hold the ref.
RegStorage ret_reg = GetReturn(kRefReg).reg;
if (needs_access_check) {
// Check we have access to type_idx and if not throw IllegalAccessError,
// returns Class* in kArg0
- CallRuntimeHelperImm(kQuickInitializeTypeAndVerifyAccess, type_idx, true);
+ CallRuntimeHelperImmMethod(kQuickInitializeTypeAndVerifyAccess, type_idx, true);
OpRegCopy(class_reg, ret_reg); // Align usage with fast path
LoadValueDirectFixed(rl_src, ref_reg); // kArg0 <= ref
} else if (use_declaring_class) {
+ RegStorage r_method = LoadCurrMethodWithHint(TargetReg(kArg1, kRef));
LoadValueDirectFixed(rl_src, ref_reg); // kArg0 <= ref
- LoadRefDisp(method_reg, mirror::ArtMethod::DeclaringClassOffset().Int32Value(),
+ LoadRefDisp(r_method, mirror::ArtMethod::DeclaringClassOffset().Int32Value(),
class_reg, kNotVolatile);
} else {
if (can_assume_type_is_in_dex_cache) {
@@ -1288,42 +1262,23 @@ void Mir2Lir::GenInstanceofCallingHelper(bool needs_access_check, bool type_know
LoadValueDirectFixed(rl_src, ref_reg); // kArg0 <= ref
}
- // Load dex cache entry into class_reg (kArg2)
- LoadRefDisp(method_reg, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
- class_reg, kNotVolatile);
- int32_t offset_of_type = ClassArray::OffsetOfElement(type_idx).Int32Value();
- LoadRefDisp(class_reg, offset_of_type, class_reg, kNotVolatile);
+ RegStorage r_method = RegStorage::InvalidReg();
+ if (CanUseOpPcRelDexCacheArrayLoad()) {
+ size_t offset = dex_cache_arrays_layout_.TypeOffset(type_idx);
+ OpPcRelDexCacheArrayLoad(cu_->dex_file, offset, class_reg);
+ } else {
+ r_method = LoadCurrMethodWithHint(TargetReg(kArg1, kRef));
+ // Load dex cache entry into class_reg (kArg2)
+ LoadRefDisp(r_method, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
+ class_reg, kNotVolatile);
+ int32_t offset_of_type = ClassArray::OffsetOfElement(type_idx).Int32Value();
+ LoadRefDisp(class_reg, offset_of_type, class_reg, kNotVolatile);
+ }
if (!can_assume_type_is_in_dex_cache) {
- LIR* slow_path_branch = OpCmpImmBranch(kCondEq, class_reg, 0, NULL);
- LIR* slow_path_target = NewLIR0(kPseudoTargetLabel);
+ GenIfNullUseHelperImmMethod(class_reg, kQuickInitializeType, type_idx, r_method);
// Should load value here.
LoadValueDirectFixed(rl_src, ref_reg); // kArg0 <= ref
-
- class InitTypeSlowPath : public Mir2Lir::LIRSlowPath {
- public:
- InitTypeSlowPath(Mir2Lir* m2l, LIR* branch, LIR* cont, uint32_t type_idx_in,
- RegLocation rl_src_in)
- : LIRSlowPath(m2l, branch, cont), type_idx_(type_idx_in),
- rl_src_(rl_src_in) {
- }
-
- void Compile() OVERRIDE {
- GenerateTargetLabel();
-
- m2l_->CallRuntimeHelperImm(kQuickInitializeType, type_idx_, true);
- m2l_->OpRegCopy(m2l_->TargetReg(kArg2, kRef),
- m2l_->TargetReg(kRet0, kRef)); // Align usage with fast path
- m2l_->OpUnconditionalBranch(cont_);
- }
-
- private:
- uint32_t type_idx_;
- RegLocation rl_src_;
- };
-
- AddSlowPath(new (arena_) InitTypeSlowPath(this, slow_path_branch, slow_path_target,
- type_idx, rl_src));
}
}
/* kArg0 is ref, kArg2 is class. If ref==null, use directly as bool result */
@@ -1426,55 +1381,34 @@ void Mir2Lir::GenCheckCast(int opt_flags, uint32_t insn_idx, uint32_t type_idx,
FlushAllRegs();
// May generate a call - use explicit registers
LockCallTemps();
- RegStorage method_reg = TargetReg(kArg1, kRef);
- LoadCurrMethodDirect(method_reg); // kArg1 <= current Method*
RegStorage class_reg = TargetReg(kArg2, kRef); // kArg2 will hold the Class*
if (needs_access_check) {
// Check we have access to type_idx and if not throw IllegalAccessError,
// returns Class* in kRet0
// InitializeTypeAndVerifyAccess(idx, method)
- CallRuntimeHelperImm(kQuickInitializeTypeAndVerifyAccess, type_idx, true);
+ CallRuntimeHelperImmMethod(kQuickInitializeTypeAndVerifyAccess, type_idx, true);
OpRegCopy(class_reg, TargetReg(kRet0, kRef)); // Align usage with fast path
} else if (use_declaring_class) {
+ RegStorage method_reg = LoadCurrMethodWithHint(TargetReg(kArg1, kRef));
LoadRefDisp(method_reg, mirror::ArtMethod::DeclaringClassOffset().Int32Value(),
class_reg, kNotVolatile);
} else {
// Load dex cache entry into class_reg (kArg2)
- LoadRefDisp(method_reg, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
- class_reg, kNotVolatile);
- int32_t offset_of_type = ClassArray::OffsetOfElement(type_idx).Int32Value();
- LoadRefDisp(class_reg, offset_of_type, class_reg, kNotVolatile);
+ RegStorage r_method = RegStorage::InvalidReg();
+ if (CanUseOpPcRelDexCacheArrayLoad()) {
+ size_t offset = dex_cache_arrays_layout_.TypeOffset(type_idx);
+ OpPcRelDexCacheArrayLoad(cu_->dex_file, offset, class_reg);
+ } else {
+ r_method = LoadCurrMethodWithHint(TargetReg(kArg1, kRef));
+
+ LoadRefDisp(r_method, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
+ class_reg, kNotVolatile);
+ int32_t offset_of_type = ClassArray::OffsetOfElement(type_idx).Int32Value();
+ LoadRefDisp(class_reg, offset_of_type, class_reg, kNotVolatile);
+ }
if (!cu_->compiler_driver->CanAssumeTypeIsPresentInDexCache(*cu_->dex_file, type_idx)) {
// Need to test presence of type in dex cache at runtime
- LIR* hop_branch = OpCmpImmBranch(kCondEq, class_reg, 0, NULL);
- LIR* cont = NewLIR0(kPseudoTargetLabel);
-
- // Slow path to initialize the type. Executed if the type is NULL.
- class SlowPath : public LIRSlowPath {
- public:
- SlowPath(Mir2Lir* m2l, LIR* fromfast, LIR* cont_in, const int type_idx_in,
- const RegStorage class_reg_in)
- : LIRSlowPath(m2l, fromfast, cont_in),
- type_idx_(type_idx_in), class_reg_(class_reg_in) {
- }
-
- void Compile() {
- GenerateTargetLabel();
-
- // Call out to helper, which will return resolved type in kArg0
- // InitializeTypeFromCode(idx, method)
- m2l_->CallRuntimeHelperImmReg(kQuickInitializeType, type_idx_,
- m2l_->TargetReg(kArg1, kRef), true);
- m2l_->OpRegCopy(class_reg_, m2l_->TargetReg(kRet0, kRef)); // Align usage with fast path
- m2l_->OpUnconditionalBranch(cont_);
- }
-
- public:
- const int type_idx_;
- const RegStorage class_reg_;
- };
-
- AddSlowPath(new (arena_) SlowPath(this, hop_branch, cont, type_idx, class_reg));
+ GenIfNullUseHelperImmMethod(class_reg, kQuickInitializeType, type_idx, r_method);
}
}
// At this point, class_reg (kArg2) has class
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index 2d41ba1795..db7095dafb 100755
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -24,6 +24,7 @@
#include "dex/quick/dex_file_to_method_inliner_map.h"
#include "dex_file-inl.h"
#include "driver/compiler_driver.h"
+#include "driver/compiler_options.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "invoke_type.h"
#include "mirror/array.h"
@@ -1434,10 +1435,12 @@ bool Mir2Lir::GenInlinedUnsafePut(CallInfo* info, bool is_long,
void Mir2Lir::GenInvoke(CallInfo* info) {
DCHECK(cu_->compiler_driver->GetMethodInlinerMap() != nullptr);
- const DexFile* dex_file = info->method_ref.dex_file;
- if (cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(dex_file)
- ->GenIntrinsic(this, info)) {
- return;
+ if (mir_graph_->GetMethodLoweringInfo(info->mir).IsIntrinsic()) {
+ const DexFile* dex_file = info->method_ref.dex_file;
+ auto* inliner = cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(dex_file);
+ if (inliner->GenIntrinsic(this, info)) {
+ return;
+ }
}
GenInvokeNoInline(info);
}
diff --git a/compiler/dex/quick/gen_loadstore.cc b/compiler/dex/quick/gen_loadstore.cc
index b71691f20a..54e5742837 100644
--- a/compiler/dex/quick/gen_loadstore.cc
+++ b/compiler/dex/quick/gen_loadstore.cc
@@ -340,6 +340,20 @@ void Mir2Lir::LoadCurrMethodDirect(RegStorage r_tgt) {
LoadValueDirectFixed(mir_graph_->GetMethodLoc(), r_tgt);
}
+RegStorage Mir2Lir::LoadCurrMethodWithHint(RegStorage r_hint) {
+ // If the method is promoted to a register, return that register; otherwise load it into r_hint.
+ // (A replacement for LoadCurrMethod(), usually used when LockCallTemps() is in effect.)
+ DCHECK(r_hint.Valid());
+ RegLocation rl_method = mir_graph_->GetMethodLoc();
+ if (rl_method.location == kLocPhysReg) {
+ DCHECK(!IsTemp(rl_method.reg));
+ return rl_method.reg;
+ } else {
+ LoadCurrMethodDirect(r_hint);
+ return r_hint;
+ }
+}
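// Editor's note: a minimal usage sketch under the caller conventions seen in
// this patch (illustrative only):
//   RegStorage r_method = LoadCurrMethodWithHint(TargetReg(kArg1, kRef));
//   // r_method is the promoted Method* register when one exists; otherwise
//   // it is kArg1, freshly loaded. Either way no redundant copy is emitted.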
+
RegLocation Mir2Lir::LoadCurrMethod() {
return LoadValue(mir_graph_->GetMethodLoc(), kRefReg);
}
diff --git a/compiler/dex/quick/lazy_debug_frame_opcode_writer.cc b/compiler/dex/quick/lazy_debug_frame_opcode_writer.cc
new file mode 100644
index 0000000000..5cfb0ff557
--- /dev/null
+++ b/compiler/dex/quick/lazy_debug_frame_opcode_writer.cc
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "lazy_debug_frame_opcode_writer.h"
+#include "mir_to_lir.h"
+
+namespace art {
+namespace dwarf {
+
+const ArenaVector<uint8_t>* LazyDebugFrameOpCodeWriter::Patch(size_t code_size) {
+ if (!this->enabled_) {
+ DCHECK(this->data()->empty());
+ return this->data();
+ }
+ if (!patched_) {
+ patched_ = true;
+ // Move our data buffer to a temporary variable.
+ ArenaVector<uint8_t> old_opcodes(this->opcodes_.get_allocator());
+ old_opcodes.swap(this->opcodes_);
+ // Refill our data buffer with patched opcodes.
+ this->opcodes_.reserve(old_opcodes.size() + advances_.size() + 4);
+ size_t pos = 0;
+ for (auto advance : advances_) {
+ DCHECK_GE(advance.pos, pos);
+ // Copy old data up to the point where the advance was issued.
+ this->opcodes_.insert(this->opcodes_.end(),
+ old_opcodes.begin() + pos,
+ old_opcodes.begin() + advance.pos);
+ pos = advance.pos;
+ // This may be null if there is no slow-path code after return.
+ LIR* next_lir = NEXT_LIR(advance.last_lir_insn);
+ // Insert the advance command with its final offset.
+ Base::AdvancePC(next_lir != nullptr ? next_lir->offset : code_size);
+ }
+ // Copy the final segment.
+ this->opcodes_.insert(this->opcodes_.end(),
+ old_opcodes.begin() + pos,
+ old_opcodes.end());
+ Base::AdvancePC(code_size);
+ }
+ return this->data();
+}
+
+} // namespace dwarf
+} // namespace art
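// Editor's note: a worked example of Patch() on assumed data (illustrative
// only). Suppose the buffer and recorded advances are:
//   opcodes_  = [op0 op1 op2 op3]
//   advances_ = [{pos = 2, last_lir_insn = A}, {pos = 3, last_lir_insn = B}]
// Patch(code_size) rebuilds the stream as:
//   op0 op1  AdvancePC(NEXT_LIR(A)->offset)  op2  AdvancePC(NEXT_LIR(B)->offset)
//   op3  AdvancePC(code_size)
// (using code_size in place of NEXT_LIR(...)->offset when an advance follows
// the last LIR). Each deferred advance thus becomes a concrete DWARF advance
// opcode once final instruction offsets are known after assembly.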
diff --git a/compiler/dex/quick/lazy_debug_frame_opcode_writer.h b/compiler/dex/quick/lazy_debug_frame_opcode_writer.h
new file mode 100644
index 0000000000..94ffd7f957
--- /dev/null
+++ b/compiler/dex/quick/lazy_debug_frame_opcode_writer.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DEX_QUICK_LAZY_DEBUG_FRAME_OPCODE_WRITER_H_
+#define ART_COMPILER_DEX_QUICK_LAZY_DEBUG_FRAME_OPCODE_WRITER_H_
+
+#include "base/arena_allocator.h"
+#include "base/arena_containers.h"
+#include "dwarf/debug_frame_opcode_writer.h"
+
+namespace art {
+struct LIR;
+namespace dwarf {
+
+// When we are generating the CFI code, we do not yet know the instruction
+// offsets, so this class stores the LIR references and patches the
+// instruction stream later.
+class LazyDebugFrameOpCodeWriter FINAL
+ : public DebugFrameOpCodeWriter<ArenaAllocatorAdapter<uint8_t>> {
+ typedef DebugFrameOpCodeWriter<ArenaAllocatorAdapter<uint8_t>> Base;
+ public:
+ // This method is implicitly called by the opcode writers.
+ virtual void ImplicitlyAdvancePC() OVERRIDE {
+ DCHECK_EQ(patched_, false);
+ DCHECK_EQ(this->current_pc_, 0);
+ advances_.push_back({this->data()->size(), *last_lir_insn_});
+ }
+
+ const ArenaVector<uint8_t>* Patch(size_t code_size);
+
+ explicit LazyDebugFrameOpCodeWriter(LIR** last_lir_insn, bool enable_writes,
+ ArenaAllocator* allocator)
+ : Base(enable_writes, allocator->Adapter()),
+ last_lir_insn_(last_lir_insn),
+ advances_(allocator->Adapter()),
+ patched_(false) {
+ }
+
+ private:
+ struct Advance {
+ size_t pos;
+ LIR* last_lir_insn;
+ };
+
+ using Base::data; // Hidden. Use Patch method instead.
+
+ LIR** last_lir_insn_;
+ ArenaVector<Advance> advances_;
+ bool patched_;
+
+ DISALLOW_COPY_AND_ASSIGN(LazyDebugFrameOpCodeWriter);
+};
+
+} // namespace dwarf
+} // namespace art
+
+#endif // ART_COMPILER_DEX_QUICK_LAZY_DEBUG_FRAME_OPCODE_WRITER_H_
diff --git a/compiler/dex/quick/local_optimizations.cc b/compiler/dex/quick/local_optimizations.cc
index e5738998a0..6cdf56773e 100644
--- a/compiler/dex/quick/local_optimizations.cc
+++ b/compiler/dex/quick/local_optimizations.cc
@@ -493,15 +493,14 @@ void Mir2Lir::ApplyLoadHoisting(LIR* head_lir, LIR* tail_lir) {
/* Found a slot to hoist to */
if (slot >= 0) {
LIR* cur_lir = prev_inst_list[slot];
- LIR* new_load_lir =
- static_cast<LIR*>(arena_->Alloc(sizeof(LIR), kArenaAllocLIR));
- *new_load_lir = *this_lir;
+ LIR* prev_lir = PREV_LIR(this_lir);
+ UnlinkLIR(this_lir);
/*
* Insertion is guaranteed to succeed since check_lir
* is never the first LIR on the list
*/
- InsertLIRBefore(cur_lir, new_load_lir);
- NopLIR(this_lir);
+ InsertLIRBefore(cur_lir, this_lir);
+ this_lir = prev_lir; // Continue the loop with the next LIR.
}
}
}
diff --git a/compiler/dex/quick/mips/call_mips.cc b/compiler/dex/quick/mips/call_mips.cc
index de66b35418..05570e4bde 100644
--- a/compiler/dex/quick/mips/call_mips.cc
+++ b/compiler/dex/quick/mips/call_mips.cc
@@ -238,7 +238,12 @@ void MipsMir2Lir::UnconditionallyMarkGCCard(RegStorage tgt_addr_reg) {
FreeTemp(reg_card_no);
}
+static dwarf::Reg DwarfCoreReg(int num) {
+ return dwarf::Reg::MipsCore(num);
+}
+
void MipsMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) {
+ DCHECK_EQ(cfi_.GetCurrentCFAOffset(), 0);
int spill_count = num_core_spills_ + num_fp_spills_;
/*
* On entry, A0, A1, A2 & A3 are live. On Mips64, A4, A5, A6 & A7 are also live.
@@ -275,7 +280,6 @@ void MipsMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method)
*/
skip_overflow_check = mir_graph_->MethodIsLeaf() && !FrameNeedsStackCheck(frame_size_, target);
- NewLIR0(kPseudoMethodEntry);
RegStorage check_reg = AllocPtrSizeTemp();
RegStorage new_sp = AllocPtrSizeTemp();
const RegStorage rs_sp = TargetPtrReg(kSp);
@@ -305,10 +309,12 @@ void MipsMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method)
// RA is offset 0 since we push in reverse order.
m2l_->LoadWordDisp(m2l_->TargetPtrReg(kSp), 0, m2l_->TargetPtrReg(kLr));
m2l_->OpRegImm(kOpAdd, m2l_->TargetPtrReg(kSp), sp_displace_);
+ m2l_->cfi().AdjustCFAOffset(-sp_displace_);
m2l_->ClobberCallerSave();
RegStorage r_tgt = m2l_->CallHelperSetup(kQuickThrowStackOverflow); // Doesn't clobber LR.
m2l_->CallHelper(r_tgt, kQuickThrowStackOverflow, false /* MarkSafepointPC */,
false /* UseLink */);
+ m2l_->cfi().AdjustCFAOffset(sp_displace_);
}
private:
@@ -319,8 +325,10 @@ void MipsMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method)
AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, spill_count * ptr_size));
// TODO: avoid copy for small frame sizes.
OpRegCopy(rs_sp, new_sp); // Establish stack.
+ cfi_.AdjustCFAOffset(frame_sub);
} else {
OpRegImm(kOpSub, rs_sp, frame_sub);
+ cfi_.AdjustCFAOffset(frame_sub);
}
FlushIns(ArgLocs, rl_method);
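// Editor's note: the recurring pattern in these backend changes is that every
// instruction moving the stack pointer is immediately mirrored in the CFI
// writer, e.g. (sketch, not part of the diff):
//   OpRegImm(kOpSub, rs_sp, frame_sub);  // sp -= frame_sub
//   cfi_.AdjustCFAOffset(frame_sub);     // keep CFA = sp + offset in sync
// Slow paths that temporarily pop the frame adjust the offset back
// symmetrically, as StackOverflowSlowPath does above.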
@@ -338,6 +346,7 @@ void MipsMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method)
}
void MipsMir2Lir::GenExitSequence() {
+ cfi_.RememberState();
/*
* In the exit path, rMIPS_RET0/rMIPS_RET1 are live - make sure they aren't
* allocated by the register utilities as temps.
@@ -345,9 +354,11 @@ void MipsMir2Lir::GenExitSequence() {
LockTemp(TargetPtrReg(kRet0));
LockTemp(TargetPtrReg(kRet1));
- NewLIR0(kPseudoMethodExit);
UnSpillCoreRegs();
OpReg(kOpBx, TargetPtrReg(kLr));
+ // The CFI should be restored for any code that follows the exit block.
+ cfi_.RestoreState();
+ cfi_.DefCFAOffset(frame_size_);
}
void MipsMir2Lir::GenSpecialExitSequence() {
@@ -366,15 +377,20 @@ void MipsMir2Lir::GenSpecialEntryForSuspend() {
fp_vmap_table_.clear();
const RegStorage rs_sp = TargetPtrReg(kSp);
OpRegImm(kOpSub, rs_sp, frame_size_);
+ cfi_.AdjustCFAOffset(frame_size_);
StoreWordDisp(rs_sp, frame_size_ - (cu_->target64 ? 8 : 4), TargetPtrReg(kLr));
+ cfi_.RelOffset(DwarfCoreReg(rRA), frame_size_ - (cu_->target64 ? 8 : 4));
StoreWordDisp(rs_sp, 0, TargetPtrReg(kArg0));
+ // Do not generate CFI for scratch register A0.
}
void MipsMir2Lir::GenSpecialExitForSuspend() {
// Pop the frame. Don't pop ArtMethod*, it's no longer needed.
const RegStorage rs_sp = TargetPtrReg(kSp);
LoadWordDisp(rs_sp, frame_size_ - (cu_->target64 ? 8 : 4), TargetPtrReg(kLr));
+ cfi_.Restore(DwarfCoreReg(rRA));
OpRegImm(kOpAdd, rs_sp, frame_size_);
+ cfi_.AdjustCFAOffset(-frame_size_);
}
/*
@@ -387,73 +403,73 @@ static int NextSDCallInsn(CompilationUnit* cu, CallInfo* info ATTRIBUTE_UNUSED,
Mir2Lir* cg = static_cast<Mir2Lir*>(cu->cg.get());
if (direct_code != 0 && direct_method != 0) {
switch (state) {
- case 0: // Get the current Method* [sets kArg0]
- if (direct_code != static_cast<uintptr_t>(-1)) {
- if (cu->target64) {
- cg->LoadConstantWide(cg->TargetPtrReg(kInvokeTgt), direct_code);
+ case 0: // Get the current Method* [sets kArg0]
+ if (direct_code != static_cast<uintptr_t>(-1)) {
+ if (cu->target64) {
+ cg->LoadConstantWide(cg->TargetPtrReg(kInvokeTgt), direct_code);
+ } else {
+ cg->LoadConstant(cg->TargetPtrReg(kInvokeTgt), direct_code);
+ }
} else {
- cg->LoadConstant(cg->TargetPtrReg(kInvokeTgt), direct_code);
+ cg->LoadCodeAddress(target_method, type, kInvokeTgt);
}
- } else {
- cg->LoadCodeAddress(target_method, type, kInvokeTgt);
- }
- if (direct_method != static_cast<uintptr_t>(-1)) {
- if (cu->target64) {
- cg->LoadConstantWide(cg->TargetReg(kArg0, kRef), direct_method);
+ if (direct_method != static_cast<uintptr_t>(-1)) {
+ if (cu->target64) {
+ cg->LoadConstantWide(cg->TargetReg(kArg0, kRef), direct_method);
+ } else {
+ cg->LoadConstant(cg->TargetReg(kArg0, kRef), direct_method);
+ }
} else {
- cg->LoadConstant(cg->TargetReg(kArg0, kRef), direct_method);
+ cg->LoadMethodAddress(target_method, type, kArg0);
}
- } else {
- cg->LoadMethodAddress(target_method, type, kArg0);
- }
- break;
- default:
- return -1;
+ break;
+ default:
+ return -1;
}
} else {
RegStorage arg0_ref = cg->TargetReg(kArg0, kRef);
switch (state) {
- case 0: // Get the current Method* [sets kArg0]
- // TUNING: we can save a reg copy if Method* has been promoted.
- cg->LoadCurrMethodDirect(arg0_ref);
- break;
- case 1: // Get method->dex_cache_resolved_methods_
- cg->LoadRefDisp(arg0_ref,
- mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value(),
- arg0_ref,
- kNotVolatile);
- // Set up direct code if known.
- if (direct_code != 0) {
- if (direct_code != static_cast<uintptr_t>(-1)) {
- if (cu->target64) {
- cg->LoadConstantWide(cg->TargetPtrReg(kInvokeTgt), direct_code);
+ case 0: // Get the current Method* [sets kArg0]
+ // TUNING: we can save a reg copy if Method* has been promoted.
+ cg->LoadCurrMethodDirect(arg0_ref);
+ break;
+ case 1: // Get method->dex_cache_resolved_methods_
+ cg->LoadRefDisp(arg0_ref,
+ mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value(),
+ arg0_ref,
+ kNotVolatile);
+ // Set up direct code if known.
+ if (direct_code != 0) {
+ if (direct_code != static_cast<uintptr_t>(-1)) {
+ if (cu->target64) {
+ cg->LoadConstantWide(cg->TargetPtrReg(kInvokeTgt), direct_code);
+ } else {
+ cg->LoadConstant(cg->TargetPtrReg(kInvokeTgt), direct_code);
+ }
} else {
- cg->LoadConstant(cg->TargetPtrReg(kInvokeTgt), direct_code);
+ CHECK_LT(target_method.dex_method_index, target_method.dex_file->NumMethodIds());
+ cg->LoadCodeAddress(target_method, type, kInvokeTgt);
}
- } else {
- CHECK_LT(target_method.dex_method_index, target_method.dex_file->NumMethodIds());
- cg->LoadCodeAddress(target_method, type, kInvokeTgt);
}
- }
- break;
- case 2: // Grab target method*
- CHECK_EQ(cu->dex_file, target_method.dex_file);
- cg->LoadRefDisp(arg0_ref,
- mirror::ObjectArray<mirror::Object>::
- OffsetOfElement(target_method.dex_method_index).Int32Value(),
- arg0_ref,
- kNotVolatile);
- break;
- case 3: // Grab the code from the method*
- if (direct_code == 0) {
- int32_t offset = mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
- InstructionSetPointerSize(cu->instruction_set)).Int32Value();
- // Get the compiled code address [use *alt_from or kArg0, set kInvokeTgt]
- cg->LoadWordDisp(arg0_ref, offset, cg->TargetPtrReg(kInvokeTgt));
- }
- break;
- default:
- return -1;
+ break;
+ case 2: // Grab target method*
+ CHECK_EQ(cu->dex_file, target_method.dex_file);
+ cg->LoadRefDisp(arg0_ref,
+ mirror::ObjectArray<mirror::Object>::
+ OffsetOfElement(target_method.dex_method_index).Int32Value(),
+ arg0_ref,
+ kNotVolatile);
+ break;
+ case 3: // Grab the code from the method*
+ if (direct_code == 0) {
+ int32_t offset = mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+ InstructionSetPointerSize(cu->instruction_set)).Int32Value();
+ // Get the compiled code address [use *alt_from or kArg0, set kInvokeTgt]
+ cg->LoadWordDisp(arg0_ref, offset, cg->TargetPtrReg(kInvokeTgt));
+ }
+ break;
+ default:
+ return -1;
}
}
return state + 1;
diff --git a/compiler/dex/quick/mips/int_mips.cc b/compiler/dex/quick/mips/int_mips.cc
index 626b36ea28..1ca8bb618b 100644
--- a/compiler/dex/quick/mips/int_mips.cc
+++ b/compiler/dex/quick/mips/int_mips.cc
@@ -237,12 +237,12 @@ void MipsMir2Lir::OpRegCopyWide(RegStorage r_dest, RegStorage r_src) {
// note the operands are swapped for the mtc1 and mthc1 instr.
// Here if dest is fp reg and src is core reg.
if (fpuIs32Bit_) {
- NewLIR2(kMipsMtc1, r_src.GetLowReg(), r_dest.GetLowReg());
- NewLIR2(kMipsMtc1, r_src.GetHighReg(), r_dest.GetHighReg());
+ NewLIR2(kMipsMtc1, r_src.GetLowReg(), r_dest.GetLowReg());
+ NewLIR2(kMipsMtc1, r_src.GetHighReg(), r_dest.GetHighReg());
} else {
- r_dest = Fp64ToSolo32(r_dest);
- NewLIR2(kMipsMtc1, r_src.GetLowReg(), r_dest.GetReg());
- NewLIR2(kMipsMthc1, r_src.GetHighReg(), r_dest.GetReg());
+ r_dest = Fp64ToSolo32(r_dest);
+ NewLIR2(kMipsMtc1, r_src.GetLowReg(), r_dest.GetReg());
+ NewLIR2(kMipsMthc1, r_src.GetHighReg(), r_dest.GetReg());
}
}
} else {
@@ -309,7 +309,13 @@ RegLocation MipsMir2Lir::GenDivRem(RegLocation rl_dest, RegStorage reg1, RegStor
RegLocation MipsMir2Lir::GenDivRemLit(RegLocation rl_dest, RegStorage reg1, int lit, bool is_div) {
RegStorage t_reg = AllocTemp();
- NewLIR3(kMipsAddiu, t_reg.GetReg(), rZERO, lit);
+ // lit is guaranteed to fit in 16 bits.
+ if (IsUint<16>(lit)) {
+ NewLIR3(kMipsOri, t_reg.GetReg(), rZERO, lit);
+ } else {
+ // Addiu will sign-extend lit across the entire width (32 or 64 bits) of the register.
+ NewLIR3(kMipsAddiu, t_reg.GetReg(), rZERO, lit);
+ }
RegLocation rl_result = GenDivRem(rl_dest, reg1, t_reg, is_div);
FreeTemp(t_reg);
return rl_result;
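// Editor's note: a concrete illustration of the ori/addiu split, with assumed
// values. For lit = 40000, IsUint<16>(lit) holds and
//   ori   t, zero, 40000  // zero-extends: t = 0x00009C40 = 40000
// is correct, whereas addiu would sign-extend the immediate to 0xFFFF9C40.
// For lit = -2, IsUint<16>(lit) is false and
//   addiu t, zero, -2     // sign-extends: t = 0xFF...FE = -2
// produces the intended negative constant across the full register width.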
@@ -815,20 +821,20 @@ void MipsMir2Lir::GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest,
}
OpKind op = kOpBkpt;
switch (opcode) {
- case Instruction::SHL_LONG:
- case Instruction::SHL_LONG_2ADDR:
- op = kOpLsl;
- break;
- case Instruction::SHR_LONG:
- case Instruction::SHR_LONG_2ADDR:
- op = kOpAsr;
- break;
- case Instruction::USHR_LONG:
- case Instruction::USHR_LONG_2ADDR:
- op = kOpLsr;
- break;
- default:
- LOG(FATAL) << "Unexpected case: " << opcode;
+ case Instruction::SHL_LONG:
+ case Instruction::SHL_LONG_2ADDR:
+ op = kOpLsl;
+ break;
+ case Instruction::SHR_LONG:
+ case Instruction::SHR_LONG_2ADDR:
+ op = kOpAsr;
+ break;
+ case Instruction::USHR_LONG:
+ case Instruction::USHR_LONG_2ADDR:
+ op = kOpLsr;
+ break;
+ default:
+ LOG(FATAL) << "Unexpected case: " << opcode;
}
rl_shift = LoadValue(rl_shift, kCoreReg);
rl_src1 = LoadValueWide(rl_src1, kCoreReg);
diff --git a/compiler/dex/quick/mips/target_mips.cc b/compiler/dex/quick/mips/target_mips.cc
index a94fad7534..4c0bd8378b 100644
--- a/compiler/dex/quick/mips/target_mips.cc
+++ b/compiler/dex/quick/mips/target_mips.cc
@@ -830,6 +830,10 @@ LIR* MipsMir2Lir::GenAtomic64Store(RegStorage r_base, int displacement, RegStora
return OpReg(kOpBlx, r_tgt);
}
+static dwarf::Reg DwarfCoreReg(int num) {
+ return dwarf::Reg::MipsCore(num);
+}
+
void MipsMir2Lir::SpillCoreRegs() {
if (num_core_spills_ == 0) {
return;
@@ -839,11 +843,13 @@ void MipsMir2Lir::SpillCoreRegs() {
int offset = num_core_spills_ * ptr_size;
const RegStorage rs_sp = TargetPtrReg(kSp);
OpRegImm(kOpSub, rs_sp, offset);
+ cfi_.AdjustCFAOffset(offset);
for (int reg = 0; mask; mask >>= 1, reg++) {
if (mask & 0x1) {
offset -= ptr_size;
StoreWordDisp(rs_sp, offset,
cu_->target64 ? RegStorage::Solo64(reg) : RegStorage::Solo32(reg));
+ cfi_.RelOffset(DwarfCoreReg(reg), offset);
}
}
}
@@ -861,9 +867,11 @@ void MipsMir2Lir::UnSpillCoreRegs() {
offset -= ptr_size;
LoadWordDisp(rs_sp, offset,
cu_->target64 ? RegStorage::Solo64(reg) : RegStorage::Solo32(reg));
+ cfi_.Restore(DwarfCoreReg(reg));
}
}
OpRegImm(kOpAdd, rs_sp, frame_size_);
+ cfi_.AdjustCFAOffset(-frame_size_);
}
bool MipsMir2Lir::IsUnconditionalBranch(LIR* lir) {
diff --git a/compiler/dex/quick/mips/utility_mips.cc b/compiler/dex/quick/mips/utility_mips.cc
index bf0e0fc78b..8ab542270d 100644
--- a/compiler/dex/quick/mips/utility_mips.cc
+++ b/compiler/dex/quick/mips/utility_mips.cc
@@ -283,9 +283,9 @@ LIR* MipsMir2Lir::OpReg(OpKind op, RegStorage r_dest_src) {
break;
case kOpBx:
return NewLIR2(kMipsJalr, rZERO, r_dest_src.GetReg());
- break;
default:
LOG(FATAL) << "Bad case in OpReg";
+ UNREACHABLE();
}
return NewLIR2(opcode, cu_->target64 ? rRAd : rRA, r_dest_src.GetReg());
}
@@ -295,8 +295,8 @@ LIR* MipsMir2Lir::OpRegImm(OpKind op, RegStorage r_dest_src1, int value) {
return OpRegRegImm(op, r_dest_src1, r_dest_src1, value);
} else {
LOG(FATAL) << "Bad case in OpRegImm";
+ UNREACHABLE();
}
- UNREACHABLE();
}
LIR* MipsMir2Lir::OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2) {
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index 0b480a09c6..961cd4f06b 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -1250,10 +1250,17 @@ bool Mir2Lir::MethodBlockCodeGen(BasicBlock* bb) {
if (bb->block_type == kEntryBlock) {
ResetRegPool();
int start_vreg = mir_graph_->GetFirstInVR();
+ AppendLIR(NewLIR0(kPseudoPrologueBegin));
GenEntrySequence(&mir_graph_->reg_location_[start_vreg], mir_graph_->GetMethodLoc());
+ AppendLIR(NewLIR0(kPseudoPrologueEnd));
+ DCHECK_EQ(cfi_.GetCurrentCFAOffset(), frame_size_);
} else if (bb->block_type == kExitBlock) {
ResetRegPool();
+ DCHECK_EQ(cfi_.GetCurrentCFAOffset(), frame_size_);
+ AppendLIR(NewLIR0(kPseudoEpilogueBegin));
GenExitSequence();
+ AppendLIR(NewLIR0(kPseudoEpilogueEnd));
+ DCHECK_EQ(cfi_.GetCurrentCFAOffset(), frame_size_);
}
for (mir = bb->first_mir_insn; mir != NULL; mir = mir->next) {
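// Editor's note: the new kPseudoPrologueBegin/End and kPseudoEpilogueBegin/End
// markers bracket frame setup and teardown, presumably so debug-info emission
// can locate those boundaries, and the DCHECKs pin the invariant the CFI
// writer relies on (sketch of the intended state):
//   entry block: CFA offset 0 -> GenEntrySequence -> CFA offset == frame_size_
//   exit block:  CFA offset == frame_size_ both before and after
//                GenExitSequence (RememberState/RestoreState keep any
//                trailing slow-path code consistent).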
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index cca4e5a30a..db59714742 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -29,9 +29,11 @@
#include "dex/quick/resource_mask.h"
#include "entrypoints/quick/quick_entrypoints_enum.h"
#include "invoke_type.h"
+#include "lazy_debug_frame_opcode_writer.h"
#include "leb128.h"
#include "safe_map.h"
#include "utils/array_ref.h"
+#include "utils/dex_cache_arrays_layout.h"
#include "utils/stack_checks.h"
namespace art {
@@ -134,6 +136,7 @@ class BasicBlock;
class BitVector;
struct CallInfo;
struct CompilationUnit;
+struct CompilerTemp;
struct InlineMethod;
class MIR;
struct LIR;
@@ -141,6 +144,7 @@ struct RegisterInfo;
class DexFileMethodInliner;
class MIRGraph;
class MirMethodLoweringInfo;
+class MirSFieldLoweringInfo;
typedef int (*NextCallInsn)(CompilationUnit*, CallInfo*, int,
const MethodReference& target_method,
@@ -632,7 +636,7 @@ class Mir2Lir {
RegisterClass ShortyToRegClass(char shorty_type);
RegisterClass LocToRegClass(RegLocation loc);
int ComputeFrameSize();
- virtual void Materialize();
+ void Materialize();
virtual CompiledMethod* GetCompiledMethod();
void MarkSafepointPC(LIR* inst);
void MarkSafepointPCAfter(LIR* after);
@@ -773,9 +777,10 @@ class Mir2Lir {
*/
virtual RegLocation EvalLoc(RegLocation loc, int reg_class, bool update);
- void CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num_regs);
+ virtual void AnalyzeMIR(RefCounts* core_counts, MIR* mir, uint32_t weight);
+ virtual void CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num_regs);
void DumpCounts(const RefCounts* arr, int size, const char* msg);
- void DoPromotion();
+ virtual void DoPromotion();
int VRegOffset(int v_reg);
int SRegOffset(int s_reg);
RegLocation GetReturnWide(RegisterClass reg_class);
@@ -956,6 +961,7 @@ class Mir2Lir {
// Shared by all targets - implemented in gen_loadstore.cc.
RegLocation LoadCurrMethod();
void LoadCurrMethodDirect(RegStorage r_tgt);
+ RegStorage LoadCurrMethodWithHint(RegStorage r_hint);
virtual LIR* LoadConstant(RegStorage r_dest, int value);
// Natural word size.
LIR* LoadWordDisp(RegStorage r_base, int displacement, RegStorage r_dest) {
@@ -1093,6 +1099,18 @@ class Mir2Lir {
virtual void LoadClassType(const DexFile& dex_file, uint32_t type_idx,
SpecialTargetRegister symbolic_reg);
+ // TODO: Support PC-relative dex cache array loads on all platforms and
+ // replace CanUseOpPcRelDexCacheArrayLoad() with dex_cache_arrays_layout_.Valid().
+ virtual bool CanUseOpPcRelDexCacheArrayLoad() const;
+
+ /*
+ * @brief Load an element of one of the dex cache arrays.
+ * @param dex_file the dex file associated with the target dex cache.
+ * @param offset the offset of the element in the fixed dex cache arrays' layout.
+ * @param r_dest the register to load the element into.
+ */
+ virtual void OpPcRelDexCacheArrayLoad(const DexFile* dex_file, int offset, RegStorage r_dest);
+
// Routines that work for the generic case, but may be overriden by target.
/*
* @brief Compare memory to immediate, and branch if condition true.
@@ -1491,6 +1509,12 @@ class Mir2Lir {
return 0;
}
+ /**
+ * @brief Buffer of DWARF's Call Frame Information opcodes.
+ * @details It is used by debuggers and other tools to unwind the call stack.
+ */
+ dwarf::LazyDebugFrameOpCodeWriter& cfi() { return cfi_; }
+
protected:
Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena);
@@ -1556,11 +1580,6 @@ class Mir2Lir {
bool can_assume_type_is_in_dex_cache,
uint32_t type_idx, RegLocation rl_dest,
RegLocation rl_src);
- /*
- * @brief Generate the eh_frame FDE information if possible.
- * @returns pointer to vector containg FDE information, or NULL.
- */
- virtual std::vector<uint8_t>* ReturnFrameDescriptionEntry();
/**
* @brief Used to insert marker that can be used to associate MIR with LIR.
@@ -1596,7 +1615,6 @@ class Mir2Lir {
*/
virtual bool GenSpecialCase(BasicBlock* bb, MIR* mir, const InlineMethod& special);
- protected:
void ClobberBody(RegisterInfo* p);
void SetCurrentDexPc(DexOffset dexpc) {
current_dalvik_offset_ = dexpc;
@@ -1669,6 +1687,23 @@ class Mir2Lir {
*/
bool GenSpecialIdentity(MIR* mir, const InlineMethod& special);
+ /**
+ * @brief Generate code to check if result is null and, if it is, call helper to load it.
+ * @param r_result the result register.
+ * @param trampoline the helper to call in slow path.
+ * @param imm the immediate passed to the helper.
+ * @param r_method the register holding ArtMethod* if available, otherwise RegStorage::InvalidReg().
+ */
+ void GenIfNullUseHelperImmMethod(
+ RegStorage r_result, QuickEntrypointEnum trampoline, int imm, RegStorage r_method);
+
+ /**
+ * @brief Generate code to retrieve Class* for another type to be used by SGET/SPUT.
+ * @param field_info information about the field to be accessed.
+ * @param opt_flags the optimization flags of the MIR.
+ */
+ RegStorage GenGetOtherTypeForSgetSput(const MirSFieldLoweringInfo& field_info, int opt_flags);
+
void AddDivZeroCheckSlowPath(LIR* branch);
// Copy arg0 and arg1 to kArg0 and kArg1 safely, possibly using
@@ -1742,6 +1777,13 @@ class Mir2Lir {
// Update references from prev_mir to mir.
void UpdateReferenceVRegs(MIR* mir, MIR* prev_mir, BitVector* references);
+ /**
+ * Returns true if the frame spills the given core register.
+ */
+ bool CoreSpillMaskContains(int reg) {
+ return (core_spill_mask_ & (1u << reg)) != 0;
+ }
+
public:
// TODO: add accessors for these.
LIR* literal_list_; // Constants.
@@ -1815,7 +1857,23 @@ class Mir2Lir {
// Record the MIR that generated a given safepoint (nullptr for prologue safepoints).
ArenaVector<std::pair<LIR*, MIR*>> safepoints_;
- protected:
+ // The layout of the cu_->dex_file's dex cache arrays for PC-relative addressing.
+ const DexCacheArraysLayout dex_cache_arrays_layout_;
+
+ // For architectures that don't have true PC-relative addressing, we can promote
+ // a PC of an instruction (or another PC-relative address such as a pointer to
+ // the dex cache arrays if supported) to a register. This is indicated to the
+ // register promotion by allocating a backend temp.
+ CompilerTemp* pc_rel_temp_;
+
+ // For architectures that don't have true PC-relative addressing (see pc_rel_temp_
+ // above) and also have a limited range of offsets for loads, it's useful to
+ // know the minimum offset into the dex cache arrays, so we calculate that as well
+ // if pc_rel_temp_ isn't nullptr.
+ uint32_t dex_cache_arrays_min_offset_;
+
+ dwarf::LazyDebugFrameOpCodeWriter cfi_;
+
// ABI support
class ShortyArg {
public:
@@ -1875,6 +1933,8 @@ class Mir2Lir {
private:
static bool SizeMatchesTypeForEntrypoint(OpSize size, Primitive::Type type);
+
+ friend class QuickCFITest;
}; // Class Mir2Lir
} // namespace art
diff --git a/compiler/dex/quick/quick_cfi_test.cc b/compiler/dex/quick/quick_cfi_test.cc
new file mode 100644
index 0000000000..2e62166b7b
--- /dev/null
+++ b/compiler/dex/quick/quick_cfi_test.cc
@@ -0,0 +1,139 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vector>
+#include <memory>
+
+#include "arch/instruction_set.h"
+#include "arch/instruction_set_features.h"
+#include "cfi_test.h"
+#include "dex/compiler_ir.h"
+#include "dex/mir_graph.h"
+#include "dex/pass_manager.h"
+#include "dex/quick/dex_file_to_method_inliner_map.h"
+#include "dex/quick/quick_compiler.h"
+#include "dex/quick/mir_to_lir.h"
+#include "dex/verification_results.h"
+#include "driver/compiler_driver.h"
+#include "driver/compiler_options.h"
+#include "gtest/gtest.h"
+
+#include "dex/quick/quick_cfi_test_expected.inc"
+
+namespace art {
+
+// Run the tests only on host.
+#ifndef HAVE_ANDROID_OS
+
+class QuickCFITest : public CFITest {
+ public:
+ // Enable this flag to generate the expected outputs.
+ static constexpr bool kGenerateExpected = false;
+
+ void TestImpl(InstructionSet isa, const char* isa_str,
+ const std::vector<uint8_t>& expected_asm,
+ const std::vector<uint8_t>& expected_cfi) {
+ // Set up a simple compiler context.
+ ArenaPool pool;
+ ArenaAllocator arena(&pool);
+ CompilerOptions compiler_options(
+ CompilerOptions::kDefaultCompilerFilter,
+ CompilerOptions::kDefaultHugeMethodThreshold,
+ CompilerOptions::kDefaultLargeMethodThreshold,
+ CompilerOptions::kDefaultSmallMethodThreshold,
+ CompilerOptions::kDefaultTinyMethodThreshold,
+ CompilerOptions::kDefaultNumDexMethodsThreshold,
+ true, // generate_gdb_information.
+ false,
+ CompilerOptions::kDefaultTopKProfileThreshold,
+ false,
+ true, // include_debug_symbols.
+ false,
+ false,
+ false,
+ false,
+ nullptr,
+ new PassManagerOptions(),
+ nullptr,
+ false);
+ VerificationResults verification_results(&compiler_options);
+ DexFileToMethodInlinerMap method_inliner_map;
+ std::unique_ptr<const InstructionSetFeatures> isa_features;
+ std::string error;
+ isa_features.reset(InstructionSetFeatures::FromVariant(isa, "default", &error));
+ CompilerDriver driver(&compiler_options, &verification_results, &method_inliner_map,
+ Compiler::kQuick, isa, isa_features.get(),
+ false, 0, 0, 0, false, false, "", 0, -1, "");
+ ClassLinker* linker = nullptr;
+ CompilationUnit cu(&pool, isa, &driver, linker);
+ DexFile::CodeItem code_item { 0, 0, 0, 0, 0, 0, { 0 } }; // NOLINT
+ cu.mir_graph.reset(new MIRGraph(&cu, &arena));
+ cu.mir_graph->current_code_item_ = &code_item;
+
+ // Generate an empty method with some spills.
+ std::unique_ptr<Mir2Lir> m2l(QuickCompiler::GetCodeGenerator(&cu, nullptr));
+ m2l->frame_size_ = 64u;
+ m2l->CompilerInitializeRegAlloc();
+ for (const auto& info : m2l->reg_pool_->core_regs_) {
+ if (m2l->num_core_spills_ < 2 && !info->IsTemp() && !info->InUse()) {
+ m2l->core_spill_mask_ |= 1 << info->GetReg().GetReg();
+ m2l->num_core_spills_++;
+ }
+ }
+ for (const auto& info : m2l->reg_pool_->sp_regs_) {
+ if (m2l->num_fp_spills_ < 2 && !info->IsTemp() && !info->InUse()) {
+ m2l->fp_spill_mask_ |= 1 << info->GetReg().GetReg();
+ m2l->num_fp_spills_++;
+ }
+ }
+ m2l->AdjustSpillMask();
+ m2l->GenEntrySequence(NULL, m2l->LocCReturnRef());
+ m2l->GenExitSequence();
+ m2l->HandleSlowPaths();
+ m2l->AssembleLIR();
+ std::vector<uint8_t> actual_asm(m2l->code_buffer_.begin(), m2l->code_buffer_.end());
+ auto const& cfi_data = m2l->cfi().Patch(actual_asm.size());
+ std::vector<uint8_t> actual_cfi(cfi_data->begin(), cfi_data->end());
+ EXPECT_EQ(m2l->cfi().GetCurrentPC(), static_cast<int>(actual_asm.size()));
+
+ if (kGenerateExpected) {
+ GenerateExpected(stdout, isa, isa_str, actual_asm, actual_cfi);
+ } else {
+ EXPECT_EQ(expected_asm, actual_asm);
+ EXPECT_EQ(expected_cfi, actual_cfi);
+ }
+ }
+};
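// Editor's note: to regenerate the expected .inc data, flip kGenerateExpected
// to true; TestImpl() then calls GenerateExpected(stdout, ...) to print the
// expected byte arrays (and the annotated disassembly seen in the .inc file
// above) instead of comparing against them.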
+
+#define TEST_ISA(isa) \
+ TEST_F(QuickCFITest, isa) { \
+ std::vector<uint8_t> expected_asm(expected_asm_##isa, \
+ expected_asm_##isa + arraysize(expected_asm_##isa)); \
+ std::vector<uint8_t> expected_cfi(expected_cfi_##isa, \
+ expected_cfi_##isa + arraysize(expected_cfi_##isa)); \
+ TestImpl(isa, #isa, expected_asm, expected_cfi); \
+ }
+
+TEST_ISA(kThumb2)
+TEST_ISA(kArm64)
+TEST_ISA(kX86)
+TEST_ISA(kX86_64)
+TEST_ISA(kMips)
+TEST_ISA(kMips64)
+
+#endif // HAVE_ANDROID_OS
+
+} // namespace art
diff --git a/compiler/dex/quick/quick_cfi_test_expected.inc b/compiler/dex/quick/quick_cfi_test_expected.inc
new file mode 100644
index 0000000000..634fdeead0
--- /dev/null
+++ b/compiler/dex/quick/quick_cfi_test_expected.inc
@@ -0,0 +1,217 @@
+static constexpr uint8_t expected_asm_kThumb2[] = {
+ 0x60, 0xB5, 0x2D, 0xED, 0x02, 0x8A, 0x8B, 0xB0, 0x00, 0x90, 0x0B, 0xB0,
+ 0xBD, 0xEC, 0x02, 0x8A, 0x60, 0xBD, 0x00, 0x00,
+};
+static constexpr uint8_t expected_cfi_kThumb2[] = {
+ 0x42, 0x0E, 0x0C, 0x85, 0x03, 0x86, 0x02, 0x8E, 0x01, 0x44, 0x0E, 0x14,
+ 0x05, 0x50, 0x05, 0x05, 0x51, 0x04, 0x42, 0x0E, 0x40, 0x42, 0x0A, 0x42,
+ 0x0E, 0x14, 0x44, 0x0E, 0x0C, 0x06, 0x50, 0x06, 0x51, 0x44, 0x0B, 0x0E,
+ 0x40,
+};
+// 0x00000000: push {r5, r6, lr}
+// 0x00000002: .cfi_def_cfa_offset: 12
+// 0x00000002: .cfi_offset: r5 at cfa-12
+// 0x00000002: .cfi_offset: r6 at cfa-8
+// 0x00000002: .cfi_offset: r14 at cfa-4
+// 0x00000002: vpush.f32 {s16-s17}
+// 0x00000006: .cfi_def_cfa_offset: 20
+// 0x00000006: .cfi_offset_extended: r80 at cfa-20
+// 0x00000006: .cfi_offset_extended: r81 at cfa-16
+// 0x00000006: sub sp, sp, #44
+// 0x00000008: .cfi_def_cfa_offset: 64
+// 0x00000008: str r0, [sp, #0]
+// 0x0000000a: .cfi_remember_state
+// 0x0000000a: add sp, sp, #44
+// 0x0000000c: .cfi_def_cfa_offset: 20
+// 0x0000000c: vpop.f32 {s16-s17}
+// 0x00000010: .cfi_def_cfa_offset: 12
+// 0x00000010: .cfi_restore_extended: r80
+// 0x00000010: .cfi_restore_extended: r81
+// 0x00000010: pop {r5, r6, pc}
+// 0x00000012: lsls r0, r0, #0
+// 0x00000014: .cfi_restore_state
+// 0x00000014: .cfi_def_cfa_offset: 64
+
+static constexpr uint8_t expected_asm_kArm64[] = {
+ 0xFF, 0x03, 0x01, 0xD1, 0xE8, 0xA7, 0x01, 0x6D, 0xF4, 0xD7, 0x02, 0xA9,
+ 0xFE, 0x1F, 0x00, 0xF9, 0xE0, 0x03, 0x00, 0xB9, 0xE8, 0xA7, 0x41, 0x6D,
+ 0xF4, 0xD7, 0x42, 0xA9, 0xFE, 0x1F, 0x40, 0xF9, 0xFF, 0x03, 0x01, 0x91,
+ 0xC0, 0x03, 0x5F, 0xD6,
+};
+static constexpr uint8_t expected_cfi_kArm64[] = {
+ 0x44, 0x0E, 0x40, 0x44, 0x05, 0x48, 0x0A, 0x05, 0x49, 0x08, 0x44, 0x94,
+ 0x06, 0x95, 0x04, 0x44, 0x9E, 0x02, 0x44, 0x0A, 0x44, 0x06, 0x48, 0x06,
+ 0x49, 0x44, 0xD4, 0xD5, 0x44, 0xDE, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E,
+ 0x40,
+};
+// 0x00000000: sub sp, sp, #0x40 (64)
+// 0x00000004: .cfi_def_cfa_offset: 64
+// 0x00000004: stp d8, d9, [sp, #24]
+// 0x00000008: .cfi_offset_extended: r72 at cfa-40
+// 0x00000008: .cfi_offset_extended: r73 at cfa-32
+// 0x00000008: stp x20, x21, [sp, #40]
+// 0x0000000c: .cfi_offset: r20 at cfa-24
+// 0x0000000c: .cfi_offset: r21 at cfa-16
+// 0x0000000c: str lr, [sp, #56]
+// 0x00000010: .cfi_offset: r30 at cfa-8
+// 0x00000010: str w0, [sp]
+// 0x00000014: .cfi_remember_state
+// 0x00000014: ldp d8, d9, [sp, #24]
+// 0x00000018: .cfi_restore_extended: r72
+// 0x00000018: .cfi_restore_extended: r73
+// 0x00000018: ldp x20, x21, [sp, #40]
+// 0x0000001c: .cfi_restore: r20
+// 0x0000001c: .cfi_restore: r21
+// 0x0000001c: ldr lr, [sp, #56]
+// 0x00000020: .cfi_restore: r30
+// 0x00000020: add sp, sp, #0x40 (64)
+// 0x00000024: .cfi_def_cfa_offset: 0
+// 0x00000024: ret
+// 0x00000028: .cfi_restore_state
+// 0x00000028: .cfi_def_cfa_offset: 64
+
+static constexpr uint8_t expected_asm_kX86[] = {
+ 0x83, 0xEC, 0x3C, 0x89, 0x6C, 0x24, 0x34, 0x89, 0x74, 0x24, 0x38, 0x89,
+ 0x04, 0x24, 0x8B, 0x6C, 0x24, 0x34, 0x8B, 0x74, 0x24, 0x38, 0x83, 0xC4,
+ 0x3C, 0xC3, 0x00, 0x00,
+};
+static constexpr uint8_t expected_cfi_kX86[] = {
+ 0x43, 0x0E, 0x40, 0x44, 0x85, 0x03, 0x44, 0x86, 0x02, 0x43, 0x0A, 0x44,
+ 0xC5, 0x44, 0xC6, 0x43, 0x0E, 0x04, 0x43, 0x0B, 0x0E, 0x40,
+};
+// 0x00000000: sub esp, 60
+// 0x00000003: .cfi_def_cfa_offset: 64
+// 0x00000003: mov [esp + 52], ebp
+// 0x00000007: .cfi_offset: r5 at cfa-12
+// 0x00000007: mov [esp + 56], esi
+// 0x0000000b: .cfi_offset: r6 at cfa-8
+// 0x0000000b: mov [esp], eax
+// 0x0000000e: .cfi_remember_state
+// 0x0000000e: mov ebp, [esp + 52]
+// 0x00000012: .cfi_restore: r5
+// 0x00000012: mov esi, [esp + 56]
+// 0x00000016: .cfi_restore: r6
+// 0x00000016: add esp, 60
+// 0x00000019: .cfi_def_cfa_offset: 4
+// 0x00000019: ret
+// 0x0000001a: addb [eax], al
+// 0x0000001c: .cfi_restore_state
+// 0x0000001c: .cfi_def_cfa_offset: 64
+
+static constexpr uint8_t expected_asm_kX86_64[] = {
+ 0x48, 0x83, 0xEC, 0x38, 0x48, 0x89, 0x5C, 0x24, 0x28, 0x48, 0x89, 0x6C,
+ 0x24, 0x30, 0xF2, 0x44, 0x0F, 0x11, 0x64, 0x24, 0x18, 0xF2, 0x44, 0x0F,
+ 0x11, 0x6C, 0x24, 0x20, 0x48, 0x8B, 0xC7, 0x89, 0x3C, 0x24, 0x48, 0x8B,
+ 0x5C, 0x24, 0x28, 0x48, 0x8B, 0x6C, 0x24, 0x30, 0xF2, 0x44, 0x0F, 0x10,
+ 0x64, 0x24, 0x18, 0xF2, 0x44, 0x0F, 0x10, 0x6C, 0x24, 0x20, 0x48, 0x83,
+ 0xC4, 0x38, 0xC3, 0x00,
+};
+static constexpr uint8_t expected_cfi_kX86_64[] = {
+ 0x44, 0x0E, 0x40, 0x45, 0x83, 0x06, 0x45, 0x86, 0x04, 0x47, 0x9D, 0x0A,
+ 0x47, 0x9E, 0x08, 0x46, 0x0A, 0x45, 0xC3, 0x45, 0xC6, 0x47, 0xDD, 0x47,
+ 0xDE, 0x44, 0x0E, 0x08, 0x42, 0x0B, 0x0E, 0x40,
+};
+// 0x00000000: subq rsp, 56
+// 0x00000004: .cfi_def_cfa_offset: 64
+// 0x00000004: movq [rsp + 40], rbx
+// 0x00000009: .cfi_offset: r3 at cfa-24
+// 0x00000009: movq [rsp + 48], rbp
+// 0x0000000e: .cfi_offset: r6 at cfa-16
+// 0x0000000e: movsd [rsp + 24], xmm12
+// 0x00000015: .cfi_offset: r29 at cfa-40
+// 0x00000015: movsd [rsp + 32], xmm13
+// 0x0000001c: .cfi_offset: r30 at cfa-32
+// 0x0000001c: movq rax, rdi
+// 0x0000001f: mov [rsp], edi
+// 0x00000022: .cfi_remember_state
+// 0x00000022: movq rbx, [rsp + 40]
+// 0x00000027: .cfi_restore: r3
+// 0x00000027: movq rbp, [rsp + 48]
+// 0x0000002c: .cfi_restore: r6
+// 0x0000002c: movsd xmm12, [rsp + 24]
+// 0x00000033: .cfi_restore: r29
+// 0x00000033: movsd xmm13, [rsp + 32]
+// 0x0000003a: .cfi_restore: r30
+// 0x0000003a: addq rsp, 56
+// 0x0000003e: .cfi_def_cfa_offset: 8
+// 0x0000003e: ret
+// 0x0000003f: addb al, al
+// 0x00000040: .cfi_restore_state
+// 0x00000040: .cfi_def_cfa_offset: 64
+
+static constexpr uint8_t expected_asm_kMips[] = {
+ 0xF4, 0xFF, 0xBD, 0x27, 0x08, 0x00, 0xB2, 0xAF, 0x04, 0x00, 0xB3, 0xAF,
+ 0x00, 0x00, 0xBF, 0xAF, 0xCC, 0xFF, 0xBD, 0x27, 0x25, 0x10, 0x80, 0x00,
+ 0x00, 0x00, 0xA4, 0xAF, 0x3C, 0x00, 0xB2, 0x8F, 0x38, 0x00, 0xB3, 0x8F,
+ 0x34, 0x00, 0xBF, 0x8F, 0x40, 0x00, 0xBD, 0x27, 0x09, 0x00, 0xE0, 0x03,
+ 0x00, 0x00, 0x00, 0x00,
+};
+static constexpr uint8_t expected_cfi_kMips[] = {
+ 0x44, 0x0E, 0x0C, 0x44, 0x92, 0x01, 0x44, 0x93, 0x02, 0x44, 0x9F, 0x03,
+ 0x44, 0x0E, 0x40, 0x48, 0x0A, 0x44, 0xD2, 0x44, 0xD3, 0x44, 0xDF, 0x44,
+ 0x0E, 0x00, 0x48, 0x0B, 0x0E, 0x40,
+};
+// 0x00000000: addiu r29, r29, -12
+// 0x00000004: .cfi_def_cfa_offset: 12
+// 0x00000004: sw r18, +8(r29)
+// 0x00000008: .cfi_offset: r18 at cfa-4
+// 0x00000008: sw r19, +4(r29)
+// 0x0000000c: .cfi_offset: r19 at cfa-8
+// 0x0000000c: sw r31, +0(r29)
+// 0x00000010: .cfi_offset: r31 at cfa-12
+// 0x00000010: addiu r29, r29, -52
+// 0x00000014: .cfi_def_cfa_offset: 64
+// 0x00000014: or r2, r4, r0
+// 0x00000018: sw r4, +0(r29)
+// 0x0000001c: .cfi_remember_state
+// 0x0000001c: lw r18, +60(r29)
+// 0x00000020: .cfi_restore: r18
+// 0x00000020: lw r19, +56(r29)
+// 0x00000024: .cfi_restore: r19
+// 0x00000024: lw r31, +52(r29)
+// 0x00000028: .cfi_restore: r31
+// 0x00000028: addiu r29, r29, 64
+// 0x0000002c: .cfi_def_cfa_offset: 0
+// 0x0000002c: jalr r0, r31
+// 0x00000030: nop
+// 0x00000034: .cfi_restore_state
+// 0x00000034: .cfi_def_cfa_offset: 64
+
+static constexpr uint8_t expected_asm_kMips64[] = {
+ 0xE8, 0xFF, 0xBD, 0x67, 0x10, 0x00, 0xB2, 0xFF, 0x08, 0x00, 0xB3, 0xFF,
+ 0x00, 0x00, 0xBF, 0xFF, 0xD8, 0xFF, 0xBD, 0x67, 0x25, 0x10, 0x80, 0x00,
+ 0x00, 0x00, 0xA4, 0xAF, 0x38, 0x00, 0xB2, 0xDF, 0x30, 0x00, 0xB3, 0xDF,
+ 0x28, 0x00, 0xBF, 0xDF, 0x40, 0x00, 0xBD, 0x67, 0x09, 0x00, 0xE0, 0x03,
+ 0x00, 0x00, 0x00, 0x00,
+};
+static constexpr uint8_t expected_cfi_kMips64[] = {
+ 0x44, 0x0E, 0x18, 0x44, 0x92, 0x02, 0x44, 0x93, 0x04, 0x44, 0x9F, 0x06,
+ 0x44, 0x0E, 0x40, 0x48, 0x0A, 0x44, 0xD2, 0x44, 0xD3, 0x44, 0xDF, 0x44,
+ 0x0E, 0x00, 0x48, 0x0B, 0x0E, 0x40,
+};
+// 0x00000000: daddiu r29, r29, -24
+// 0x00000004: .cfi_def_cfa_offset: 24
+// 0x00000004: sd r18, +16(r29)
+// 0x00000008: .cfi_offset: r18 at cfa-8
+// 0x00000008: sd r19, +8(r29)
+// 0x0000000c: .cfi_offset: r19 at cfa-16
+// 0x0000000c: sd r31, +0(r29)
+// 0x00000010: .cfi_offset: r31 at cfa-24
+// 0x00000010: daddiu r29, r29, -40
+// 0x00000014: .cfi_def_cfa_offset: 64
+// 0x00000014: or r2, r4, r0
+// 0x00000018: sw r4, +0(r29)
+// 0x0000001c: .cfi_remember_state
+// 0x0000001c: ld r18, +56(r29)
+// 0x00000020: .cfi_restore: r18
+// 0x00000020: ld r19, +48(r29)
+// 0x00000024: .cfi_restore: r19
+// 0x00000024: ld r31, +40(r29)
+// 0x00000028: .cfi_restore: r31
+// 0x00000028: daddiu r29, r29, 64
+// 0x0000002c: .cfi_def_cfa_offset: 0
+// 0x0000002c: jr r31
+// 0x00000030: nop
+// 0x00000034: .cfi_restore_state
+// 0x00000034: .cfi_def_cfa_offset: 64
+
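The expected_cfi_* byte arrays above are raw DWARF call-frame opcodes, and the annotated listing after each array is their decoded form. As a sketch of the correspondence (opcode names are DWARF's; the alignment factors implied by the bytes are code alignment 1 and data alignment -4), the start and end of expected_cfi_kX86_64 decode as:

  // 0x44               DW_CFA_advance_loc | 4    -> pc += 4 (past "subq rsp, 56")
  // 0x0E 0x40          DW_CFA_def_cfa_offset 64  -> .cfi_def_cfa_offset: 64
  // 0x45               DW_CFA_advance_loc | 5    -> pc += 5 (past "movq [rsp + 40], rbx")
  // 0x83 0x06          DW_CFA_offset r3, 6       -> r3 at cfa + 6 * -4 = cfa-24
  // 0x45 0x86 0x04     advance 5; r6 at cfa + 4 * -4 = cfa-16
  // ...
  // 0x46 0x0A          advance 6; DW_CFA_remember_state
  // ...
  // 0x42 0x0B 0x0E 0x40  advance 2; DW_CFA_restore_state; def_cfa_offset 64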
diff --git a/compiler/dex/quick/quick_compiler.cc b/compiler/dex/quick/quick_compiler.cc
index 6d289843e8..2c0bd47405 100644
--- a/compiler/dex/quick/quick_compiler.cc
+++ b/compiler/dex/quick/quick_compiler.cc
@@ -634,6 +634,12 @@ CompiledMethod* QuickCompiler::Compile(const DexFile::CodeItem* code_item,
instruction_set = kThumb2;
}
CompilationUnit cu(runtime->GetArenaPool(), instruction_set, driver, class_linker);
+ cu.dex_file = &dex_file;
+ cu.class_def_idx = class_def_idx;
+ cu.method_idx = method_idx;
+ cu.access_flags = access_flags;
+ cu.invoke_type = invoke_type;
+ cu.shorty = dex_file.GetMethodShorty(dex_file.GetMethodId(method_idx));
CHECK((cu.instruction_set == kThumb2) ||
(cu.instruction_set == kArm64) ||
@@ -792,11 +798,16 @@ bool QuickCompiler::WriteElf(art::File* file,
const std::vector<const art::DexFile*>& dex_files,
const std::string& android_root,
bool is_host) const {
- return art::ElfWriterQuick32::Create(file, oat_writer, dex_files, android_root, is_host,
- *GetCompilerDriver());
+ if (kProduce64BitELFFiles && Is64BitInstructionSet(GetCompilerDriver()->GetInstructionSet())) {
+ return art::ElfWriterQuick64::Create(file, oat_writer, dex_files, android_root, is_host,
+ *GetCompilerDriver());
+ } else {
+ return art::ElfWriterQuick32::Create(file, oat_writer, dex_files, android_root, is_host,
+ *GetCompilerDriver());
+ }
}
-Mir2Lir* QuickCompiler::GetCodeGenerator(CompilationUnit* cu, void* compilation_unit) const {
+Mir2Lir* QuickCompiler::GetCodeGenerator(CompilationUnit* cu, void* compilation_unit) {
UNUSED(compilation_unit);
Mir2Lir* mir_to_lir = nullptr;
switch (cu->instruction_set) {
diff --git a/compiler/dex/quick/quick_compiler.h b/compiler/dex/quick/quick_compiler.h
index 5153a9e82e..09b08ace77 100644
--- a/compiler/dex/quick/quick_compiler.h
+++ b/compiler/dex/quick/quick_compiler.h
@@ -60,7 +60,7 @@ class QuickCompiler : public Compiler {
OVERRIDE
SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
- Mir2Lir* GetCodeGenerator(CompilationUnit* cu, void* compilation_unit) const;
+ static Mir2Lir* GetCodeGenerator(CompilationUnit* cu, void* compilation_unit);
void InitCompilationUnit(CompilationUnit& cu) const OVERRIDE;
diff --git a/compiler/dex/quick/ralloc_util.cc b/compiler/dex/quick/ralloc_util.cc
index 741657bc69..e779479780 100644
--- a/compiler/dex/quick/ralloc_util.cc
+++ b/compiler/dex/quick/ralloc_util.cc
@@ -19,9 +19,11 @@
#include "mir_to_lir-inl.h"
#include "dex/compiler_ir.h"
+#include "dex/dataflow_iterator-inl.h"
#include "dex/mir_graph.h"
#include "driver/compiler_driver.h"
#include "driver/dex_compilation_unit.h"
+#include "utils/dex_cache_arrays_layout-inl.h"
namespace art {
@@ -1128,6 +1130,152 @@ RegLocation Mir2Lir::EvalLoc(RegLocation loc, int reg_class, bool update) {
return loc;
}
+void Mir2Lir::AnalyzeMIR(RefCounts* core_counts, MIR* mir, uint32_t weight) {
+  // NOTE: This should be kept in sync with the functions that actually generate
+  // code for the opcodes below. However, if we get this wrong, the generated
+  // code will still be correct, just possibly sub-optimal.
+ int opcode = mir->dalvikInsn.opcode;
+ bool uses_method = false;
+ bool uses_pc_rel_load = false;
+ uint32_t dex_cache_array_offset = std::numeric_limits<uint32_t>::max();
+ switch (opcode) {
+ case Instruction::CHECK_CAST:
+ case Instruction::INSTANCE_OF: {
+ if ((opcode == Instruction::CHECK_CAST) &&
+ (mir->optimization_flags & MIR_IGNORE_CHECK_CAST) != 0) {
+ break; // No code generated.
+ }
+ uint32_t type_idx =
+ (opcode == Instruction::CHECK_CAST) ? mir->dalvikInsn.vB : mir->dalvikInsn.vC;
+ bool type_known_final, type_known_abstract, use_declaring_class;
+ bool needs_access_check = !cu_->compiler_driver->CanAccessTypeWithoutChecks(
+ cu_->method_idx, *cu_->dex_file, type_idx,
+ &type_known_final, &type_known_abstract, &use_declaring_class);
+ if (opcode == Instruction::CHECK_CAST && !needs_access_check &&
+ cu_->compiler_driver->IsSafeCast(
+ mir_graph_->GetCurrentDexCompilationUnit(), mir->offset)) {
+ break; // No code generated.
+ }
+ if (!needs_access_check && !use_declaring_class && CanUseOpPcRelDexCacheArrayLoad()) {
+ uses_pc_rel_load = true; // And ignore method use in slow path.
+ dex_cache_array_offset = dex_cache_arrays_layout_.TypeOffset(type_idx);
+ } else {
+ uses_method = true;
+ }
+ break;
+ }
+
+ case Instruction::CONST_CLASS:
+ if (CanUseOpPcRelDexCacheArrayLoad() &&
+ cu_->compiler_driver->CanAccessTypeWithoutChecks(cu_->method_idx, *cu_->dex_file,
+ mir->dalvikInsn.vB)) {
+ uses_pc_rel_load = true; // And ignore method use in slow path.
+ dex_cache_array_offset = dex_cache_arrays_layout_.TypeOffset(mir->dalvikInsn.vB);
+ } else {
+ uses_method = true;
+ }
+ break;
+
+ case Instruction::CONST_STRING:
+ case Instruction::CONST_STRING_JUMBO:
+ if (CanUseOpPcRelDexCacheArrayLoad()) {
+ uses_pc_rel_load = true; // And ignore method use in slow path.
+ dex_cache_array_offset = dex_cache_arrays_layout_.StringOffset(mir->dalvikInsn.vB);
+ } else {
+ uses_method = true;
+ }
+ break;
+
+ case Instruction::INVOKE_VIRTUAL:
+ case Instruction::INVOKE_SUPER:
+ case Instruction::INVOKE_DIRECT:
+ case Instruction::INVOKE_STATIC:
+ case Instruction::INVOKE_INTERFACE:
+ case Instruction::INVOKE_VIRTUAL_RANGE:
+ case Instruction::INVOKE_SUPER_RANGE:
+ case Instruction::INVOKE_DIRECT_RANGE:
+ case Instruction::INVOKE_STATIC_RANGE:
+ case Instruction::INVOKE_INTERFACE_RANGE:
+ case Instruction::INVOKE_VIRTUAL_QUICK:
+ case Instruction::INVOKE_VIRTUAL_RANGE_QUICK: {
+ const MirMethodLoweringInfo& info = mir_graph_->GetMethodLoweringInfo(mir);
+ InvokeType sharp_type = info.GetSharpType();
+ if (info.IsIntrinsic()) {
+      // Nothing to do: if an intrinsic uses ArtMethod*, it's in the slow path - don't count it.
+ } else if (!info.FastPath() || (sharp_type != kStatic && sharp_type != kDirect)) {
+      // Nothing to do; the generated code or entrypoint uses the method from the stack.
+ } else if (info.DirectCode() != 0 && info.DirectMethod() != 0) {
+      // Nothing to do; the generated code uses the method from the stack.
+ } else if (CanUseOpPcRelDexCacheArrayLoad()) {
+ uses_pc_rel_load = true;
+ dex_cache_array_offset = dex_cache_arrays_layout_.MethodOffset(mir->dalvikInsn.vB);
+ } else {
+ uses_method = true;
+ }
+ break;
+ }
+
+ case Instruction::NEW_INSTANCE:
+ case Instruction::NEW_ARRAY:
+ case Instruction::FILLED_NEW_ARRAY:
+ case Instruction::FILLED_NEW_ARRAY_RANGE:
+ uses_method = true;
+ break;
+ case Instruction::FILL_ARRAY_DATA:
+      // Nothing to do; the entrypoint uses the method from the stack.
+ break;
+ case Instruction::THROW:
+      // Nothing to do; the entrypoint uses the method from the stack.
+ break;
+
+ case Instruction::SGET:
+ case Instruction::SGET_WIDE:
+ case Instruction::SGET_OBJECT:
+ case Instruction::SGET_BOOLEAN:
+ case Instruction::SGET_BYTE:
+ case Instruction::SGET_CHAR:
+ case Instruction::SGET_SHORT:
+ case Instruction::SPUT:
+ case Instruction::SPUT_WIDE:
+ case Instruction::SPUT_OBJECT:
+ case Instruction::SPUT_BOOLEAN:
+ case Instruction::SPUT_BYTE:
+ case Instruction::SPUT_CHAR:
+ case Instruction::SPUT_SHORT: {
+ const MirSFieldLoweringInfo& field_info = mir_graph_->GetSFieldLoweringInfo(mir);
+ bool fast = IsInstructionSGet(static_cast<Instruction::Code>(opcode))
+ ? field_info.FastGet()
+ : field_info.FastPut();
+ if (fast && (cu_->enable_debug & (1 << kDebugSlowFieldPath)) == 0) {
+ if (!field_info.IsReferrersClass() && CanUseOpPcRelDexCacheArrayLoad()) {
+ uses_pc_rel_load = true; // And ignore method use in slow path.
+ dex_cache_array_offset = dex_cache_arrays_layout_.TypeOffset(field_info.StorageIndex());
+ } else {
+ uses_method = true;
+ }
+ } else {
+        // Nothing to do; the entrypoint uses the method from the stack.
+ }
+ break;
+ }
+
+ default:
+ break;
+ }
+ if (uses_method) {
+ core_counts[SRegToPMap(mir_graph_->GetMethodLoc().s_reg_low)].count += weight;
+ }
+ if (uses_pc_rel_load) {
+ if (pc_rel_temp_ != nullptr) {
+ core_counts[SRegToPMap(pc_rel_temp_->s_reg_low)].count += weight;
+ DCHECK_NE(dex_cache_array_offset, std::numeric_limits<uint32_t>::max());
+ dex_cache_arrays_min_offset_ = std::min(dex_cache_arrays_min_offset_, dex_cache_array_offset);
+ } else {
+      // Nothing to do; PC-relative addressing is used without promoting the base PC
+      // to a register.
+ }
+ }
+}
+
/* USE SSA names to count references of base Dalvik v_regs. */
void Mir2Lir::CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num_regs) {
for (int i = 0; i < mir_graph_->GetNumSSARegs(); i++) {
@@ -1157,6 +1305,22 @@ void Mir2Lir::CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num
}
}
}
+
+ // Now analyze the ArtMethod* and pc_rel_temp_ uses.
+ DCHECK_EQ(core_counts[SRegToPMap(mir_graph_->GetMethodLoc().s_reg_low)].count, 0);
+ if (pc_rel_temp_ != nullptr) {
+ DCHECK_EQ(core_counts[SRegToPMap(pc_rel_temp_->s_reg_low)].count, 0);
+ }
+ PreOrderDfsIterator iter(mir_graph_);
+ for (BasicBlock* bb = iter.Next(); bb != nullptr; bb = iter.Next()) {
+ if (bb->block_type == kDead) {
+ continue;
+ }
+ uint32_t weight = mir_graph_->GetUseCountWeight(bb);
+ for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) {
+ AnalyzeMIR(core_counts, mir, weight);
+ }
+ }
}
/* qsort callback function, sort descending */
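The new AnalyzeMIR()/CountRefs() logic above only accumulates weighted counts; the ArtMethod* and pc_rel_temp_ locations then compete with ordinary Dalvik v_regs in the existing promotion pass, which sorts the counts in descending order. A minimal sketch of that comparator (the "qsort callback" referenced just above; RefCounts is the {count, s_reg} pair used in the hunk):

  // Sketch: heavier weight-scaled counts (e.g. uses inside hot loops) sort
  // first, so a frequently used pc_rel_temp_ can win promotion like any v_reg.
  static int SortCounts(const void* val1, const void* val2) {
    const Mir2Lir::RefCounts* op1 = reinterpret_cast<const Mir2Lir::RefCounts*>(val1);
    const Mir2Lir::RefCounts* op2 = reinterpret_cast<const Mir2Lir::RefCounts*>(val2);
    return (op1->count == op2->count) ? 0 : (op1->count < op2->count ? 1 : -1);  // Descending.
  }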
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index 118ab1d843..af19f5eaed 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -544,7 +544,6 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0,
{ kX86CallI, kCall, IS_UNARY_OP | IS_BRANCH, { 0, 0, 0xE8, 0, 0, 0, 0, 4, false }, "CallI", "!0d" },
{ kX86Ret, kNullary, NO_OPERAND | IS_BRANCH, { 0, 0, 0xC3, 0, 0, 0, 0, 0, false }, "Ret", "" },
- { kX86StartOfMethod, kMacro, IS_UNARY_OP | REG_DEF0 | SETS_CCODES, { 0, 0, 0, 0, 0, 0, 0, 0, false }, "StartOfMethod", "!0r" },
{ kX86PcRelLoadRA, kPcRel, IS_LOAD | IS_QUIN_OP | REG_DEF0_USE12, { 0, 0, 0x8B, 0, 0, 0, 0, 0, false }, "PcRelLoadRA", "!0r,[!1r+!2r<<!3d+!4p]" },
{ kX86PcRelAdr, kPcRel, IS_LOAD | IS_BINARY_OP | REG_DEF0, { 0, 0, 0xB8, 0, 0, 0, 0, 4, false }, "PcRelAdr", "!0r,!1p" },
{ kX86RepneScasw, kNullary, NO_OPERAND | REG_USEA | REG_USEC | SETS_CCODES, { 0x66, 0xF2, 0xAF, 0, 0, 0, 0, 0, false }, "RepNE ScasW", "" },
@@ -865,13 +864,6 @@ size_t X86Mir2Lir::GetInsnSize(LIR* lir) {
DCHECK_EQ(entry->opcode, kX86PcRelAdr);
return 5; // opcode with reg + 4 byte immediate
}
- case kMacro: // lir operands - 0: reg
- DCHECK_EQ(lir->opcode, static_cast<int>(kX86StartOfMethod));
- return 5 /* call opcode + 4 byte displacement */ + 1 /* pop reg */ +
- ComputeSize(&X86Mir2Lir::EncodingMap[cu_->target64 ? kX86Sub64RI : kX86Sub32RI],
- lir->operands[0], NO_REG, NO_REG, 0) -
- // Shorter ax encoding.
- (RegStorage::RegNum(lir->operands[0]) == rs_rAX.GetRegNum() ? 1 : 0);
case kUnimplemented:
break;
}
@@ -1586,8 +1578,8 @@ void X86Mir2Lir::EmitPcRel(const X86EncodingMap* entry, int32_t raw_reg, int32_t
int32_t raw_index, int scale, int32_t table_or_disp) {
int disp;
if (entry->opcode == kX86PcRelLoadRA) {
- const EmbeddedData* tab_rec = UnwrapPointer<EmbeddedData>(table_or_disp);
- disp = tab_rec->offset;
+ const SwitchTable* tab_rec = UnwrapPointer<SwitchTable>(table_or_disp);
+ disp = tab_rec->offset - tab_rec->anchor->offset;
} else {
DCHECK(entry->opcode == kX86PcRelAdr);
const EmbeddedData* tab_rec = UnwrapPointer<EmbeddedData>(raw_base_or_table);
@@ -1621,23 +1613,6 @@ void X86Mir2Lir::EmitPcRel(const X86EncodingMap* entry, int32_t raw_reg, int32_t
DCHECK_EQ(0, entry->skeleton.ax_opcode);
}
-void X86Mir2Lir::EmitMacro(const X86EncodingMap* entry, int32_t raw_reg, int32_t offset) {
- DCHECK_EQ(entry->opcode, kX86StartOfMethod) << entry->name;
- DCHECK_EQ(false, entry->skeleton.r8_form);
- EmitPrefix(entry, raw_reg, NO_REG, NO_REG);
- code_buffer_.push_back(0xE8); // call +0
- code_buffer_.push_back(0);
- code_buffer_.push_back(0);
- code_buffer_.push_back(0);
- code_buffer_.push_back(0);
-
- uint8_t low_reg = LowRegisterBits(raw_reg);
- code_buffer_.push_back(0x58 + low_reg); // pop reg
-
- EmitRegImm(&X86Mir2Lir::EncodingMap[cu_->target64 ? kX86Sub64RI : kX86Sub32RI],
- raw_reg, offset + 5 /* size of call +0 */);
-}
-
void X86Mir2Lir::EmitUnimplemented(const X86EncodingMap* entry, LIR* lir) {
UNIMPLEMENTED(WARNING) << "encoding kind for " << entry->name << " "
<< BuildInsnString(entry->fmt, lir, 0);
@@ -1780,7 +1755,8 @@ AssemblerStatus X86Mir2Lir::AssembleInstructions(CodeOffset start_addr) {
// Offset is relative to next instruction.
lir->operands[2] = target - (lir->offset + lir->flags.size);
} else {
- lir->operands[2] = target;
+ const LIR* anchor = UnwrapPointer<LIR>(lir->operands[4]);
+ lir->operands[2] = target - anchor->offset;
int newSize = GetInsnSize(lir);
if (newSize != lir->flags.size) {
lir->flags.size = newSize;
@@ -1951,9 +1927,6 @@ AssemblerStatus X86Mir2Lir::AssembleInstructions(CodeOffset start_addr) {
EmitPcRel(entry, lir->operands[0], lir->operands[1], lir->operands[2],
lir->operands[3], lir->operands[4]);
break;
- case kMacro: // lir operands - 0: reg
- EmitMacro(entry, lir->operands[0], lir->offset);
- break;
case kNop: // TODO: these instruction kinds are missing implementations.
case kThreadReg:
case kRegArrayImm:
@@ -2044,9 +2017,13 @@ void X86Mir2Lir::AssembleLIR() {
cu_->NewTimingSplit("Assemble");
// We will remove the method address if we never ended up using it
- if (store_method_addr_ && !store_method_addr_used_) {
- setup_method_address_[0]->flags.is_nop = true;
- setup_method_address_[1]->flags.is_nop = true;
+ if (pc_rel_base_reg_.Valid() && !pc_rel_base_reg_used_) {
+ if (kIsDebugBuild) {
+ LOG(WARNING) << "PC-relative addressing base promoted but unused in "
+ << PrettyMethod(cu_->method_idx, *cu_->dex_file);
+ }
+ setup_pc_rel_base_reg_->flags.is_nop = true;
+ NEXT_LIR(setup_pc_rel_base_reg_)->flags.is_nop = true;
}
AssignOffsets();
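The AssembleInstructions() change above switches the kX86PcRelLoadRA displacement from an absolute method offset to an anchor-relative one, which is what makes the 32-bit switch sequence position independent. With hypothetical offsets:

  // Suppose within one method:
  //   0x10: pop ebx        <- the anchor; at run time ebx = load_address + 0x10
  //   0x40: switch table   <- the fixup target (tab_rec->offset)
  // The disp32 encoded into "mov eax, [ebx + key*4 + disp]" becomes
  //   lir->operands[2] = target - anchor->offset = 0x40 - 0x10 = 0x30,
  // so ebx + 0x30 addresses the table wherever the method is mapped.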
diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc
index abee87254b..d7a5eb04db 100644
--- a/compiler/dex/quick/x86/call_x86.cc
+++ b/compiler/dex/quick/x86/call_x86.cc
@@ -21,9 +21,11 @@
#include "base/logging.h"
#include "dex/quick/mir_to_lir-inl.h"
#include "driver/compiler_driver.h"
+#include "driver/compiler_options.h"
#include "gc/accounting/card_table.h"
#include "mirror/art_method.h"
#include "mirror/object_array-inl.h"
+#include "utils/dex_cache_arrays_layout-inl.h"
#include "x86_lir.h"
namespace art {
@@ -95,29 +97,23 @@ void X86Mir2Lir::GenLargePackedSwitch(MIR* mir, DexOffset table_offset, RegLocat
// Add the offset from the table to the table base.
OpRegReg(kOpAdd, addr_for_jump, table_base);
+ tab_rec->anchor = nullptr; // Unused for x86-64.
} else {
- // Materialize a pointer to the switch table.
- RegStorage start_of_method_reg;
- if (base_of_code_ != nullptr) {
- // We can use the saved value.
- RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
- rl_method = LoadValue(rl_method, kCoreReg);
- start_of_method_reg = rl_method.reg;
- store_method_addr_used_ = true;
- } else {
- start_of_method_reg = AllocTempRef();
- NewLIR1(kX86StartOfMethod, start_of_method_reg.GetReg());
- }
+      // Get the PC into a register and get the anchor.
+ LIR* anchor;
+ RegStorage r_pc = GetPcAndAnchor(&anchor);
+
// Load the displacement from the switch table.
addr_for_jump = AllocTemp();
- NewLIR5(kX86PcRelLoadRA, addr_for_jump.GetReg(), start_of_method_reg.GetReg(), keyReg.GetReg(),
+ NewLIR5(kX86PcRelLoadRA, addr_for_jump.GetReg(), r_pc.GetReg(), keyReg.GetReg(),
2, WrapPointer(tab_rec));
- // Add displacement to start of method.
- OpRegReg(kOpAdd, addr_for_jump, start_of_method_reg);
+ // Add displacement and r_pc to get the address.
+ OpRegReg(kOpAdd, addr_for_jump, r_pc);
+ tab_rec->anchor = anchor;
}
// ..and go!
- tab_rec->anchor = NewLIR1(kX86JmpR, addr_for_jump.GetReg());
+ NewLIR1(kX86JmpR, addr_for_jump.GetReg());
/* branch_over target here */
LIR* target = NewLIR0(kPseudoTargetLabel);
@@ -148,6 +144,10 @@ void X86Mir2Lir::UnconditionallyMarkGCCard(RegStorage tgt_addr_reg) {
FreeTemp(reg_card_no);
}
+static dwarf::Reg DwarfCoreReg(bool is_x86_64, int num) {
+ return is_x86_64 ? dwarf::Reg::X86_64Core(num) : dwarf::Reg::X86Core(num);
+}
+
void X86Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) {
/*
* On entry, rX86_ARG0, rX86_ARG1, rX86_ARG2 are live. Let the register
@@ -182,10 +182,10 @@ void X86Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) {
}
/* Build frame, return address already on stack */
- stack_decrement_ = OpRegImm(kOpSub, rs_rSP, frame_size_ -
- GetInstructionSetPointerSize(cu_->instruction_set));
+ cfi_.SetCurrentCFAOffset(GetInstructionSetPointerSize(cu_->instruction_set));
+ OpRegImm(kOpSub, rs_rSP, frame_size_ - GetInstructionSetPointerSize(cu_->instruction_set));
+ cfi_.DefCFAOffset(frame_size_);
- NewLIR0(kPseudoMethodEntry);
/* Spill core callee saves */
SpillCoreRegs();
SpillFPRegs();
@@ -201,10 +201,12 @@ void X86Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) {
GenerateTargetLabel(kPseudoThrowTarget);
const RegStorage local_rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32;
m2l_->OpRegImm(kOpAdd, local_rs_rSP, sp_displace_);
+ m2l_->cfi().AdjustCFAOffset(-sp_displace_);
m2l_->ClobberCallerSave();
      // Assumes codegen and target are in x86 mode.
m2l_->CallHelper(RegStorage::InvalidReg(), kQuickThrowStackOverflow,
false /* MarkSafepointPC */, false /* UseLink */);
+ m2l_->cfi().AdjustCFAOffset(sp_displace_);
}
private:
@@ -235,14 +237,12 @@ void X86Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) {
FlushIns(ArgLocs, rl_method);
- if (base_of_code_ != nullptr) {
- RegStorage method_start = TargetPtrReg(kArg0);
- // We have been asked to save the address of the method start for later use.
- setup_method_address_[0] = NewLIR1(kX86StartOfMethod, method_start.GetReg());
- int displacement = SRegOffset(base_of_code_->s_reg_low);
- // Native pointer - must be natural word size.
- setup_method_address_[1] = StoreBaseDisp(rs_rSP, displacement, method_start,
- cu_->target64 ? k64 : k32, kNotVolatile);
+  // The anchor PC for PC-relative addressing may have been promoted to a register
+  // if it is used at least twice. Rather than investigating the best place to
+  // lazily load it, we conveniently load it here, after flushing the inputs.
+ if (pc_rel_base_reg_.Valid()) {
+ DCHECK(!cu_->target64);
+ setup_pc_rel_base_reg_ = OpLoadPc(pc_rel_base_reg_);
}
FreeTemp(arg0);
@@ -251,6 +251,7 @@ void X86Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) {
}
void X86Mir2Lir::GenExitSequence() {
+ cfi_.RememberState();
/*
* In the exit path, rX86_RET0/rX86_RET1 are live - make sure they aren't
* allocated by the register utilities as temps.
@@ -258,14 +259,18 @@ void X86Mir2Lir::GenExitSequence() {
LockTemp(rs_rX86_RET0);
LockTemp(rs_rX86_RET1);
- NewLIR0(kPseudoMethodExit);
UnSpillCoreRegs();
UnSpillFPRegs();
/* Remove frame except for return address */
const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32;
- stack_increment_ = OpRegImm(kOpAdd, rs_rSP,
- frame_size_ - GetInstructionSetPointerSize(cu_->instruction_set));
+ int adjust = frame_size_ - GetInstructionSetPointerSize(cu_->instruction_set);
+ OpRegImm(kOpAdd, rs_rSP, adjust);
+ cfi_.AdjustCFAOffset(-adjust);
+ // There is only the return PC on the stack now.
NewLIR0(kX86Ret);
+ // The CFI should be restored for any code that follows the exit block.
+ cfi_.RestoreState();
+ cfi_.DefCFAOffset(frame_size_);
}
void X86Mir2Lir::GenSpecialExitSequence() {
@@ -276,6 +281,8 @@ void X86Mir2Lir::GenSpecialEntryForSuspend() {
// Keep 16-byte stack alignment, there's already the return address, so
// - for 32-bit push EAX, i.e. ArtMethod*, ESI, EDI,
// - for 64-bit push RAX, i.e. ArtMethod*.
+ const int kRegSize = cu_->target64 ? 8 : 4;
+ cfi_.SetCurrentCFAOffset(kRegSize); // Return address.
if (!cu_->target64) {
DCHECK(!IsTemp(rs_rSI));
DCHECK(!IsTemp(rs_rDI));
@@ -293,17 +300,29 @@ void X86Mir2Lir::GenSpecialEntryForSuspend() {
fp_vmap_table_.clear();
if (!cu_->target64) {
NewLIR1(kX86Push32R, rs_rDI.GetReg());
+ cfi_.AdjustCFAOffset(kRegSize);
+ cfi_.RelOffset(DwarfCoreReg(cu_->target64, rs_rDI.GetRegNum()), 0);
NewLIR1(kX86Push32R, rs_rSI.GetReg());
+ cfi_.AdjustCFAOffset(kRegSize);
+ cfi_.RelOffset(DwarfCoreReg(cu_->target64, rs_rSI.GetRegNum()), 0);
}
NewLIR1(kX86Push32R, TargetReg(kArg0, kRef).GetReg()); // ArtMethod*
+ cfi_.AdjustCFAOffset(kRegSize);
+  // Do not generate CFI for the scratch register.
}
void X86Mir2Lir::GenSpecialExitForSuspend() {
+ const int kRegSize = cu_->target64 ? 8 : 4;
// Pop the frame. (ArtMethod* no longer needed but restore it anyway.)
NewLIR1(kX86Pop32R, TargetReg(kArg0, kRef).GetReg()); // ArtMethod*
+ cfi_.AdjustCFAOffset(-kRegSize);
if (!cu_->target64) {
NewLIR1(kX86Pop32R, rs_rSI.GetReg());
+ cfi_.AdjustCFAOffset(-kRegSize);
+ cfi_.Restore(DwarfCoreReg(cu_->target64, rs_rSI.GetRegNum()));
NewLIR1(kX86Pop32R, rs_rDI.GetReg());
+ cfi_.AdjustCFAOffset(-kRegSize);
+ cfi_.Restore(DwarfCoreReg(cu_->target64, rs_rDI.GetRegNum()));
}
}
@@ -321,13 +340,13 @@ void X86Mir2Lir::GenImplicitNullCheck(RegStorage reg, int opt_flags) {
* Bit of a hack here - in the absence of a real scheduling pass,
* emit the next instruction in static & direct invoke sequences.
*/
-static int X86NextSDCallInsn(CompilationUnit* cu, CallInfo* info,
- int state, const MethodReference& target_method,
- uint32_t,
- uintptr_t direct_code, uintptr_t direct_method,
- InvokeType type) {
+int X86Mir2Lir::X86NextSDCallInsn(CompilationUnit* cu, CallInfo* info,
+ int state, const MethodReference& target_method,
+ uint32_t,
+ uintptr_t direct_code, uintptr_t direct_method,
+ InvokeType type) {
UNUSED(info, direct_code);
- Mir2Lir* cg = static_cast<Mir2Lir*>(cu->cg.get());
+ X86Mir2Lir* cg = static_cast<X86Mir2Lir*>(cu->cg.get());
if (direct_method != 0) {
switch (state) {
case 0: // Get the current Method* [sets kArg0]
@@ -345,6 +364,17 @@ static int X86NextSDCallInsn(CompilationUnit* cu, CallInfo* info,
default:
return -1;
}
+ } else if (cg->CanUseOpPcRelDexCacheArrayLoad()) {
+ switch (state) {
+ case 0: {
+ CHECK_EQ(cu->dex_file, target_method.dex_file);
+ size_t offset = cg->dex_cache_arrays_layout_.MethodOffset(target_method.dex_method_index);
+ cg->OpPcRelDexCacheArrayLoad(cu->dex_file, offset, cg->TargetReg(kArg0, kRef));
+ break;
+ }
+ default:
+ return -1;
+ }
} else {
RegStorage arg0_ref = cg->TargetReg(kArg0, kRef);
switch (state) {
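GetPcAndAnchor() (used above and defined in the int_x86.cc hunks below) builds on the classic 32-bit x86 idiom for reading the program counter, since there is no direct way to read EIP. The two chained LIRs emitted by OpLoadPc() correspond to the following machine code, shown here with EBX as the destination:

  // E8 00 00 00 00    call +0    // pushes the address of the next insn
  // 5B                pop ebx    // ebx = that address - the "anchor"
  // If the pair ends up unused, AssembleLIR() nops both instructions out.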
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index 040a8c4bef..72580a3e39 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -28,7 +28,7 @@
namespace art {
-class X86Mir2Lir : public Mir2Lir {
+class X86Mir2Lir FINAL : public Mir2Lir {
protected:
class InToRegStorageX86_64Mapper : public InToRegStorageMapper {
public:
@@ -104,6 +104,9 @@ class X86Mir2Lir : public Mir2Lir {
/// @copydoc Mir2Lir::UnconditionallyMarkGCCard(RegStorage)
void UnconditionallyMarkGCCard(RegStorage tgt_addr_reg) OVERRIDE;
+ bool CanUseOpPcRelDexCacheArrayLoad() const OVERRIDE;
+ void OpPcRelDexCacheArrayLoad(const DexFile* dex_file, int offset, RegStorage r_dest) OVERRIDE;
+
void GenImplicitNullCheck(RegStorage reg, int opt_flags) OVERRIDE;
// Required for target - register utilities.
@@ -372,17 +375,15 @@ class X86Mir2Lir : public Mir2Lir {
*/
LIR* GenCallInsn(const MirMethodLoweringInfo& method_info) OVERRIDE;
+ void AnalyzeMIR(RefCounts* core_counts, MIR* mir, uint32_t weight) OVERRIDE;
+ void CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num_regs) OVERRIDE;
+ void DoPromotion() OVERRIDE;
+
/*
* @brief Handle x86 specific literals
*/
void InstallLiteralPools() OVERRIDE;
- /*
- * @brief Generate the debug_frame FDE information.
- * @returns pointer to vector containing CFE information
- */
- std::vector<uint8_t>* ReturnFrameDescriptionEntry() OVERRIDE;
-
LIR* InvokeTrampoline(OpKind op, RegStorage r_tgt, QuickEntrypointEnum trampoline) OVERRIDE;
protected:
@@ -491,7 +492,6 @@ class X86Mir2Lir : public Mir2Lir {
void EmitCallThread(const X86EncodingMap* entry, int32_t disp);
void EmitPcRel(const X86EncodingMap* entry, int32_t raw_reg, int32_t raw_base_or_table,
int32_t raw_index, int scale, int32_t table_or_disp);
- void EmitMacro(const X86EncodingMap* entry, int32_t raw_reg, int32_t offset);
void EmitUnimplemented(const X86EncodingMap* entry, LIR* lir);
void GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1,
int64_t val, ConditionCode ccode);
@@ -862,12 +862,6 @@ class X86Mir2Lir : public Mir2Lir {
void SpillFPRegs();
/*
- * @brief Perform MIR analysis before compiling method.
- * @note Invokes Mir2LiR::Materialize after analysis.
- */
- void Materialize();
-
- /*
* Mir2Lir's UpdateLoc() looks to see if the Dalvik value is currently live in any temp register
* without regard to data type. In practice, this can result in UpdateLoc returning a
   * location record for a Dalvik float value in a core register, and vice versa. For targets
@@ -881,67 +875,39 @@ class X86Mir2Lir : public Mir2Lir {
RegLocation UpdateLocWideTyped(RegLocation loc);
/*
- * @brief Analyze MIR before generating code, to prepare for the code generation.
- */
- void AnalyzeMIR();
-
- /*
- * @brief Analyze one basic block.
- * @param bb Basic block to analyze.
- */
- void AnalyzeBB(BasicBlock* bb);
-
- /*
- * @brief Analyze one extended MIR instruction
- * @param opcode MIR instruction opcode.
- * @param bb Basic block containing instruction.
- * @param mir Extended instruction to analyze.
- */
- void AnalyzeExtendedMIR(int opcode, BasicBlock* bb, MIR* mir);
-
- /*
- * @brief Analyze one MIR instruction
- * @param opcode MIR instruction opcode.
- * @param bb Basic block containing instruction.
- * @param mir Instruction to analyze.
- */
- virtual void AnalyzeMIR(int opcode, BasicBlock* bb, MIR* mir);
-
- /*
* @brief Analyze one MIR float/double instruction
* @param opcode MIR instruction opcode.
- * @param bb Basic block containing instruction.
* @param mir Instruction to analyze.
+ * @return true iff the instruction needs to load a literal using PC-relative addressing.
*/
- virtual void AnalyzeFPInstruction(int opcode, BasicBlock* bb, MIR* mir);
+ bool AnalyzeFPInstruction(int opcode, MIR* mir);
/*
* @brief Analyze one use of a double operand.
* @param rl_use Double RegLocation for the operand.
+ * @return true iff the instruction needs to load a literal using PC-relative addressing.
*/
- void AnalyzeDoubleUse(RegLocation rl_use);
+ bool AnalyzeDoubleUse(RegLocation rl_use);
/*
* @brief Analyze one invoke-static MIR instruction
- * @param opcode MIR instruction opcode.
- * @param bb Basic block containing instruction.
* @param mir Instruction to analyze.
+ * @return true iff the instruction needs to load a literal using PC-relative addressing.
*/
- void AnalyzeInvokeStatic(int opcode, BasicBlock* bb, MIR* mir);
+ bool AnalyzeInvokeStaticIntrinsic(MIR* mir);
// Information derived from analysis of MIR
- // The compiler temporary for the code address of the method.
- CompilerTemp *base_of_code_;
-
- // Have we decided to compute a ptr to code and store in temporary VR?
- bool store_method_addr_;
+ // The base register for PC-relative addressing if promoted (32-bit only).
+ RegStorage pc_rel_base_reg_;
- // Have we used the stored method address?
- bool store_method_addr_used_;
+ // Have we actually used the pc_rel_base_reg_?
+ bool pc_rel_base_reg_used_;
- // Instructions to remove if we didn't use the stored method address.
- LIR* setup_method_address_[2];
+ // Pointer to the "call +0" insn that sets up the promoted register for PC-relative addressing.
+ // The anchor "pop" insn is NEXT_LIR(setup_pc_rel_base_reg_). The whole "call +0; pop <reg>"
+ // sequence will be removed in AssembleLIR() if we do not actually use PC-relative addressing.
+ LIR* setup_pc_rel_base_reg_; // There are 2 chained insns (no reordering allowed).
// Instructions needing patching with Method* values.
ArenaVector<LIR*> method_address_insns_;
@@ -952,11 +918,8 @@ class X86Mir2Lir : public Mir2Lir {
// Instructions needing patching with PC relative code addresses.
ArenaVector<LIR*> call_method_insns_;
- // Prologue decrement of stack pointer.
- LIR* stack_decrement_;
-
- // Epilogue increment of stack pointer.
- LIR* stack_increment_;
+  // Instructions needing patching with PC-relative references to dex cache arrays.
+ ArenaVector<LIR*> dex_cache_access_insns_;
// The list of const vector literals.
LIR* const_vectors_;
@@ -992,6 +955,20 @@ class X86Mir2Lir : public Mir2Lir {
void SwapBits(RegStorage result_reg, int shift, int32_t value);
void SwapBits64(RegStorage result_reg, int shift, int64_t value);
+ static int X86NextSDCallInsn(CompilationUnit* cu, CallInfo* info,
+ int state, const MethodReference& target_method,
+ uint32_t,
+ uintptr_t direct_code, uintptr_t direct_method,
+ InvokeType type);
+
+ LIR* OpLoadPc(RegStorage r_dest);
+ RegStorage GetPcAndAnchor(LIR** anchor, RegStorage r_tmp = RegStorage::InvalidReg());
+
+  // When we don't know the proper offset for the value, pick one that will force
+  // a 4-byte offset. We will fix it up in the assembler or linker later to hold
+  // the right value.
+ static constexpr int kDummy32BitOffset = 256;
+
static const X86EncodingMap EncodingMap[kX86Last];
friend std::ostream& operator<<(std::ostream& os, const X86OpCode& rhs);
diff --git a/compiler/dex/quick/x86/fp_x86.cc b/compiler/dex/quick/x86/fp_x86.cc
index d8616a7bf3..cfe0480c54 100755
--- a/compiler/dex/quick/x86/fp_x86.cc
+++ b/compiler/dex/quick/x86/fp_x86.cc
@@ -756,24 +756,6 @@ bool X86Mir2Lir::GenInlinedMinMaxFP(CallInfo* info, bool is_min, bool is_double)
branch_nan->target = NewLIR0(kPseudoTargetLabel);
LoadConstantWide(rl_result.reg, INT64_C(0x7ff8000000000000));
- // The base_of_code_ compiler temp is non-null when it is reserved
- // for being able to do data accesses relative to method start.
- if (base_of_code_ != nullptr) {
- // Loading from the constant pool may have used base of code register.
- // However, the code here generates logic in diamond shape and not all
- // paths load base of code register. Therefore, we ensure it is clobbered so
- // that the temp caching system does not believe it is live at merge point.
- RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
- if (rl_method.wide) {
- rl_method = UpdateLocWide(rl_method);
- } else {
- rl_method = UpdateLoc(rl_method);
- }
- if (rl_method.location == kLocPhysReg) {
- Clobber(rl_method.reg);
- }
- }
-
LIR* branch_exit_nan = NewLIR1(kX86Jmp8, 0);
// Handle Min/Max. Copy greater/lesser value from src2.
branch_cond1->target = NewLIR0(kPseudoTargetLabel);
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index 4eb626c14f..1043815e10 100755
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -830,6 +830,10 @@ RegLocation X86Mir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
return rl_result;
}
+static dwarf::Reg DwarfCoreReg(bool is_x86_64, int num) {
+ return is_x86_64 ? dwarf::Reg::X86_64Core(num) : dwarf::Reg::X86Core(num);
+}
+
bool X86Mir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) {
DCHECK(cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64);
@@ -928,6 +932,7 @@ bool X86Mir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) {
// Do we have a free register for intermediate calculations?
RegStorage tmp = AllocTemp(false);
+ const int kRegSize = cu_->target64 ? 8 : 4;
if (tmp == RegStorage::InvalidReg()) {
/*
* No, will use 'edi'.
@@ -946,6 +951,11 @@ bool X86Mir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) {
IsTemp(rl_result.reg.GetHigh()));
tmp = rs_rDI;
NewLIR1(kX86Push32R, tmp.GetReg());
+ cfi_.AdjustCFAOffset(kRegSize);
+      // Record CFI only if the register is not already spilled.
+ if (!CoreSpillMaskContains(tmp.GetReg())) {
+ cfi_.RelOffset(DwarfCoreReg(cu_->target64, tmp.GetReg()), 0);
+ }
}
// Now we are ready to do calculations.
@@ -957,6 +967,10 @@ bool X86Mir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) {
// Let's put pop 'edi' here to break a bit the dependency chain.
if (tmp == rs_rDI) {
NewLIR1(kX86Pop32R, tmp.GetReg());
+ cfi_.AdjustCFAOffset(-kRegSize);
+ if (!CoreSpillMaskContains(tmp.GetReg())) {
+ cfi_.Restore(DwarfCoreReg(cu_->target64, tmp.GetReg()));
+ }
} else {
FreeTemp(tmp);
}
@@ -1104,6 +1118,7 @@ bool X86Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) {
// If is_long, high half is in info->args[5]
RegLocation rl_src_new_value = info->args[is_long ? 6 : 5]; // int, long or Object
// If is_long, high half is in info->args[7]
+ const int kRegSize = cu_->target64 ? 8 : 4;
if (is_long && cu_->target64) {
// RAX must hold expected for CMPXCHG. Neither rl_new_value, nor r_ptr may be in RAX.
@@ -1125,7 +1140,6 @@ bool X86Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) {
FreeTemp(rs_r0q);
} else if (is_long) {
// TODO: avoid unnecessary loads of SI and DI when the values are in registers.
- // TODO: CFI support.
FlushAllRegs();
LockCallTemps();
RegStorage r_tmp1 = RegStorage::MakeRegPair(rs_rAX, rs_rDX);
@@ -1148,11 +1162,21 @@ bool X86Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) {
NewLIR1(kX86Push32R, rs_rDI.GetReg());
MarkTemp(rs_rDI);
LockTemp(rs_rDI);
+ cfi_.AdjustCFAOffset(kRegSize);
+      // Record CFI only if the register is not already spilled.
+ if (!CoreSpillMaskContains(rs_rDI.GetReg())) {
+ cfi_.RelOffset(DwarfCoreReg(cu_->target64, rs_rDI.GetReg()), 0);
+ }
}
if (push_si) {
NewLIR1(kX86Push32R, rs_rSI.GetReg());
MarkTemp(rs_rSI);
LockTemp(rs_rSI);
+ cfi_.AdjustCFAOffset(kRegSize);
+ // Record cfi only if it is not already spilled.
+ if (!CoreSpillMaskContains(rs_rSI.GetReg())) {
+ cfi_.RelOffset(DwarfCoreReg(cu_->target64, rs_rSI.GetReg()), 0);
+ }
}
ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
const size_t push_offset = (push_si ? 4u : 0u) + (push_di ? 4u : 0u);
@@ -1183,11 +1207,19 @@ bool X86Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) {
FreeTemp(rs_rSI);
UnmarkTemp(rs_rSI);
NewLIR1(kX86Pop32R, rs_rSI.GetReg());
+ cfi_.AdjustCFAOffset(-kRegSize);
+ if (!CoreSpillMaskContains(rs_rSI.GetReg())) {
+ cfi_.Restore(DwarfCoreReg(cu_->target64, rs_rSI.GetRegNum()));
+ }
}
if (push_di) {
FreeTemp(rs_rDI);
UnmarkTemp(rs_rDI);
NewLIR1(kX86Pop32R, rs_rDI.GetReg());
+ cfi_.AdjustCFAOffset(-kRegSize);
+ if (!CoreSpillMaskContains(rs_rDI.GetReg())) {
+ cfi_.Restore(DwarfCoreReg(cu_->target64, rs_rDI.GetRegNum()));
+ }
}
FreeCallTemps();
} else {
@@ -1327,37 +1359,79 @@ bool X86Mir2Lir::GenInlinedReverseBits(CallInfo* info, OpSize size) {
void X86Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
if (cu_->target64) {
// We can do this directly using RIP addressing.
- // We don't know the proper offset for the value, so pick one that will force
- // 4 byte offset. We will fix this up in the assembler later to have the right
- // value.
ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
- LIR* res = NewLIR3(kX86Mov32RM, reg.GetReg(), kRIPReg, 256);
+ LIR* res = NewLIR3(kX86Mov32RM, reg.GetReg(), kRIPReg, kDummy32BitOffset);
res->target = target;
res->flags.fixup = kFixupLoad;
return;
}
- CHECK(base_of_code_ != nullptr);
-
- // Address the start of the method
- RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
- if (rl_method.wide) {
- LoadValueDirectWideFixed(rl_method, reg);
- } else {
- LoadValueDirectFixed(rl_method, reg);
- }
- store_method_addr_used_ = true;
+  // Get the PC into a register and get the anchor.
+ LIR* anchor;
+ RegStorage r_pc = GetPcAndAnchor(&anchor);
// Load the proper value from the literal area.
- // We don't know the proper offset for the value, so pick one that will force
- // 4 byte offset. We will fix this up in the assembler later to have the right
- // value.
ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
- LIR* res = NewLIR3(kX86Mov32RM, reg.GetReg(), reg.GetReg(), 256);
+ LIR* res = NewLIR3(kX86Mov32RM, reg.GetReg(), r_pc.GetReg(), kDummy32BitOffset);
+ res->operands[4] = WrapPointer(anchor);
res->target = target;
res->flags.fixup = kFixupLoad;
}
+bool X86Mir2Lir::CanUseOpPcRelDexCacheArrayLoad() const {
+ return dex_cache_arrays_layout_.Valid();
+}
+
+LIR* X86Mir2Lir::OpLoadPc(RegStorage r_dest) {
+ DCHECK(!cu_->target64);
+ LIR* call = NewLIR1(kX86CallI, 0);
+ call->flags.fixup = kFixupLabel;
+ LIR* pop = NewLIR1(kX86Pop32R, r_dest.GetReg());
+ pop->flags.fixup = kFixupLabel;
+ DCHECK(NEXT_LIR(call) == pop);
+ return call;
+}
+
+RegStorage X86Mir2Lir::GetPcAndAnchor(LIR** anchor, RegStorage r_tmp) {
+ if (pc_rel_base_reg_.Valid()) {
+ DCHECK(setup_pc_rel_base_reg_ != nullptr);
+ *anchor = NEXT_LIR(setup_pc_rel_base_reg_);
+ DCHECK(*anchor != nullptr);
+ DCHECK_EQ((*anchor)->opcode, kX86Pop32R);
+ pc_rel_base_reg_used_ = true;
+ return pc_rel_base_reg_;
+ } else {
+ RegStorage r_pc = r_tmp.Valid() ? r_tmp : AllocTempRef();
+ LIR* load_pc = OpLoadPc(r_pc);
+ *anchor = NEXT_LIR(load_pc);
+ DCHECK(*anchor != nullptr);
+ DCHECK_EQ((*anchor)->opcode, kX86Pop32R);
+ return r_pc;
+ }
+}
+
+void X86Mir2Lir::OpPcRelDexCacheArrayLoad(const DexFile* dex_file, int offset,
+ RegStorage r_dest) {
+ if (cu_->target64) {
+ LIR* mov = NewLIR3(kX86Mov32RM, r_dest.GetReg(), kRIPReg, kDummy32BitOffset);
+ mov->flags.fixup = kFixupLabel;
+ mov->operands[3] = WrapPointer(dex_file);
+ mov->operands[4] = offset;
+ mov->target = mov; // Used for pc_insn_offset (not used by x86-64 relative patcher).
+ dex_cache_access_insns_.push_back(mov);
+ } else {
+    // Get the PC into a register and get the anchor. Use r_dest for the temp if needed.
+ LIR* anchor;
+ RegStorage r_pc = GetPcAndAnchor(&anchor, r_dest);
+ LIR* mov = NewLIR3(kX86Mov32RM, r_dest.GetReg(), r_pc.GetReg(), kDummy32BitOffset);
+ mov->flags.fixup = kFixupLabel;
+ mov->operands[3] = WrapPointer(dex_file);
+ mov->operands[4] = offset;
+ mov->target = anchor; // Used for pc_insn_offset.
+ dex_cache_access_insns_.push_back(mov);
+ }
+}
+
LIR* X86Mir2Lir::OpVldm(RegStorage r_base, int count) {
UNUSED(r_base, count);
LOG(FATAL) << "Unexpected use of OpVldm for x86";
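The GenInlinedMinMax() and GenInlinedCas() hunks above repeat one CFI bookkeeping pattern for ad-hoc pushes outside the prologue. In outline (a sketch reusing the names from the diff):

  NewLIR1(kX86Push32R, reg.GetReg());
  cfi_.AdjustCFAOffset(kRegSize);  // The stack grew; the CFA moved with it.
  if (!CoreSpillMaskContains(reg.GetReg())) {
    // Describe the save only if the prologue's spill area doesn't already.
    cfi_.RelOffset(DwarfCoreReg(cu_->target64, reg.GetReg()), 0);
  }
  // ... inlined fast-path code using reg ...
  NewLIR1(kX86Pop32R, reg.GetReg());
  cfi_.AdjustCFAOffset(-kRegSize);
  if (!CoreSpillMaskContains(reg.GetReg())) {
    cfi_.Restore(DwarfCoreReg(cu_->target64, reg.GetReg()));
  }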
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index f128eb78a3..a16e242d08 100755
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -32,7 +32,6 @@
#include "mirror/string.h"
#include "oat.h"
#include "x86_lir.h"
-#include "utils/dwarf_cfi.h"
namespace art {
@@ -725,6 +724,14 @@ int X86Mir2Lir::NumReservableVectorRegisters(bool long_or_fp) {
return long_or_fp ? num_vector_temps - 2 : num_vector_temps - 1;
}
+static dwarf::Reg DwarfCoreReg(bool is_x86_64, int num) {
+ return is_x86_64 ? dwarf::Reg::X86_64Core(num) : dwarf::Reg::X86Core(num);
+}
+
+static dwarf::Reg DwarfFpReg(bool is_x86_64, int num) {
+ return is_x86_64 ? dwarf::Reg::X86_64Fp(num) : dwarf::Reg::X86Fp(num);
+}
+
void X86Mir2Lir::SpillCoreRegs() {
if (num_core_spills_ == 0) {
return;
@@ -735,11 +742,11 @@ void X86Mir2Lir::SpillCoreRegs() {
frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * num_core_spills_);
OpSize size = cu_->target64 ? k64 : k32;
const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32;
- for (int reg = 0; mask; mask >>= 1, reg++) {
- if (mask & 0x1) {
- StoreBaseDisp(rs_rSP, offset,
- cu_->target64 ? RegStorage::Solo64(reg) : RegStorage::Solo32(reg),
- size, kNotVolatile);
+ for (int reg = 0; mask != 0u; mask >>= 1, reg++) {
+ if ((mask & 0x1) != 0u) {
+ RegStorage r_src = cu_->target64 ? RegStorage::Solo64(reg) : RegStorage::Solo32(reg);
+ StoreBaseDisp(rs_rSP, offset, r_src, size, kNotVolatile);
+ cfi_.RelOffset(DwarfCoreReg(cu_->target64, reg), offset);
offset += GetInstructionSetPointerSize(cu_->instruction_set);
}
}
@@ -754,10 +761,11 @@ void X86Mir2Lir::UnSpillCoreRegs() {
int offset = frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * num_core_spills_);
OpSize size = cu_->target64 ? k64 : k32;
const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32;
- for (int reg = 0; mask; mask >>= 1, reg++) {
- if (mask & 0x1) {
- LoadBaseDisp(rs_rSP, offset, cu_->target64 ? RegStorage::Solo64(reg) : RegStorage::Solo32(reg),
- size, kNotVolatile);
+ for (int reg = 0; mask != 0u; mask >>= 1, reg++) {
+ if ((mask & 0x1) != 0u) {
+ RegStorage r_dest = cu_->target64 ? RegStorage::Solo64(reg) : RegStorage::Solo32(reg);
+ LoadBaseDisp(rs_rSP, offset, r_dest, size, kNotVolatile);
+ cfi_.Restore(DwarfCoreReg(cu_->target64, reg));
offset += GetInstructionSetPointerSize(cu_->instruction_set);
}
}
@@ -771,9 +779,10 @@ void X86Mir2Lir::SpillFPRegs() {
int offset = frame_size_ -
(GetInstructionSetPointerSize(cu_->instruction_set) * (num_fp_spills_ + num_core_spills_));
const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32;
- for (int reg = 0; mask; mask >>= 1, reg++) {
- if (mask & 0x1) {
+ for (int reg = 0; mask != 0u; mask >>= 1, reg++) {
+ if ((mask & 0x1) != 0u) {
StoreBaseDisp(rs_rSP, offset, RegStorage::FloatSolo64(reg), k64, kNotVolatile);
+ cfi_.RelOffset(DwarfFpReg(cu_->target64, reg), offset);
offset += sizeof(double);
}
}
@@ -786,10 +795,11 @@ void X86Mir2Lir::UnSpillFPRegs() {
int offset = frame_size_ -
(GetInstructionSetPointerSize(cu_->instruction_set) * (num_fp_spills_ + num_core_spills_));
const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32;
- for (int reg = 0; mask; mask >>= 1, reg++) {
- if (mask & 0x1) {
+ for (int reg = 0; mask != 0u; mask >>= 1, reg++) {
+ if ((mask & 0x1) != 0u) {
LoadBaseDisp(rs_rSP, offset, RegStorage::FloatSolo64(reg),
k64, kNotVolatile);
+ cfi_.Restore(DwarfFpReg(cu_->target64, reg));
offset += sizeof(double);
}
}
@@ -825,21 +835,22 @@ RegisterClass X86Mir2Lir::RegClassForFieldLoadStore(OpSize size, bool is_volatil
X86Mir2Lir::X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena)
: Mir2Lir(cu, mir_graph, arena),
in_to_reg_storage_x86_64_mapper_(this), in_to_reg_storage_x86_mapper_(this),
- base_of_code_(nullptr), store_method_addr_(false), store_method_addr_used_(false),
+ pc_rel_base_reg_(RegStorage::InvalidReg()),
+ pc_rel_base_reg_used_(false),
+ setup_pc_rel_base_reg_(nullptr),
method_address_insns_(arena->Adapter()),
class_type_address_insns_(arena->Adapter()),
call_method_insns_(arena->Adapter()),
- stack_decrement_(nullptr), stack_increment_(nullptr),
+ dex_cache_access_insns_(arena->Adapter()),
const_vectors_(nullptr) {
method_address_insns_.reserve(100);
class_type_address_insns_.reserve(100);
call_method_insns_.reserve(100);
- store_method_addr_used_ = false;
- for (int i = 0; i < kX86Last; i++) {
- DCHECK_EQ(X86Mir2Lir::EncodingMap[i].opcode, i)
- << "Encoding order for " << X86Mir2Lir::EncodingMap[i].name
- << " is wrong: expecting " << i << ", seeing "
- << static_cast<int>(X86Mir2Lir::EncodingMap[i].opcode);
+ for (int i = 0; i < kX86Last; i++) {
+ DCHECK_EQ(X86Mir2Lir::EncodingMap[i].opcode, i)
+ << "Encoding order for " << X86Mir2Lir::EncodingMap[i].name
+ << " is wrong: expecting " << i << ", seeing "
+ << static_cast<int>(X86Mir2Lir::EncodingMap[i].opcode);
}
}
@@ -924,14 +935,6 @@ void X86Mir2Lir::DumpRegLocation(RegLocation loc) {
<< ", orig: " << loc.orig_sreg;
}
-void X86Mir2Lir::Materialize() {
- // A good place to put the analysis before starting.
- AnalyzeMIR();
-
- // Now continue with regular code generation.
- Mir2Lir::Materialize();
-}
-
void X86Mir2Lir::LoadMethodAddress(const MethodReference& target_method, InvokeType type,
SpecialTargetRegister symbolic_reg) {
/*
@@ -1058,6 +1061,9 @@ void X86Mir2Lir::InstallLiteralPools() {
}
}
+ patches_.reserve(method_address_insns_.size() + class_type_address_insns_.size() +
+ call_method_insns_.size() + dex_cache_access_insns_.size());
+
// Handle the fixups for methods.
for (LIR* p : method_address_insns_) {
DCHECK_EQ(p->opcode, kX86Mov32RI);
@@ -1084,7 +1090,6 @@ void X86Mir2Lir::InstallLiteralPools() {
}
// And now the PC-relative calls to methods.
- patches_.reserve(call_method_insns_.size());
for (LIR* p : call_method_insns_) {
DCHECK_EQ(p->opcode, kX86CallI);
uint32_t target_method_idx = p->operands[1];
@@ -1096,6 +1101,18 @@ void X86Mir2Lir::InstallLiteralPools() {
target_dex_file, target_method_idx));
}
+ // PC-relative references to dex cache arrays.
+ for (LIR* p : dex_cache_access_insns_) {
+ DCHECK(p->opcode == kX86Mov32RM);
+ const DexFile* dex_file = UnwrapPointer<DexFile>(p->operands[3]);
+ uint32_t offset = p->operands[4];
+    // The offset to patch occupies the last 4 bytes of the instruction.
+ int patch_offset = p->offset + p->flags.size - 4;
+ DCHECK(!p->flags.is_nop);
+ patches_.push_back(LinkerPatch::DexCacheArrayPatch(patch_offset, dex_file,
+ p->target->offset, offset));
+ }
+
// And do the normal processing.
Mir2Lir::InstallLiteralPools();
}
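The patch_offset arithmetic above relies on the disp32 always occupying the final 4 bytes of the kX86Mov32RM instruction. With hypothetical numbers:

  // A 6-byte "8B 83 <disp32>" load placed at method offset 0x50:
  //   p->offset = 0x50, p->flags.size = 6
  //   patch_offset = 0x50 + 6 - 4 = 0x52   // points at the disp32 bytes
  // The linker's DexCacheArrayPatch later rewrites those 4 bytes.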
@@ -1303,6 +1320,11 @@ bool X86Mir2Lir::GenInlinedIndexOf(CallInfo* info, bool zero_based) {
if (!cu_->target64) {
// EDI is promotable in 32-bit mode.
NewLIR1(kX86Push32R, rs_rDI.GetReg());
+ cfi_.AdjustCFAOffset(4);
+    // Record CFI only if the register is not already spilled.
+ if (!CoreSpillMaskContains(rs_rDI.GetReg())) {
+ cfi_.RelOffset(DwarfCoreReg(cu_->target64, rs_rDI.GetReg()), 0);
+ }
}
if (zero_based) {
@@ -1398,8 +1420,13 @@ bool X86Mir2Lir::GenInlinedIndexOf(CallInfo* info, bool zero_based) {
// And join up at the end.
all_done->target = NewLIR0(kPseudoTargetLabel);
- if (!cu_->target64)
+ if (!cu_->target64) {
NewLIR1(kX86Pop32R, rs_rDI.GetReg());
+ cfi_.AdjustCFAOffset(-4);
+ if (!CoreSpillMaskContains(rs_rDI.GetReg())) {
+ cfi_.Restore(DwarfCoreReg(cu_->target64, rs_rDI.GetReg()));
+ }
+ }
// Out of line code returns here.
if (slowpath_branch != nullptr) {
@@ -1412,100 +1439,6 @@ bool X86Mir2Lir::GenInlinedIndexOf(CallInfo* info, bool zero_based) {
return true;
}
-static bool ARTRegIDToDWARFRegID(bool is_x86_64, int art_reg_id, int* dwarf_reg_id) {
- if (is_x86_64) {
- switch (art_reg_id) {
- case 3 : *dwarf_reg_id = 3; return true; // %rbx
- // This is the only discrepancy between ART & DWARF register numbering.
- case 5 : *dwarf_reg_id = 6; return true; // %rbp
- case 12: *dwarf_reg_id = 12; return true; // %r12
- case 13: *dwarf_reg_id = 13; return true; // %r13
- case 14: *dwarf_reg_id = 14; return true; // %r14
- case 15: *dwarf_reg_id = 15; return true; // %r15
- default: return false; // Should not get here
- }
- } else {
- switch (art_reg_id) {
- case 5: *dwarf_reg_id = 5; return true; // %ebp
- case 6: *dwarf_reg_id = 6; return true; // %esi
- case 7: *dwarf_reg_id = 7; return true; // %edi
- default: return false; // Should not get here
- }
- }
-}
-
-std::vector<uint8_t>* X86Mir2Lir::ReturnFrameDescriptionEntry() {
- std::vector<uint8_t>* cfi_info = new std::vector<uint8_t>;
-
- // Generate the FDE for the method.
- DCHECK_NE(data_offset_, 0U);
-
- WriteFDEHeader(cfi_info, cu_->target64);
- WriteFDEAddressRange(cfi_info, data_offset_, cu_->target64);
-
- // The instructions in the FDE.
- if (stack_decrement_ != nullptr) {
- // Advance LOC to just past the stack decrement.
- uint32_t pc = NEXT_LIR(stack_decrement_)->offset;
- DW_CFA_advance_loc(cfi_info, pc);
-
- // Now update the offset to the call frame: DW_CFA_def_cfa_offset frame_size.
- DW_CFA_def_cfa_offset(cfi_info, frame_size_);
-
- // Handle register spills
- const uint32_t kSpillInstLen = (cu_->target64) ? 5 : 4;
- const int kDataAlignmentFactor = (cu_->target64) ? -8 : -4;
- uint32_t mask = core_spill_mask_ & ~(1 << rs_rRET.GetRegNum());
- int offset = -(GetInstructionSetPointerSize(cu_->instruction_set) * num_core_spills_);
- for (int reg = 0; mask; mask >>= 1, reg++) {
- if (mask & 0x1) {
- pc += kSpillInstLen;
-
- // Advance LOC to pass this instruction
- DW_CFA_advance_loc(cfi_info, kSpillInstLen);
-
- int dwarf_reg_id;
- if (ARTRegIDToDWARFRegID(cu_->target64, reg, &dwarf_reg_id)) {
- // DW_CFA_offset_extended_sf reg offset
- DW_CFA_offset_extended_sf(cfi_info, dwarf_reg_id, offset / kDataAlignmentFactor);
- }
-
- offset += GetInstructionSetPointerSize(cu_->instruction_set);
- }
- }
-
- // We continue with that stack until the epilogue.
- if (stack_increment_ != nullptr) {
- uint32_t new_pc = NEXT_LIR(stack_increment_)->offset;
- DW_CFA_advance_loc(cfi_info, new_pc - pc);
-
- // We probably have code snippets after the epilogue, so save the
- // current state: DW_CFA_remember_state.
- DW_CFA_remember_state(cfi_info);
-
- // We have now popped the stack: DW_CFA_def_cfa_offset 4/8.
- // There is only the return PC on the stack now.
- DW_CFA_def_cfa_offset(cfi_info, GetInstructionSetPointerSize(cu_->instruction_set));
-
- // Everything after that is the same as before the epilogue.
- // Stack bump was followed by RET instruction.
- LIR *post_ret_insn = NEXT_LIR(NEXT_LIR(stack_increment_));
- if (post_ret_insn != nullptr) {
- pc = new_pc;
- new_pc = post_ret_insn->offset;
- DW_CFA_advance_loc(cfi_info, new_pc - pc);
- // Restore the state: DW_CFA_restore_state.
- DW_CFA_restore_state(cfi_info);
- }
- }
- }
-
- PadCFI(cfi_info);
- WriteCFILength(cfi_info, cu_->target64);
-
- return cfi_info;
-}
-
void X86Mir2Lir::GenMachineSpecificExtendedMethodMIR(BasicBlock* bb, MIR* mir) {
switch (static_cast<ExtendedMIROpcode>(mir->dalvikInsn.opcode)) {
case kMirOpReserveVectorRegisters:
@@ -1642,20 +1575,17 @@ void X86Mir2Lir::AppendOpcodeWithConst(X86OpCode opcode, int reg, MIR* mir) {
LIR* load;
ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
if (cu_->target64) {
- load = NewLIR3(opcode, reg, kRIPReg, 256 /* bogus */);
+ load = NewLIR3(opcode, reg, kRIPReg, kDummy32BitOffset);
} else {
- // Address the start of the method.
- RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
- if (rl_method.wide) {
- rl_method = LoadValueWide(rl_method, kCoreReg);
- } else {
- rl_method = LoadValue(rl_method, kCoreReg);
+      // Get the PC into a register and get the anchor.
+ LIR* anchor;
+ RegStorage r_pc = GetPcAndAnchor(&anchor);
+
+ load = NewLIR3(opcode, reg, r_pc.GetReg(), kDummy32BitOffset);
+ load->operands[4] = WrapPointer(anchor);
+ if (IsTemp(r_pc)) {
+ FreeTemp(r_pc);
}
-
- load = NewLIR3(opcode, reg, rl_method.reg.GetReg(), 256 /* bogus */);
-
- // The literal pool needs position independent logic.
- store_method_addr_used_ = true;
}
load->flags.fixup = kFixupLoad;
load->target = data_target;
diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc
index 893b98a49d..efcb9eefb5 100644
--- a/compiler/dex/quick/x86/utility_x86.cc
+++ b/compiler/dex/quick/x86/utility_x86.cc
@@ -17,6 +17,7 @@
#include "codegen_x86.h"
#include "base/logging.h"
+#include "dex/mir_graph.h"
#include "dex/quick/mir_to_lir-inl.h"
#include "dex/dataflow_iterator-inl.h"
#include "dex/quick/dex_file_method_inliner.h"
@@ -574,7 +575,7 @@ LIR* X86Mir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) {
DCHECK(r_dest.IsDouble());
if (value == 0) {
return NewLIR2(kX86XorpdRR, low_reg_val, low_reg_val);
- } else if (base_of_code_ != nullptr || cu_->target64) {
+ } else if (pc_rel_base_reg_.Valid() || cu_->target64) {
// We will load the value from the literal area.
LIR* data_target = ScanLiteralPoolWide(literal_list_, val_lo, val_hi);
if (data_target == NULL) {
@@ -589,17 +590,16 @@ LIR* X86Mir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) {
if (cu_->target64) {
res = NewLIR3(kX86MovsdRM, low_reg_val, kRIPReg, 256 /* bogus */);
} else {
- // Address the start of the method.
- RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
- if (rl_method.wide) {
- rl_method = LoadValueWide(rl_method, kCoreReg);
- } else {
- rl_method = LoadValue(rl_method, kCoreReg);
- }
+      // Get the PC into a register and get the anchor.
+ LIR* anchor;
+ RegStorage r_pc = GetPcAndAnchor(&anchor);
- res = LoadBaseDisp(rl_method.reg, 256 /* bogus */, RegStorage::FloatSolo64(low_reg_val),
+ res = LoadBaseDisp(r_pc, kDummy32BitOffset, RegStorage::FloatSolo64(low_reg_val),
kDouble, kNotVolatile);
- store_method_addr_used_ = true;
+ res->operands[4] = WrapPointer(anchor);
+ if (IsTemp(r_pc)) {
+ FreeTemp(r_pc);
+ }
}
res->target = data_target;
res->flags.fixup = kFixupLoad;
@@ -954,82 +954,14 @@ LIR* X86Mir2Lir::OpCmpMemImmBranch(ConditionCode cond, RegStorage temp_reg, RegS
return branch;
}
-void X86Mir2Lir::AnalyzeMIR() {
- // Assume we don't need a pointer to the base of the code.
- cu_->NewTimingSplit("X86 MIR Analysis");
- store_method_addr_ = false;
-
- // Walk the MIR looking for interesting items.
- PreOrderDfsIterator iter(mir_graph_);
- BasicBlock* curr_bb = iter.Next();
- while (curr_bb != NULL) {
- AnalyzeBB(curr_bb);
- curr_bb = iter.Next();
- }
-
- // Did we need a pointer to the method code? Not in 64 bit mode.
- base_of_code_ = nullptr;
-
- // store_method_addr_ must be false for x86_64, since RIP addressing is used.
- CHECK(!(cu_->target64 && store_method_addr_));
- if (store_method_addr_) {
- base_of_code_ = mir_graph_->GetNewCompilerTemp(kCompilerTempBackend, false);
- DCHECK(base_of_code_ != nullptr);
- }
-}
-
-void X86Mir2Lir::AnalyzeBB(BasicBlock* bb) {
- if (bb->block_type == kDead) {
- // Ignore dead blocks
+void X86Mir2Lir::AnalyzeMIR(RefCounts* core_counts, MIR* mir, uint32_t weight) {
+ if (cu_->target64) {
+ Mir2Lir::AnalyzeMIR(core_counts, mir, weight);
return;
}
- for (MIR* mir = bb->first_mir_insn; mir != NULL; mir = mir->next) {
- int opcode = mir->dalvikInsn.opcode;
- if (MIR::DecodedInstruction::IsPseudoMirOp(opcode)) {
- AnalyzeExtendedMIR(opcode, bb, mir);
- } else {
- AnalyzeMIR(opcode, bb, mir);
- }
- }
-}
-
-
-void X86Mir2Lir::AnalyzeExtendedMIR(int opcode, BasicBlock* bb, MIR* mir) {
- switch (opcode) {
- // Instructions referencing doubles.
- case kMirOpFusedCmplDouble:
- case kMirOpFusedCmpgDouble:
- AnalyzeFPInstruction(opcode, bb, mir);
- break;
- case kMirOpConstVector:
- if (!cu_->target64) {
- store_method_addr_ = true;
- }
- break;
- case kMirOpPackedMultiply:
- case kMirOpPackedShiftLeft:
- case kMirOpPackedSignedShiftRight:
- case kMirOpPackedUnsignedShiftRight:
- if (!cu_->target64) {
- // Byte emulation requires constants from the literal pool.
- OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vC >> 16);
- if (opsize == kSignedByte || opsize == kUnsignedByte) {
- store_method_addr_ = true;
- }
- }
- break;
- default:
- // Ignore the rest.
- break;
- }
-}
-
-void X86Mir2Lir::AnalyzeMIR(int opcode, BasicBlock* bb, MIR* mir) {
- // Looking for
- // - Do we need a pointer to the code (used for packed switches and double lits)?
- // 64 bit uses RIP addressing instead.
-
+ int opcode = mir->dalvikInsn.opcode;
+ bool uses_pc_rel_load = false;
switch (opcode) {
// Instructions referencing doubles.
case Instruction::CMPL_DOUBLE:
@@ -1045,34 +977,62 @@ void X86Mir2Lir::AnalyzeMIR(int opcode, BasicBlock* bb, MIR* mir) {
case Instruction::MUL_DOUBLE_2ADDR:
case Instruction::DIV_DOUBLE_2ADDR:
case Instruction::REM_DOUBLE_2ADDR:
- AnalyzeFPInstruction(opcode, bb, mir);
+ case kMirOpFusedCmplDouble:
+ case kMirOpFusedCmpgDouble:
+ uses_pc_rel_load = AnalyzeFPInstruction(opcode, mir);
break;
- // Packed switches and array fills need a pointer to the base of the method.
- case Instruction::FILL_ARRAY_DATA:
+ // Packed switch needs the PC-relative pointer if it's large.
case Instruction::PACKED_SWITCH:
- if (!cu_->target64) {
- store_method_addr_ = true;
+ if (mir_graph_->GetTable(mir, mir->dalvikInsn.vB)[1] > kSmallSwitchThreshold) {
+ uses_pc_rel_load = true;
}
break;
+
+ case kMirOpConstVector:
+ uses_pc_rel_load = true;
+ break;
+ case kMirOpPackedMultiply:
+ case kMirOpPackedShiftLeft:
+ case kMirOpPackedSignedShiftRight:
+ case kMirOpPackedUnsignedShiftRight:
+ {
+ // Byte emulation requires constants from the literal pool.
+ OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vC >> 16);
+ if (opsize == kSignedByte || opsize == kUnsignedByte) {
+ uses_pc_rel_load = true;
+ }
+ }
+ break;
+
case Instruction::INVOKE_STATIC:
case Instruction::INVOKE_STATIC_RANGE:
- AnalyzeInvokeStatic(opcode, bb, mir);
- break;
+ if (mir_graph_->GetMethodLoweringInfo(mir).IsIntrinsic()) {
+ uses_pc_rel_load = AnalyzeInvokeStaticIntrinsic(mir);
+ break;
+ }
+ FALLTHROUGH_INTENDED;
default:
- // Other instructions are not interesting yet.
+ Mir2Lir::AnalyzeMIR(core_counts, mir, weight);
break;
}
+
+ if (uses_pc_rel_load) {
+ DCHECK(pc_rel_temp_ != nullptr);
+ core_counts[SRegToPMap(pc_rel_temp_->s_reg_low)].count += weight;
+ }
}
-void X86Mir2Lir::AnalyzeFPInstruction(int opcode, BasicBlock* bb, MIR* mir) {
- UNUSED(bb);
+bool X86Mir2Lir::AnalyzeFPInstruction(int opcode, MIR* mir) {
+ DCHECK(!cu_->target64);
// Look at all the uses, and see if they are double constants.
uint64_t attrs = MIRGraph::GetDataFlowAttributes(static_cast<Instruction::Code>(opcode));
int next_sreg = 0;
if (attrs & DF_UA) {
if (attrs & DF_A_WIDE) {
- AnalyzeDoubleUse(mir_graph_->GetSrcWide(mir, next_sreg));
+ if (AnalyzeDoubleUse(mir_graph_->GetSrcWide(mir, next_sreg))) {
+ return true;
+ }
next_sreg += 2;
} else {
next_sreg++;
@@ -1080,7 +1040,9 @@ void X86Mir2Lir::AnalyzeFPInstruction(int opcode, BasicBlock* bb, MIR* mir) {
}
if (attrs & DF_UB) {
if (attrs & DF_B_WIDE) {
- AnalyzeDoubleUse(mir_graph_->GetSrcWide(mir, next_sreg));
+ if (AnalyzeDoubleUse(mir_graph_->GetSrcWide(mir, next_sreg))) {
+ return true;
+ }
next_sreg += 2;
} else {
next_sreg++;
@@ -1088,15 +1050,39 @@ void X86Mir2Lir::AnalyzeFPInstruction(int opcode, BasicBlock* bb, MIR* mir) {
}
if (attrs & DF_UC) {
if (attrs & DF_C_WIDE) {
- AnalyzeDoubleUse(mir_graph_->GetSrcWide(mir, next_sreg));
+ if (AnalyzeDoubleUse(mir_graph_->GetSrcWide(mir, next_sreg))) {
+ return true;
+ }
}
}
+ return false;
}
-void X86Mir2Lir::AnalyzeDoubleUse(RegLocation use) {
+inline bool X86Mir2Lir::AnalyzeDoubleUse(RegLocation use) {
// If this is a double literal, we will want it in the literal pool on 32b platforms.
- if (use.is_const && !cu_->target64) {
- store_method_addr_ = true;
+ DCHECK(!cu_->target64);
+ return use.is_const;
+}
+
+bool X86Mir2Lir::AnalyzeInvokeStaticIntrinsic(MIR* mir) {
+ // 64 bit RIP addressing doesn't need this analysis.
+ DCHECK(!cu_->target64);
+
+ // Retrieve the type of the intrinsic.
+ MethodReference method_ref = mir_graph_->GetMethodLoweringInfo(mir).GetTargetMethod();
+ DCHECK(cu_->compiler_driver->GetMethodInlinerMap() != nullptr);
+ DexFileMethodInliner* method_inliner =
+ cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(method_ref.dex_file);
+ InlineMethod method;
+ bool is_intrinsic = method_inliner->IsIntrinsic(method_ref.dex_method_index, &method);
+ DCHECK(is_intrinsic);
+
+ switch (method.opcode) {
+ case kIntrinsicAbsDouble:
+ case kIntrinsicMinMaxDouble:
+ return true;
+ default:
+ return false;
}
}
@@ -1128,37 +1114,47 @@ RegLocation X86Mir2Lir::UpdateLocWideTyped(RegLocation loc) {
return loc;
}
-void X86Mir2Lir::AnalyzeInvokeStatic(int opcode, BasicBlock* bb, MIR* mir) {
- UNUSED(opcode, bb);
-
- // 64 bit RIP addressing doesn't need store_method_addr_ set.
+LIR* X86Mir2Lir::InvokeTrampoline(OpKind op, RegStorage r_tgt, QuickEntrypointEnum trampoline) {
+ UNUSED(r_tgt); // Call to absolute memory location doesn't need a temporary target register.
if (cu_->target64) {
- return;
+ return OpThreadMem(op, GetThreadOffset<8>(trampoline));
+ } else {
+ return OpThreadMem(op, GetThreadOffset<4>(trampoline));
}
+}
- uint32_t index = mir->dalvikInsn.vB;
- DCHECK(cu_->compiler_driver->GetMethodInlinerMap() != nullptr);
- DexFileMethodInliner* method_inliner =
- cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(cu_->dex_file);
- InlineMethod method;
- if (method_inliner->IsIntrinsic(index, &method)) {
- switch (method.opcode) {
- case kIntrinsicAbsDouble:
- case kIntrinsicMinMaxDouble:
- store_method_addr_ = true;
- break;
- default:
- break;
+void X86Mir2Lir::CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num_regs) {
+ // Start with the default counts.
+ Mir2Lir::CountRefs(core_counts, fp_counts, num_regs);
+
+ if (pc_rel_temp_ != nullptr) {
+    // If the dex cache array base temp is used only once outside any loops (weight = 1),
+    // avoid the promotion. Otherwise, boost the weight by a factor of 2: the full PC-relative
+    // load sequence is 3 instructions long, so promoting the PC base saves 2 instructions
+    // per use.
+ int p_map_idx = SRegToPMap(pc_rel_temp_->s_reg_low);
+ if (core_counts[p_map_idx].count == 1) {
+ core_counts[p_map_idx].count = 0;
+ } else {
+ core_counts[p_map_idx].count *= 2;
}
}
}
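
A hedged aside on the weighting above (my own sketch, not part of the patch): the
adjustment reduces to a tiny function of the use count gathered by CountRefs.

  // Illustrative only; assumes 'count' is the loop-weighted use count of pc_rel_temp_.
  int AdjustPcRelBaseWeight(int count) {
    if (count == 1) {
      return 0;  // A single use outside loops is not worth a promoted base register.
    }
    return count * 2;  // Each promoted use saves 2 of the 3 instructions of the load.
  }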
-LIR* X86Mir2Lir::InvokeTrampoline(OpKind op, RegStorage r_tgt, QuickEntrypointEnum trampoline) {
- UNUSED(r_tgt); // Call to absolute memory location doesn't need a temporary target register.
- if (cu_->target64) {
- return OpThreadMem(op, GetThreadOffset<8>(trampoline));
- } else {
- return OpThreadMem(op, GetThreadOffset<4>(trampoline));
+void X86Mir2Lir::DoPromotion() {
+ if (!cu_->target64) {
+ pc_rel_temp_ = mir_graph_->GetNewCompilerTemp(kCompilerTempBackend, false);
+ }
+
+ Mir2Lir::DoPromotion();
+
+ if (pc_rel_temp_ != nullptr) {
+    // If the dex cache array base temp was promoted, remember the register, but
+    // always remove the temp's stack location to avoid unnecessarily bloating the stack.
+ pc_rel_base_reg_ = mir_graph_->reg_location_[pc_rel_temp_->s_reg_low].reg;
+ DCHECK(!pc_rel_base_reg_.Valid() || !pc_rel_base_reg_.IsFloat());
+ mir_graph_->RemoveLastCompilerTemp(kCompilerTempBackend, false, pc_rel_temp_);
+ pc_rel_temp_ = nullptr;
}
}
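
A condensed sketch of the promotion lifecycle implemented above (my reading,
using only the hooks visible in this patch):

  // void X86Mir2Lir::DoPromotion() {
  //   1. Create pc_rel_temp_ (a backend compiler temp) on 32-bit targets.
  //   2. Mir2Lir::DoPromotion() runs; AnalyzeMIR()/CountRefs() weigh its uses.
  //   3. If promoted, cache the register in pc_rel_base_reg_ and remove the
  //      temp (and its stack slot) so the frame is not bloated.
  // }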
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index 7dea09a579..57db0158e4 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -635,8 +635,6 @@ enum X86OpCode {
kX86CallT, // call fs:[disp]; fs: is equal to Thread::Current(); lir operands - 0: disp
kX86CallI, // call <relative> - 0: disp; Used for core.oat linking only
kX86Ret, // ret; no lir operands
- kX86StartOfMethod, // call 0; pop reg; sub reg, # - generate start of method into reg
- // lir operands - 0: reg
kX86PcRelLoadRA, // mov reg, [base + index * scale + PC relative displacement]
// lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: table
kX86PcRelAdr, // mov reg, PC relative displacement; lir operands - 0: reg, 1: table
@@ -670,7 +668,6 @@ enum X86EncodingKind {
kRegMemCond, // RM instruction kind followed by a condition.
kJmp, kJcc, kCall, // Branch instruction kinds.
kPcRel, // Operation with displacement that is PC relative
- kMacro, // An instruction composing multiple others
kUnimplemented // Encoding used when an instruction isn't yet implemented.
};
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 100d49a99e..c2b837512c 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -31,6 +31,7 @@
#include "base/timing_logger.h"
#include "class_linker.h"
#include "compiled_class.h"
+#include "compiled_method.h"
#include "compiler.h"
#include "compiler_driver-inl.h"
#include "dex_compilation_unit.h"
@@ -62,6 +63,7 @@
#include "thread_pool.h"
#include "trampolines/trampoline_compiler.h"
#include "transaction.h"
+#include "utils/dex_cache_arrays_layout-inl.h"
#include "utils/swap_space.h"
#include "verifier/method_verifier.h"
#include "verifier/method_verifier-inl.h"
@@ -348,6 +350,7 @@ CompilerDriver::CompilerDriver(const CompilerOptions* compiler_options,
verification_results_(verification_results),
method_inliner_map_(method_inliner_map),
compiler_(Compiler::Create(this, compiler_kind)),
+ compiler_kind_(compiler_kind),
instruction_set_(instruction_set),
instruction_set_features_(instruction_set_features),
freezing_constructor_lock_("freezing constructor lock"),
@@ -1173,6 +1176,13 @@ uint32_t CompilerDriver::GetReferenceDisableFlagOffset() const {
return klass->GetDisableIntrinsicFlagOffset().Uint32Value();
}
+DexCacheArraysLayout CompilerDriver::GetDexCacheArraysLayout(const DexFile* dex_file) {
+ // Currently only image dex caches have fixed array layout.
+ return IsImage() && GetSupportBootImageFixup()
+ ? DexCacheArraysLayout(dex_file)
+ : DexCacheArraysLayout();
+}
+
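
A hedged caller sketch (DexCacheArraysLayout::Valid() is assumed here to be the
layout's validity check, inferred from the "invalid layout" wording in the header):

  DexCacheArraysLayout layout = driver->GetDexCacheArraysLayout(dex_file);
  if (layout.Valid()) {
    // Fixed offsets are known; PC-relative loads from the arrays are possible.
  } else {
    // No fixed layout; fall back to loads via the DexCache object.
  }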
void CompilerDriver::ProcessedInstanceField(bool resolved) {
if (!resolved) {
stats_->UnresolvedInstanceField();
@@ -2205,10 +2215,8 @@ void CompilerDriver::CompileMethod(Thread* self, const DexFile::CodeItem* code_i
InstructionSetHasGenericJniStub(instruction_set_)) {
// Leaving this empty will trigger the generic JNI version
} else {
- if (instruction_set_ != kMips64) { // Use generic JNI for Mips64 (temporarily).
- compiled_method = compiler_->JniCompile(access_flags, method_idx, dex_file);
- CHECK(compiled_method != nullptr);
- }
+ compiled_method = compiler_->JniCompile(access_flags, method_idx, dex_file);
+ CHECK(compiled_method != nullptr);
}
} else if ((access_flags & kAccAbstract) != 0) {
// Abstract methods don't have code.
@@ -2246,7 +2254,7 @@ void CompilerDriver::CompileMethod(Thread* self, const DexFile::CodeItem* code_i
// Count non-relative linker patches.
size_t non_relative_linker_patch_count = 0u;
for (const LinkerPatch& patch : compiled_method->GetPatches()) {
- if (patch.Type() != kLinkerPatchCallRelative) {
+ if (!patch.IsPcRelative()) {
++non_relative_linker_patch_count;
}
}
@@ -2263,8 +2271,11 @@ void CompilerDriver::CompileMethod(Thread* self, const DexFile::CodeItem* code_i
DCHECK(GetCompiledMethod(method_ref) != nullptr) << PrettyMethod(method_idx, dex_file);
}
- // Done compiling, delete the verified method to reduce native memory usage.
- verification_results_->RemoveVerifiedMethod(method_ref);
+  // Done compiling, delete the verified method to reduce native memory usage. Do not delete it
+  // for the optimizing compiler, which may need the verified method again for inlining.
+ if (compiler_kind_ != Compiler::kOptimizing) {
+ verification_results_->RemoveVerifiedMethod(method_ref);
+ }
if (self->IsExceptionPending()) {
ScopedObjectAccess soa(self);
@@ -2359,44 +2370,6 @@ bool CompilerDriver::WriteElf(const std::string& android_root,
SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
return compiler_->WriteElf(file, oat_writer, dex_files, android_root, is_host);
}
-void CompilerDriver::InstructionSetToLLVMTarget(InstructionSet instruction_set,
- std::string* target_triple,
- std::string* target_cpu,
- std::string* target_attr) {
- switch (instruction_set) {
- case kThumb2:
- *target_triple = "thumb-none-linux-gnueabi";
- *target_cpu = "cortex-a9";
- *target_attr = "+thumb2,+neon,+neonfp,+vfp3,+db";
- break;
-
- case kArm:
- *target_triple = "armv7-none-linux-gnueabi";
- // TODO: Fix for Nexus S.
- *target_cpu = "cortex-a9";
- // TODO: Fix for Xoom.
- *target_attr = "+v7,+neon,+neonfp,+vfp3,+db";
- break;
-
- case kX86:
- *target_triple = "i386-pc-linux-gnu";
- *target_attr = "";
- break;
-
- case kX86_64:
- *target_triple = "x86_64-pc-linux-gnu";
- *target_attr = "";
- break;
-
- case kMips:
- *target_triple = "mipsel-unknown-linux";
- *target_attr = "mips32r2";
- break;
-
- default:
- LOG(FATAL) << "Unknown instruction set: " << instruction_set;
- }
- }
bool CompilerDriver::SkipCompilation(const std::string& method_name) {
if (!profile_present_) {
@@ -2438,7 +2411,7 @@ std::string CompilerDriver::GetMemoryUsageString(bool extended) const {
gc::Heap* const heap = runtime->GetHeap();
oss << "arena alloc=" << PrettySize(arena_pool->GetBytesAllocated());
oss << " java alloc=" << PrettySize(heap->GetBytesAllocated());
-#ifdef HAVE_MALLOC_H
+#if defined(__BIONIC__) || defined(__GLIBC__)
struct mallinfo info = mallinfo();
const size_t allocated_space = static_cast<size_t>(info.uordblks);
const size_t free_space = static_cast<size_t>(info.fordblks);
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index b825293c33..a6ed5590dc 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -26,11 +26,8 @@
#include "base/mutex.h"
#include "base/timing_logger.h"
#include "class_reference.h"
-#include "compiled_method.h"
#include "compiler.h"
#include "dex_file.h"
-#include "dex/verified_method.h"
-#include "driver/compiler_options.h"
#include "invoke_type.h"
#include "method_reference.h"
#include "mirror/class.h" // For mirror::Class::Status.
@@ -39,7 +36,9 @@
#include "runtime.h"
#include "safe_map.h"
#include "thread_pool.h"
+#include "utils/array_ref.h"
#include "utils/dedupe_set.h"
+#include "utils/dex_cache_arrays_layout.h"
#include "utils/swap_space.h"
#include "utils.h"
@@ -54,6 +53,7 @@ class MethodVerifier;
} // namespace verifier
class CompiledClass;
+class CompiledMethod;
class CompilerOptions;
class DexCompilationUnit;
class DexFileToMethodInlinerMap;
@@ -62,6 +62,9 @@ class InstructionSetFeatures;
class OatWriter;
class ParallelCompilationManager;
class ScopedObjectAccess;
+template <class Allocator> class SrcMap;
+class SrcMapElem;
+using SwapSrcMap = SrcMap<SwapAllocator<SrcMapElem>>;
template<class T> class Handle;
class TimingLogger;
class VerificationResults;
@@ -318,6 +321,10 @@ class CompilerDriver {
bool IsMethodsClassInitialized(mirror::Class* referrer_class, mirror::ArtMethod* resolved_method)
SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  // Get the layout of dex cache arrays for a dex file. Returns an invalid layout if the
+  // dex cache arrays don't have a fixed layout.
+ DexCacheArraysLayout GetDexCacheArraysLayout(const DexFile* dex_file);
+
void ProcessedInstanceField(bool resolved);
void ProcessedStaticField(bool resolved, bool local);
void ProcessedInvoke(InvokeType invoke_type, int flags);
@@ -378,12 +385,6 @@ class CompilerDriver {
OatWriter* oat_writer,
File* file);
- // TODO: move to a common home for llvm helpers once quick/portable are merged.
- static void InstructionSetToLLVMTarget(InstructionSet instruction_set,
- std::string* target_triple,
- std::string* target_cpu,
- std::string* target_attr);
-
void SetCompilerContext(void* compiler_context) {
compiler_context_ = compiler_context;
}
@@ -550,6 +551,7 @@ class CompilerDriver {
DexFileToMethodInlinerMap* const method_inliner_map_;
std::unique_ptr<Compiler> compiler_;
+ Compiler::Kind compiler_kind_;
const InstructionSet instruction_set_;
const InstructionSetFeatures* const instruction_set_features_;
diff --git a/compiler/driver/compiler_options.cc b/compiler/driver/compiler_options.cc
index e436f52db3..fc00c926b2 100644
--- a/compiler/driver/compiler_options.cc
+++ b/compiler/driver/compiler_options.cc
@@ -42,6 +42,11 @@ CompilerOptions::CompilerOptions()
init_failure_output_(nullptr) {
}
+CompilerOptions::~CompilerOptions() {
+  // The destructor looks empty, but it destroys a PassManagerOptions object. We define it here
+  // so that the header file does not need to include the PassManagerOptions definition.
+}
+
CompilerOptions::CompilerOptions(CompilerFilter compiler_filter,
size_t huge_method_threshold,
size_t large_method_threshold,
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index d06ec278ab..f7ea385e19 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -53,6 +53,7 @@ class CompilerOptions FINAL {
static const bool kDefaultIncludePatchInformation = false;
CompilerOptions();
+ ~CompilerOptions();
CompilerOptions(CompilerFilter compiler_filter,
size_t huge_method_threshold,
diff --git a/compiler/dwarf/debug_frame_opcode_writer.h b/compiler/dwarf/debug_frame_opcode_writer.h
new file mode 100644
index 0000000000..d0d182106f
--- /dev/null
+++ b/compiler/dwarf/debug_frame_opcode_writer.h
@@ -0,0 +1,333 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DWARF_DEBUG_FRAME_OPCODE_WRITER_H_
+#define ART_COMPILER_DWARF_DEBUG_FRAME_OPCODE_WRITER_H_
+
+#include "dwarf.h"
+#include "register.h"
+#include "writer.h"
+#include "utils.h"
+
+namespace art {
+namespace dwarf {
+
+// Writer for .debug_frame opcodes (DWARF-3).
+// See the DWARF specification for the precise meaning of the opcodes.
+// The writer is very lightweight; however, it will do the following for you:
+// * Choose the most compact encoding of a given opcode.
+// * Keep track of current state and convert absolute values to deltas.
+// * Divide by header-defined factors as appropriate.
+template<typename Allocator = std::allocator<uint8_t> >
+class DebugFrameOpCodeWriter : private Writer<Allocator> {
+ public:
+ // To save space, DWARF divides most offsets by header-defined factors.
+ // They are used in integer divisions, so we make them constants.
+ // We usually subtract from the stack base pointer, so a negative factor
+ // makes the encoded values positive and thus easier to encode.
+ static constexpr int kDataAlignmentFactor = -4;
+ static constexpr int kCodeAlignmentFactor = 1;
+
+ // Explicitly advance the program counter to the given location.
+ void ALWAYS_INLINE AdvancePC(int absolute_pc) {
+ DCHECK_GE(absolute_pc, current_pc_);
+ if (UNLIKELY(enabled_)) {
+ int delta = FactorCodeOffset(absolute_pc - current_pc_);
+ if (delta != 0) {
+ if (delta <= 0x3F) {
+ this->PushUint8(DW_CFA_advance_loc | delta);
+ } else if (delta <= UINT8_MAX) {
+ this->PushUint8(DW_CFA_advance_loc1);
+ this->PushUint8(delta);
+ } else if (delta <= UINT16_MAX) {
+ this->PushUint8(DW_CFA_advance_loc2);
+ this->PushUint16(delta);
+ } else {
+ this->PushUint8(DW_CFA_advance_loc4);
+ this->PushUint32(delta);
+ }
+ }
+ current_pc_ = absolute_pc;
+ }
+ }
+
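
For orientation, the encodings chosen above for a few sample deltas, assuming
kCodeAlignmentFactor == 1 (values mirror the DebugFrame test below):

  // delta 0x20    -> DW_CFA_advance_loc | 0x20         (1 byte)
  // delta 0x80    -> DW_CFA_advance_loc1, 0x80         (2 bytes)
  // delta 0x1234  -> DW_CFA_advance_loc2, 0x34 0x12    (3 bytes)
  // delta 0x12345 -> DW_CFA_advance_loc4, 45 23 01 00  (5 bytes)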
+ // Override this method to automatically advance the PC before each opcode.
+ virtual void ImplicitlyAdvancePC() { }
+
+ // Common alias in assemblers - spill relative to current stack pointer.
+ void ALWAYS_INLINE RelOffset(Reg reg, int offset) {
+ Offset(reg, offset - current_cfa_offset_);
+ }
+
+ // Common alias in assemblers - increase stack frame size.
+ void ALWAYS_INLINE AdjustCFAOffset(int delta) {
+ DefCFAOffset(current_cfa_offset_ + delta);
+ }
+
+ // Custom alias - spill many registers based on bitmask.
+ void ALWAYS_INLINE RelOffsetForMany(Reg reg_base, int offset,
+ uint32_t reg_mask, int reg_size) {
+ DCHECK(reg_size == 4 || reg_size == 8);
+ if (UNLIKELY(enabled_)) {
+ for (int i = 0; reg_mask != 0u; reg_mask >>= 1, i++) {
+ // Skip zero bits and go to the set bit.
+ int num_zeros = CTZ(reg_mask);
+ i += num_zeros;
+ reg_mask >>= num_zeros;
+ RelOffset(Reg(reg_base.num() + i), offset);
+ offset += reg_size;
+ }
+ }
+ }
+
+ // Custom alias - unspill many registers based on bitmask.
+ void ALWAYS_INLINE RestoreMany(Reg reg_base, uint32_t reg_mask) {
+ if (UNLIKELY(enabled_)) {
+ for (int i = 0; reg_mask != 0u; reg_mask >>= 1, i++) {
+ // Skip zero bits and go to the set bit.
+ int num_zeros = CTZ(reg_mask);
+ i += num_zeros;
+ reg_mask >>= num_zeros;
+ Restore(Reg(reg_base.num() + i));
+ }
+ }
+ }
+
+ void ALWAYS_INLINE Nop() {
+ if (UNLIKELY(enabled_)) {
+ this->PushUint8(DW_CFA_nop);
+ }
+ }
+
+ void ALWAYS_INLINE Offset(Reg reg, int offset) {
+ if (UNLIKELY(enabled_)) {
+ ImplicitlyAdvancePC();
+ int factored_offset = FactorDataOffset(offset); // May change sign.
+ if (factored_offset >= 0) {
+ if (0 <= reg.num() && reg.num() <= 0x3F) {
+ this->PushUint8(DW_CFA_offset | reg.num());
+ this->PushUleb128(factored_offset);
+ } else {
+ this->PushUint8(DW_CFA_offset_extended);
+ this->PushUleb128(reg.num());
+ this->PushUleb128(factored_offset);
+ }
+ } else {
+ uses_dwarf3_features_ = true;
+ this->PushUint8(DW_CFA_offset_extended_sf);
+ this->PushUleb128(reg.num());
+ this->PushSleb128(factored_offset);
+ }
+ }
+ }
+
+ void ALWAYS_INLINE Restore(Reg reg) {
+ if (UNLIKELY(enabled_)) {
+ ImplicitlyAdvancePC();
+ if (0 <= reg.num() && reg.num() <= 0x3F) {
+ this->PushUint8(DW_CFA_restore | reg.num());
+ } else {
+ this->PushUint8(DW_CFA_restore_extended);
+ this->PushUleb128(reg.num());
+ }
+ }
+ }
+
+ void ALWAYS_INLINE Undefined(Reg reg) {
+ if (UNLIKELY(enabled_)) {
+ ImplicitlyAdvancePC();
+ this->PushUint8(DW_CFA_undefined);
+ this->PushUleb128(reg.num());
+ }
+ }
+
+ void ALWAYS_INLINE SameValue(Reg reg) {
+ if (UNLIKELY(enabled_)) {
+ ImplicitlyAdvancePC();
+ this->PushUint8(DW_CFA_same_value);
+ this->PushUleb128(reg.num());
+ }
+ }
+
+ // The previous value of "reg" is stored in register "new_reg".
+ void ALWAYS_INLINE Register(Reg reg, Reg new_reg) {
+ if (UNLIKELY(enabled_)) {
+ ImplicitlyAdvancePC();
+ this->PushUint8(DW_CFA_register);
+ this->PushUleb128(reg.num());
+ this->PushUleb128(new_reg.num());
+ }
+ }
+
+ void ALWAYS_INLINE RememberState() {
+ if (UNLIKELY(enabled_)) {
+ ImplicitlyAdvancePC();
+ this->PushUint8(DW_CFA_remember_state);
+ }
+ }
+
+ void ALWAYS_INLINE RestoreState() {
+ if (UNLIKELY(enabled_)) {
+ ImplicitlyAdvancePC();
+ this->PushUint8(DW_CFA_restore_state);
+ }
+ }
+
+ void ALWAYS_INLINE DefCFA(Reg reg, int offset) {
+ if (UNLIKELY(enabled_)) {
+ ImplicitlyAdvancePC();
+ if (offset >= 0) {
+ this->PushUint8(DW_CFA_def_cfa);
+ this->PushUleb128(reg.num());
+ this->PushUleb128(offset); // Non-factored.
+ } else {
+ uses_dwarf3_features_ = true;
+ this->PushUint8(DW_CFA_def_cfa_sf);
+ this->PushUleb128(reg.num());
+ this->PushSleb128(FactorDataOffset(offset));
+ }
+ }
+ current_cfa_offset_ = offset;
+ }
+
+ void ALWAYS_INLINE DefCFARegister(Reg reg) {
+ if (UNLIKELY(enabled_)) {
+ ImplicitlyAdvancePC();
+ this->PushUint8(DW_CFA_def_cfa_register);
+ this->PushUleb128(reg.num());
+ }
+ }
+
+ void ALWAYS_INLINE DefCFAOffset(int offset) {
+ if (UNLIKELY(enabled_)) {
+ if (current_cfa_offset_ != offset) {
+ ImplicitlyAdvancePC();
+ if (offset >= 0) {
+ this->PushUint8(DW_CFA_def_cfa_offset);
+ this->PushUleb128(offset); // Non-factored.
+ } else {
+ uses_dwarf3_features_ = true;
+ this->PushUint8(DW_CFA_def_cfa_offset_sf);
+ this->PushSleb128(FactorDataOffset(offset));
+ }
+ }
+ }
+ // Unconditional so that the user can still get and check the value.
+ current_cfa_offset_ = offset;
+ }
+
+ void ALWAYS_INLINE ValOffset(Reg reg, int offset) {
+ if (UNLIKELY(enabled_)) {
+ ImplicitlyAdvancePC();
+ uses_dwarf3_features_ = true;
+ int factored_offset = FactorDataOffset(offset); // May change sign.
+ if (factored_offset >= 0) {
+ this->PushUint8(DW_CFA_val_offset);
+ this->PushUleb128(reg.num());
+ this->PushUleb128(factored_offset);
+ } else {
+ this->PushUint8(DW_CFA_val_offset_sf);
+ this->PushUleb128(reg.num());
+ this->PushSleb128(factored_offset);
+ }
+ }
+ }
+
+ void ALWAYS_INLINE DefCFAExpression(void* expr, int expr_size) {
+ if (UNLIKELY(enabled_)) {
+ ImplicitlyAdvancePC();
+ uses_dwarf3_features_ = true;
+ this->PushUint8(DW_CFA_def_cfa_expression);
+ this->PushUleb128(expr_size);
+ this->PushData(expr, expr_size);
+ }
+ }
+
+ void ALWAYS_INLINE Expression(Reg reg, void* expr, int expr_size) {
+ if (UNLIKELY(enabled_)) {
+ ImplicitlyAdvancePC();
+ uses_dwarf3_features_ = true;
+ this->PushUint8(DW_CFA_expression);
+ this->PushUleb128(reg.num());
+ this->PushUleb128(expr_size);
+ this->PushData(expr, expr_size);
+ }
+ }
+
+ void ALWAYS_INLINE ValExpression(Reg reg, void* expr, int expr_size) {
+ if (UNLIKELY(enabled_)) {
+ ImplicitlyAdvancePC();
+ uses_dwarf3_features_ = true;
+ this->PushUint8(DW_CFA_val_expression);
+ this->PushUleb128(reg.num());
+ this->PushUleb128(expr_size);
+ this->PushData(expr, expr_size);
+ }
+ }
+
+ bool IsEnabled() const { return enabled_; }
+
+ void SetEnabled(bool value) { enabled_ = value; }
+
+ int GetCurrentPC() const { return current_pc_; }
+
+ int GetCurrentCFAOffset() const { return current_cfa_offset_; }
+
+ void SetCurrentCFAOffset(int offset) { current_cfa_offset_ = offset; }
+
+ using Writer<Allocator>::data;
+
+ DebugFrameOpCodeWriter(bool enabled = true,
+ const Allocator& alloc = Allocator())
+ : Writer<Allocator>(&opcodes_),
+ enabled_(enabled),
+ opcodes_(alloc),
+ current_cfa_offset_(0),
+ current_pc_(0),
+ uses_dwarf3_features_(false) {
+ if (enabled) {
+ // Best guess based on a couple of observed outputs.
+ opcodes_.reserve(16);
+ }
+ }
+
+ virtual ~DebugFrameOpCodeWriter() { }
+
+ protected:
+ int FactorDataOffset(int offset) const {
+ DCHECK_EQ(offset % kDataAlignmentFactor, 0);
+ return offset / kDataAlignmentFactor;
+ }
+
+ int FactorCodeOffset(int offset) const {
+ DCHECK_EQ(offset % kCodeAlignmentFactor, 0);
+ return offset / kCodeAlignmentFactor;
+ }
+
+ bool enabled_; // If disabled all writes are no-ops.
+ std::vector<uint8_t, Allocator> opcodes_;
+ int current_cfa_offset_;
+ int current_pc_;
+ bool uses_dwarf3_features_;
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(DebugFrameOpCodeWriter);
+};
+
+} // namespace dwarf
+} // namespace art
+
+#endif // ART_COMPILER_DWARF_DEBUG_FRAME_OPCODE_WRITER_H_
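
A minimal usage sketch (my own example; register numbers follow the x86 DWARF
numbering used in the tests below, where r4 is ESP and r5 is EBP):

  art::dwarf::DebugFrameOpCodeWriter<> cfi;
  cfi.DefCFA(art::dwarf::Reg(4), 4);     // On entry: CFA = ESP + 4 (return address).
  cfi.AdvancePC(1);                      // After the one-byte "push ebp".
  cfi.AdjustCFAOffset(4);                // ESP dropped by 4, so CFA = ESP + 8.
  cfi.RelOffset(art::dwarf::Reg(5), 0);  // EBP saved at the stack top, i.e. CFA-8.
  // cfi.data() now holds the opcode stream to pass to WriteEhFrameFDE().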
diff --git a/compiler/dwarf/debug_info_entry_writer.h b/compiler/dwarf/debug_info_entry_writer.h
new file mode 100644
index 0000000000..c0350b6f8a
--- /dev/null
+++ b/compiler/dwarf/debug_info_entry_writer.h
@@ -0,0 +1,248 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DWARF_DEBUG_INFO_ENTRY_WRITER_H_
+#define ART_COMPILER_DWARF_DEBUG_INFO_ENTRY_WRITER_H_
+
+#include <unordered_map>
+
+#include "dwarf.h"
+#include "leb128.h"
+#include "writer.h"
+
+namespace art {
+namespace dwarf {
+
+// 32-bit FNV-1a hash function which we use to find duplicate abbreviations.
+// See http://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
+template< typename Allocator >
+struct FNVHash {
+ size_t operator()(const std::vector<uint8_t, Allocator>& v) const {
+ uint32_t hash = 2166136261u;
+ for (size_t i = 0; i < v.size(); i++) {
+ hash = (hash ^ v[i]) * 16777619u;
+ }
+ return hash;
+ }
+};
+
+/*
+ * Writer for debug information entries (DIEs).
+ * It also handles generation of abbreviations.
+ *
+ * Usage:
+ * StartTag(DW_TAG_compile_unit, DW_CHILDREN_yes);
+ * WriteStrp(DW_AT_producer, "Compiler name", debug_str);
+ * StartTag(DW_TAG_subprogram, DW_CHILDREN_no);
+ * WriteStrp(DW_AT_name, "Foo", debug_str);
+ * EndTag();
+ * EndTag();
+ */
+template< typename Allocator = std::allocator<uint8_t> >
+class DebugInfoEntryWriter FINAL : private Writer<Allocator> {
+ public:
+ // Start debugging information entry.
+ void StartTag(Tag tag, Children children) {
+ DCHECK(has_children) << "This tag cannot have nested tags";
+ if (inside_entry_) {
+ // Write abbrev code for the previous entry.
+ this->UpdateUleb128(abbrev_code_offset_, EndAbbrev());
+ inside_entry_ = false;
+ }
+ StartAbbrev(tag, children);
+ // Abbrev code placeholder of sufficient size.
+ abbrev_code_offset_ = this->data()->size();
+ this->PushUleb128(NextAbbrevCode());
+ depth_++;
+ inside_entry_ = true;
+ has_children = (children == DW_CHILDREN_yes);
+ }
+
+ // End debugging information entry.
+ void EndTag() {
+ DCHECK_GT(depth_, 0);
+ if (inside_entry_) {
+ // Write abbrev code for this tag.
+ this->UpdateUleb128(abbrev_code_offset_, EndAbbrev());
+ inside_entry_ = false;
+ }
+ if (has_children) {
+ this->PushUint8(0); // End of children.
+ }
+ depth_--;
+ has_children = true; // Parent tag obviously has children.
+ }
+
+ void WriteAddr(Attribute attrib, uint64_t value) {
+ AddAbbrevAttribute(attrib, DW_FORM_addr);
+ if (is64bit_) {
+ this->PushUint64(value);
+ } else {
+ this->PushUint32(value);
+ }
+ }
+
+ void WriteBlock(Attribute attrib, const void* ptr, int size) {
+ AddAbbrevAttribute(attrib, DW_FORM_block);
+ this->PushUleb128(size);
+ this->PushData(ptr, size);
+ }
+
+ void WriteData1(Attribute attrib, uint8_t value) {
+ AddAbbrevAttribute(attrib, DW_FORM_data1);
+ this->PushUint8(value);
+ }
+
+ void WriteData2(Attribute attrib, uint16_t value) {
+ AddAbbrevAttribute(attrib, DW_FORM_data2);
+ this->PushUint16(value);
+ }
+
+ void WriteData4(Attribute attrib, uint32_t value) {
+ AddAbbrevAttribute(attrib, DW_FORM_data4);
+ this->PushUint32(value);
+ }
+
+ void WriteData8(Attribute attrib, uint64_t value) {
+ AddAbbrevAttribute(attrib, DW_FORM_data8);
+ this->PushUint64(value);
+ }
+
+ void WriteSdata(Attribute attrib, int value) {
+ AddAbbrevAttribute(attrib, DW_FORM_sdata);
+ this->PushSleb128(value);
+ }
+
+ void WriteUdata(Attribute attrib, int value) {
+ AddAbbrevAttribute(attrib, DW_FORM_udata);
+ this->PushUleb128(value);
+ }
+
+ void WriteUdata(Attribute attrib, uint32_t value) {
+ AddAbbrevAttribute(attrib, DW_FORM_udata);
+ this->PushUleb128(value);
+ }
+
+ void WriteFlag(Attribute attrib, bool value) {
+ AddAbbrevAttribute(attrib, DW_FORM_flag);
+ this->PushUint8(value ? 1 : 0);
+ }
+
+ void WriteRef4(Attribute attrib, int cu_offset) {
+ AddAbbrevAttribute(attrib, DW_FORM_ref4);
+ this->PushUint32(cu_offset);
+ }
+
+ void WriteRef(Attribute attrib, int cu_offset) {
+ AddAbbrevAttribute(attrib, DW_FORM_ref_udata);
+ this->PushUleb128(cu_offset);
+ }
+
+ void WriteString(Attribute attrib, const char* value) {
+ AddAbbrevAttribute(attrib, DW_FORM_string);
+ this->PushString(value);
+ }
+
+ void WriteStrp(Attribute attrib, int address) {
+ AddAbbrevAttribute(attrib, DW_FORM_strp);
+ this->PushUint32(address);
+ }
+
+ void WriteStrp(Attribute attrib, const char* value, std::vector<uint8_t>* debug_str) {
+ AddAbbrevAttribute(attrib, DW_FORM_strp);
+ int address = debug_str->size();
+ debug_str->insert(debug_str->end(), value, value + strlen(value) + 1);
+ this->PushUint32(address);
+ }
+
+ bool is64bit() const { return is64bit_; }
+
+ using Writer<Allocator>::data;
+
+ DebugInfoEntryWriter(bool is64bitArch,
+ std::vector<uint8_t, Allocator>* debug_abbrev,
+ const Allocator& alloc = Allocator())
+ : Writer<Allocator>(&entries_),
+ debug_abbrev_(debug_abbrev),
+ current_abbrev_(alloc),
+ abbrev_codes_(alloc),
+ entries_(alloc),
+ is64bit_(is64bitArch) {
+ debug_abbrev_.PushUint8(0); // Add abbrev table terminator.
+ }
+
+ ~DebugInfoEntryWriter() {
+ DCHECK_EQ(depth_, 0);
+ }
+
+ private:
+ // Start abbreviation declaration.
+ void StartAbbrev(Tag tag, Children children) {
+ DCHECK(!inside_entry_);
+ current_abbrev_.clear();
+ EncodeUnsignedLeb128(&current_abbrev_, tag);
+ current_abbrev_.push_back(children);
+ }
+
+ // Add attribute specification.
+ void AddAbbrevAttribute(Attribute name, Form type) {
+ DCHECK(inside_entry_) << "Call StartTag before adding attributes.";
+ EncodeUnsignedLeb128(&current_abbrev_, name);
+ EncodeUnsignedLeb128(&current_abbrev_, type);
+ }
+
+ int NextAbbrevCode() {
+ return 1 + abbrev_codes_.size();
+ }
+
+ // End abbreviation declaration and return its code.
+ int EndAbbrev() {
+ DCHECK(inside_entry_);
+ auto it = abbrev_codes_.insert(std::make_pair(std::move(current_abbrev_),
+ NextAbbrevCode()));
+ int abbrev_code = it.first->second;
+ if (UNLIKELY(it.second)) { // Inserted new entry.
+ const std::vector<uint8_t, Allocator>& abbrev = it.first->first;
+ debug_abbrev_.Pop(); // Remove abbrev table terminator.
+ debug_abbrev_.PushUleb128(abbrev_code);
+ debug_abbrev_.PushData(abbrev.data(), abbrev.size());
+ debug_abbrev_.PushUint8(0); // Attribute list end.
+ debug_abbrev_.PushUint8(0); // Attribute list end.
+ debug_abbrev_.PushUint8(0); // Add abbrev table terminator.
+ }
+ return abbrev_code;
+ }
+
+ private:
+ // Fields for writing and deduplication of abbrevs.
+ Writer<Allocator> debug_abbrev_;
+ std::vector<uint8_t, Allocator> current_abbrev_;
+ std::unordered_map<std::vector<uint8_t, Allocator>, int,
+ FNVHash<Allocator> > abbrev_codes_;
+
+ // Fields for writing of debugging information entries.
+ std::vector<uint8_t, Allocator> entries_;
+ bool is64bit_;
+ int depth_ = 0;
+ size_t abbrev_code_offset_ = 0; // Location to patch once we know the code.
+ bool inside_entry_ = false; // Entry ends at first child (if any).
+ bool has_children = true;
+};
+
+} // namespace dwarf
+} // namespace art
+
+#endif // ART_COMPILER_DWARF_DEBUG_INFO_ENTRY_WRITER_H_
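
A short sketch of the deduplication in EndAbbrev() (my own example): entries with
an identical tag/attribute shape share one abbreviation code.

  std::vector<uint8_t> debug_abbrev;
  art::dwarf::DebugInfoEntryWriter<> info(false /* is64bit */, &debug_abbrev);
  info.StartTag(art::dwarf::DW_TAG_compile_unit, art::dwarf::DW_CHILDREN_yes);
  info.StartTag(art::dwarf::DW_TAG_subprogram, art::dwarf::DW_CHILDREN_no);
  info.WriteData1(art::dwarf::DW_AT_decl_file, 1);
  info.EndTag();
  info.StartTag(art::dwarf::DW_TAG_subprogram, art::dwarf::DW_CHILDREN_no);
  info.WriteData1(art::dwarf::DW_AT_decl_file, 2);  // Same shape: reuses abbrev code 2.
  info.EndTag();
  info.EndTag();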
diff --git a/compiler/dwarf/debug_line_opcode_writer.h b/compiler/dwarf/debug_line_opcode_writer.h
new file mode 100644
index 0000000000..f34acee647
--- /dev/null
+++ b/compiler/dwarf/debug_line_opcode_writer.h
@@ -0,0 +1,243 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DWARF_DEBUG_LINE_OPCODE_WRITER_H_
+#define ART_COMPILER_DWARF_DEBUG_LINE_OPCODE_WRITER_H_
+
+#include "dwarf.h"
+#include "writer.h"
+
+namespace art {
+namespace dwarf {
+
+// Writer for the .debug_line opcodes (DWARF-3).
+// The writer is very lightweight; however, it will do the following for you:
+// * Choose the most compact encoding of a given opcode.
+// * Keep track of current state and convert absolute values to deltas.
+// * Divide by header-defined factors as appropriate.
+template<typename Allocator = std::allocator<uint8_t>>
+class DebugLineOpCodeWriter FINAL : private Writer<Allocator> {
+ public:
+ static constexpr int kOpcodeBase = 13;
+ static constexpr bool kDefaultIsStmt = true;
+ static constexpr int kLineBase = -5;
+ static constexpr int kLineRange = 14;
+
+ void AddRow() {
+ this->PushUint8(DW_LNS_copy);
+ }
+
+ void AdvancePC(uint64_t absolute_address) {
+ DCHECK_NE(current_address_, 0u); // Use SetAddress for the first advance.
+ DCHECK_GE(absolute_address, current_address_);
+ if (absolute_address != current_address_) {
+ uint64_t delta = FactorCodeOffset(absolute_address - current_address_);
+ if (delta <= INT32_MAX) {
+ this->PushUint8(DW_LNS_advance_pc);
+ this->PushUleb128(static_cast<int>(delta));
+ current_address_ = absolute_address;
+ } else {
+ SetAddress(absolute_address);
+ }
+ }
+ }
+
+ void AdvanceLine(int absolute_line) {
+ int delta = absolute_line - current_line_;
+ if (delta != 0) {
+ this->PushUint8(DW_LNS_advance_line);
+ this->PushSleb128(delta);
+ current_line_ = absolute_line;
+ }
+ }
+
+ void SetFile(int file) {
+ if (current_file_ != file) {
+ this->PushUint8(DW_LNS_set_file);
+ this->PushUleb128(file);
+ current_file_ = file;
+ }
+ }
+
+ void SetColumn(int column) {
+ this->PushUint8(DW_LNS_set_column);
+ this->PushUleb128(column);
+ }
+
+ void NegateStmt() {
+ this->PushUint8(DW_LNS_negate_stmt);
+ }
+
+ void SetBasicBlock() {
+ this->PushUint8(DW_LNS_set_basic_block);
+ }
+
+ void SetPrologueEnd() {
+ uses_dwarf3_features_ = true;
+ this->PushUint8(DW_LNS_set_prologue_end);
+ }
+
+ void SetEpilogueBegin() {
+ uses_dwarf3_features_ = true;
+ this->PushUint8(DW_LNS_set_epilogue_begin);
+ }
+
+ void SetISA(int isa) {
+ uses_dwarf3_features_ = true;
+ this->PushUint8(DW_LNS_set_isa);
+ this->PushUleb128(isa);
+ }
+
+ void EndSequence() {
+ this->PushUint8(0);
+ this->PushUleb128(1);
+ this->PushUint8(DW_LNE_end_sequence);
+ current_address_ = 0;
+ current_file_ = 1;
+ current_line_ = 1;
+ }
+
+ // Unconditionally set the address using the long encoding.
+ // This gives the linker opportunity to relocate the address.
+ void SetAddress(uint64_t absolute_address) {
+ DCHECK_GE(absolute_address, current_address_);
+ FactorCodeOffset(absolute_address); // Check if it is factorable.
+ this->PushUint8(0);
+ if (use_64bit_address_) {
+ this->PushUleb128(1 + 8);
+ this->PushUint8(DW_LNE_set_address);
+ this->PushUint64(absolute_address);
+ } else {
+ this->PushUleb128(1 + 4);
+ this->PushUint8(DW_LNE_set_address);
+ this->PushUint32(absolute_address);
+ }
+ current_address_ = absolute_address;
+ }
+
+ void DefineFile(const char* filename,
+ int directory_index,
+ int modification_time,
+ int file_size) {
+ int size = 1 +
+ strlen(filename) + 1 +
+ UnsignedLeb128Size(directory_index) +
+ UnsignedLeb128Size(modification_time) +
+ UnsignedLeb128Size(file_size);
+ this->PushUint8(0);
+ this->PushUleb128(size);
+ size_t start = data()->size();
+ this->PushUint8(DW_LNE_define_file);
+ this->PushString(filename);
+ this->PushUleb128(directory_index);
+ this->PushUleb128(modification_time);
+ this->PushUleb128(file_size);
+ DCHECK_EQ(start + size, data()->size());
+ }
+
+ // Compact address and line opcode.
+ void AddRow(uint64_t absolute_address, int absolute_line) {
+ DCHECK_GE(absolute_address, current_address_);
+
+ // If the address is definitely too far, use the long encoding.
+ uint64_t delta_address = FactorCodeOffset(absolute_address - current_address_);
+ if (delta_address > UINT8_MAX) {
+ AdvancePC(absolute_address);
+ delta_address = 0;
+ }
+
+ // If the line is definitely too far, use the long encoding.
+ int delta_line = absolute_line - current_line_;
+ if (!(kLineBase <= delta_line && delta_line < kLineBase + kLineRange)) {
+ AdvanceLine(absolute_line);
+ delta_line = 0;
+ }
+
+ // Both address and line should be reasonable now. Use the short encoding.
+ int opcode = kOpcodeBase + (delta_line - kLineBase) +
+ (static_cast<int>(delta_address) * kLineRange);
+ if (opcode > UINT8_MAX) {
+ // If the address is still too far, try to advance it by a constant amount.
+ int const_advance = (0xFF - kOpcodeBase) / kLineRange;
+ opcode -= (kLineRange * const_advance);
+ if (opcode <= UINT8_MAX) {
+ this->PushUint8(DW_LNS_const_add_pc);
+ } else {
+ // Give up and use long encoding for address.
+ AdvancePC(absolute_address);
+ // Still use the opcode to do line advance and copy.
+ opcode = kOpcodeBase + (delta_line - kLineBase);
+ }
+ }
+ DCHECK(kOpcodeBase <= opcode && opcode <= 0xFF);
+ this->PushUint8(opcode); // Special opcode.
+ current_line_ = absolute_line;
+ current_address_ = absolute_address;
+ }
+
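
A worked instance of the special-opcode computation above, using this file's
constants (kOpcodeBase = 13, kLineBase = -5, kLineRange = 14):

  // delta_address = 2 (already factored), delta_line = +1:
  //   opcode = 13 + (1 - (-5)) + 2 * 14 = 47
  // A single byte advances the address by 2 units, the line by 1, and copies a row.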
+ int GetCodeFactorBits() const {
+ return code_factor_bits_;
+ }
+
+ uint64_t CurrentAddress() const {
+ return current_address_;
+ }
+
+ int CurrentFile() const {
+ return current_file_;
+ }
+
+ int CurrentLine() const {
+ return current_line_;
+ }
+
+ using Writer<Allocator>::data;
+
+ DebugLineOpCodeWriter(bool use64bitAddress,
+ int codeFactorBits,
+ const Allocator& alloc = Allocator())
+ : Writer<Allocator>(&opcodes_),
+ opcodes_(alloc),
+ uses_dwarf3_features_(false),
+ use_64bit_address_(use64bitAddress),
+ code_factor_bits_(codeFactorBits),
+ current_address_(0),
+ current_file_(1),
+ current_line_(1) {
+ }
+
+ private:
+ uint64_t FactorCodeOffset(uint64_t offset) const {
+ DCHECK_GE(code_factor_bits_, 0);
+ DCHECK_EQ((offset >> code_factor_bits_) << code_factor_bits_, offset);
+ return offset >> code_factor_bits_;
+ }
+
+ std::vector<uint8_t, Allocator> opcodes_;
+ bool uses_dwarf3_features_;
+ bool use_64bit_address_;
+ int code_factor_bits_;
+ uint64_t current_address_;
+ int current_file_;
+ int current_line_;
+
+ DISALLOW_COPY_AND_ASSIGN(DebugLineOpCodeWriter);
+};
+
+} // namespace dwarf
+} // namespace art
+
+#endif // ART_COMPILER_DWARF_DEBUG_LINE_OPCODE_WRITER_H_
diff --git a/compiler/dwarf/dwarf_test.cc b/compiler/dwarf/dwarf_test.cc
new file mode 100644
index 0000000000..ec18e96b4b
--- /dev/null
+++ b/compiler/dwarf/dwarf_test.cc
@@ -0,0 +1,281 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dwarf_test.h"
+
+#include "dwarf/debug_frame_opcode_writer.h"
+#include "dwarf/debug_info_entry_writer.h"
+#include "dwarf/debug_line_opcode_writer.h"
+#include "dwarf/headers.h"
+#include "gtest/gtest.h"
+
+namespace art {
+namespace dwarf {
+
+// Run the tests only on host since we need objdump.
+#ifndef HAVE_ANDROID_OS
+
+TEST_F(DwarfTest, DebugFrame) {
+ const bool is64bit = false;
+
+ // Pick offset value which would catch Uleb vs Sleb errors.
+ const int offset = 40000;
+ ASSERT_EQ(UnsignedLeb128Size(offset / 4), 2u);
+ ASSERT_EQ(SignedLeb128Size(offset / 4), 3u);
+ DW_CHECK("Data alignment factor: -4");
+ const Reg reg(6);
+
+ // Test the opcodes in the order mentioned in the spec.
+ // There are usually several encoding variations of each opcode.
+ DebugFrameOpCodeWriter<> opcodes;
+ DW_CHECK("FDE");
+ int pc = 0;
+ for (int i : {0, 1, 0x3F, 0x40, 0xFF, 0x100, 0xFFFF, 0x10000}) {
+ pc += i;
+ opcodes.AdvancePC(pc);
+ }
+ DW_CHECK_NEXT("DW_CFA_advance_loc: 1 to 01000001");
+ DW_CHECK_NEXT("DW_CFA_advance_loc: 63 to 01000040");
+ DW_CHECK_NEXT("DW_CFA_advance_loc1: 64 to 01000080");
+ DW_CHECK_NEXT("DW_CFA_advance_loc1: 255 to 0100017f");
+ DW_CHECK_NEXT("DW_CFA_advance_loc2: 256 to 0100027f");
+ DW_CHECK_NEXT("DW_CFA_advance_loc2: 65535 to 0101027e");
+ DW_CHECK_NEXT("DW_CFA_advance_loc4: 65536 to 0102027e");
+ opcodes.DefCFA(reg, offset);
+ DW_CHECK_NEXT("DW_CFA_def_cfa: r6 (esi) ofs 40000");
+ opcodes.DefCFA(reg, -offset);
+ DW_CHECK_NEXT("DW_CFA_def_cfa_sf: r6 (esi) ofs -40000");
+ opcodes.DefCFARegister(reg);
+ DW_CHECK_NEXT("DW_CFA_def_cfa_register: r6 (esi)");
+ opcodes.DefCFAOffset(offset);
+ DW_CHECK_NEXT("DW_CFA_def_cfa_offset: 40000");
+ opcodes.DefCFAOffset(-offset);
+ DW_CHECK_NEXT("DW_CFA_def_cfa_offset_sf: -40000");
+ uint8_t expr[] = { 0 };
+ opcodes.DefCFAExpression(expr, arraysize(expr));
+ DW_CHECK_NEXT("DW_CFA_def_cfa_expression");
+ opcodes.Undefined(reg);
+ DW_CHECK_NEXT("DW_CFA_undefined: r6 (esi)");
+ opcodes.SameValue(reg);
+ DW_CHECK_NEXT("DW_CFA_same_value: r6 (esi)");
+ opcodes.Offset(Reg(0x3F), -offset);
+ // Bad register likely means that it does not exist on x86,
+ // but we want to test high register numbers anyway.
+ DW_CHECK_NEXT("DW_CFA_offset: bad register: r63 at cfa-40000");
+ opcodes.Offset(Reg(0x40), -offset);
+ DW_CHECK_NEXT("DW_CFA_offset_extended: bad register: r64 at cfa-40000");
+ opcodes.Offset(Reg(0x40), offset);
+ DW_CHECK_NEXT("DW_CFA_offset_extended_sf: bad register: r64 at cfa+40000");
+ opcodes.ValOffset(reg, -offset);
+ DW_CHECK_NEXT("DW_CFA_val_offset: r6 (esi) at cfa-40000");
+ opcodes.ValOffset(reg, offset);
+ DW_CHECK_NEXT("DW_CFA_val_offset_sf: r6 (esi) at cfa+40000");
+ opcodes.Register(reg, Reg(1));
+ DW_CHECK_NEXT("DW_CFA_register: r6 (esi) in r1 (ecx)");
+ opcodes.Expression(reg, expr, arraysize(expr));
+ DW_CHECK_NEXT("DW_CFA_expression: r6 (esi)");
+ opcodes.ValExpression(reg, expr, arraysize(expr));
+ DW_CHECK_NEXT("DW_CFA_val_expression: r6 (esi)");
+ opcodes.Restore(Reg(0x3F));
+ DW_CHECK_NEXT("DW_CFA_restore: bad register: r63");
+ opcodes.Restore(Reg(0x40));
+ DW_CHECK_NEXT("DW_CFA_restore_extended: bad register: r64");
+ opcodes.Restore(reg);
+ DW_CHECK_NEXT("DW_CFA_restore: r6 (esi)");
+ opcodes.RememberState();
+ DW_CHECK_NEXT("DW_CFA_remember_state");
+ opcodes.RestoreState();
+ DW_CHECK_NEXT("DW_CFA_restore_state");
+ opcodes.Nop();
+ DW_CHECK_NEXT("DW_CFA_nop");
+
+ // Also test helpers.
+ opcodes.DefCFA(Reg(4), 100); // ESP
+ DW_CHECK_NEXT("DW_CFA_def_cfa: r4 (esp) ofs 100");
+ opcodes.AdjustCFAOffset(8);
+ DW_CHECK_NEXT("DW_CFA_def_cfa_offset: 108");
+ opcodes.RelOffset(Reg(0), 0); // push R0
+ DW_CHECK_NEXT("DW_CFA_offset: r0 (eax) at cfa-108");
+ opcodes.RelOffset(Reg(1), 4); // push R1
+ DW_CHECK_NEXT("DW_CFA_offset: r1 (ecx) at cfa-104");
+ opcodes.RelOffsetForMany(Reg(2), 8, 1 | (1 << 3), 4); // push R2 and R5
+ DW_CHECK_NEXT("DW_CFA_offset: r2 (edx) at cfa-100");
+ DW_CHECK_NEXT("DW_CFA_offset: r5 (ebp) at cfa-96");
+ opcodes.RestoreMany(Reg(2), 1 | (1 << 3)); // pop R2 and R5
+ DW_CHECK_NEXT("DW_CFA_restore: r2 (edx)");
+ DW_CHECK_NEXT("DW_CFA_restore: r5 (ebp)");
+
+ DebugFrameOpCodeWriter<> initial_opcodes;
+ WriteEhFrameCIE(is64bit, Reg(is64bit ? 16 : 8), initial_opcodes, &eh_frame_data_);
+ WriteEhFrameFDE(is64bit, 0, 0x01000000, 0x01000000, opcodes.data(), &eh_frame_data_);
+ CheckObjdumpOutput(is64bit, "-W");
+}
+
+// TODO: objdump seems to have trouble with 64bit CIE length.
+TEST_F(DwarfTest, DISABLED_DebugFrame64) {
+ constexpr bool is64bit = true;
+ DebugFrameOpCodeWriter<> initial_opcodes;
+ WriteEhFrameCIE(is64bit, Reg(16), initial_opcodes, &eh_frame_data_);
+ DebugFrameOpCodeWriter<> opcodes;
+ WriteEhFrameFDE(is64bit, 0, 0x0100000000000000, 0x0200000000000000,
+ opcodes.data(), &eh_frame_data_);
+ DW_CHECK("FDE cie=00000000 pc=100000000000000..300000000000000");
+ CheckObjdumpOutput(is64bit, "-W");
+}
+
+TEST_F(DwarfTest, DebugLine) {
+ const bool is64bit = false;
+ const int code_factor_bits = 1;
+ DebugLineOpCodeWriter<> opcodes(is64bit, code_factor_bits);
+
+ std::vector<std::string> include_directories;
+ include_directories.push_back("/path/to/source");
+ DW_CHECK("/path/to/source");
+
+ std::vector<FileEntry> files {
+ { "file0.c", 0, 1000, 2000 },
+ { "file1.c", 1, 1000, 2000 },
+ { "file2.c", 1, 1000, 2000 },
+ };
+ DW_CHECK("1\t0\t1000\t2000\tfile0.c");
+ DW_CHECK_NEXT("2\t1\t1000\t2000\tfile1.c");
+ DW_CHECK_NEXT("3\t1\t1000\t2000\tfile2.c");
+
+ DW_CHECK("Line Number Statements");
+ opcodes.SetAddress(0x01000000);
+ DW_CHECK_NEXT("Extended opcode 2: set Address to 0x1000000");
+ opcodes.AddRow();
+ DW_CHECK_NEXT("Copy");
+ opcodes.AdvancePC(0x01000100);
+ DW_CHECK_NEXT("Advance PC by 256 to 0x1000100");
+ opcodes.SetFile(2);
+ DW_CHECK_NEXT("Set File Name to entry 2 in the File Name Table");
+ opcodes.AdvanceLine(3);
+ DW_CHECK_NEXT("Advance Line by 2 to 3");
+ opcodes.SetColumn(4);
+ DW_CHECK_NEXT("Set column to 4");
+ opcodes.NegateStmt();
+ DW_CHECK_NEXT("Set is_stmt to 0");
+ opcodes.SetBasicBlock();
+ DW_CHECK_NEXT("Set basic block");
+ opcodes.SetPrologueEnd();
+ DW_CHECK_NEXT("Set prologue_end to true");
+ opcodes.SetEpilogueBegin();
+ DW_CHECK_NEXT("Set epilogue_begin to true");
+ opcodes.SetISA(5);
+ DW_CHECK_NEXT("Set ISA to 5");
+ opcodes.EndSequence();
+ DW_CHECK_NEXT("Extended opcode 1: End of Sequence");
+ opcodes.DefineFile("file.c", 0, 1000, 2000);
+ DW_CHECK_NEXT("Extended opcode 3: define new File Table entry");
+ DW_CHECK_NEXT("Entry\tDir\tTime\tSize\tName");
+ DW_CHECK_NEXT("1\t0\t1000\t2000\tfile.c");
+
+ WriteDebugLineTable(include_directories, files, opcodes, &debug_line_data_);
+ CheckObjdumpOutput(is64bit, "-W");
+}
+
+// DWARF has special one byte codes which advance PC and line at the same time.
+TEST_F(DwarfTest, DebugLineSpecialOpcodes) {
+ const bool is64bit = false;
+ const int code_factor_bits = 1;
+ uint32_t pc = 0x01000000;
+ int line = 1;
+ DebugLineOpCodeWriter<> opcodes(is64bit, code_factor_bits);
+ opcodes.SetAddress(pc);
+ size_t num_rows = 0;
+ DW_CHECK("Line Number Statements:");
+ DW_CHECK("Special opcode");
+ DW_CHECK("Advance PC by constant");
+ DW_CHECK("Decoded dump of debug contents of section .debug_line:");
+ DW_CHECK("Line number Starting address");
+ for (int addr_delta = 0; addr_delta < 80; addr_delta += 2) {
+ for (int line_delta = 16; line_delta >= -16; --line_delta) {
+ pc += addr_delta;
+ line += line_delta;
+ opcodes.AddRow(pc, line);
+ num_rows++;
+ ASSERT_EQ(opcodes.CurrentAddress(), pc);
+ ASSERT_EQ(opcodes.CurrentLine(), line);
+ char expected[1024];
+ sprintf(expected, "%i 0x%x", line, pc);
+ DW_CHECK_NEXT(expected);
+ }
+ }
+ EXPECT_LT(opcodes.data()->size(), num_rows * 3);
+
+ std::vector<std::string> directories;
+ std::vector<FileEntry> files { { "file.c", 0, 1000, 2000 } }; // NOLINT
+ WriteDebugLineTable(directories, files, opcodes, &debug_line_data_);
+ CheckObjdumpOutput(is64bit, "-W -WL");
+}
+
+TEST_F(DwarfTest, DebugInfo) {
+ constexpr bool is64bit = false;
+ DebugInfoEntryWriter<> info(is64bit, &debug_abbrev_data_);
+ DW_CHECK("Contents of the .debug_info section:");
+ info.StartTag(dwarf::DW_TAG_compile_unit, dwarf::DW_CHILDREN_yes);
+ DW_CHECK("Abbrev Number: 1 (DW_TAG_compile_unit)");
+ info.WriteStrp(dwarf::DW_AT_producer, "Compiler name", &debug_str_data_);
+ DW_CHECK_NEXT("DW_AT_producer : (indirect string, offset: 0x0): Compiler name");
+ info.WriteAddr(dwarf::DW_AT_low_pc, 0x01000000);
+ DW_CHECK_NEXT("DW_AT_low_pc : 0x1000000");
+ info.WriteAddr(dwarf::DW_AT_high_pc, 0x02000000);
+ DW_CHECK_NEXT("DW_AT_high_pc : 0x2000000");
+ info.StartTag(dwarf::DW_TAG_subprogram, dwarf::DW_CHILDREN_no);
+ DW_CHECK("Abbrev Number: 2 (DW_TAG_subprogram)");
+ info.WriteStrp(dwarf::DW_AT_name, "Foo", &debug_str_data_);
+ DW_CHECK_NEXT("DW_AT_name : (indirect string, offset: 0xe): Foo");
+ info.WriteAddr(dwarf::DW_AT_low_pc, 0x01010000);
+ DW_CHECK_NEXT("DW_AT_low_pc : 0x1010000");
+ info.WriteAddr(dwarf::DW_AT_high_pc, 0x01020000);
+ DW_CHECK_NEXT("DW_AT_high_pc : 0x1020000");
+ info.EndTag(); // DW_TAG_subprogram
+ info.StartTag(dwarf::DW_TAG_subprogram, dwarf::DW_CHILDREN_no);
+ DW_CHECK("Abbrev Number: 2 (DW_TAG_subprogram)");
+ info.WriteStrp(dwarf::DW_AT_name, "Bar", &debug_str_data_);
+ DW_CHECK_NEXT("DW_AT_name : (indirect string, offset: 0x12): Bar");
+ info.WriteAddr(dwarf::DW_AT_low_pc, 0x01020000);
+ DW_CHECK_NEXT("DW_AT_low_pc : 0x1020000");
+ info.WriteAddr(dwarf::DW_AT_high_pc, 0x01030000);
+ DW_CHECK_NEXT("DW_AT_high_pc : 0x1030000");
+ info.EndTag(); // DW_TAG_subprogram
+ info.EndTag(); // DW_TAG_compile_unit
+ // Test that the previous list was properly terminated and that empty children work.
+ info.StartTag(dwarf::DW_TAG_compile_unit, dwarf::DW_CHILDREN_yes);
+ info.EndTag(); // DW_TAG_compile_unit
+
+ // The abbrev table is just a side product, but check it as well.
+ DW_CHECK("Abbrev Number: 3 (DW_TAG_compile_unit)");
+ DW_CHECK("Contents of the .debug_abbrev section:");
+ DW_CHECK("1 DW_TAG_compile_unit [has children]");
+ DW_CHECK_NEXT("DW_AT_producer DW_FORM_strp");
+ DW_CHECK_NEXT("DW_AT_low_pc DW_FORM_addr");
+ DW_CHECK_NEXT("DW_AT_high_pc DW_FORM_addr");
+ DW_CHECK("2 DW_TAG_subprogram [no children]");
+ DW_CHECK_NEXT("DW_AT_name DW_FORM_strp");
+ DW_CHECK_NEXT("DW_AT_low_pc DW_FORM_addr");
+ DW_CHECK_NEXT("DW_AT_high_pc DW_FORM_addr");
+ DW_CHECK("3 DW_TAG_compile_unit [has children]");
+
+ dwarf::WriteDebugInfoCU(0 /* debug_abbrev_offset */, info, &debug_info_data_);
+ CheckObjdumpOutput(is64bit, "-W");
+}
+
+#endif // HAVE_ANDROID_OS
+
+} // namespace dwarf
+} // namespace art
diff --git a/compiler/dwarf/dwarf_test.h b/compiler/dwarf/dwarf_test.h
new file mode 100644
index 0000000000..dd5e0c286e
--- /dev/null
+++ b/compiler/dwarf/dwarf_test.h
@@ -0,0 +1,220 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DWARF_DWARF_TEST_H_
+#define ART_COMPILER_DWARF_DWARF_TEST_H_
+
+#include <cstring>
+#include <dirent.h>
+#include <memory>
+#include <set>
+#include <stdio.h>
+#include <string>
+#include <sys/types.h>
+
+#include "utils.h"
+#include "base/unix_file/fd_file.h"
+#include "common_runtime_test.h"
+#include "elf_builder.h"
+#include "gtest/gtest.h"
+#include "os.h"
+
+namespace art {
+namespace dwarf {
+
+#define DW_CHECK(substring) Check(substring, false, __FILE__, __LINE__)
+#define DW_CHECK_NEXT(substring) Check(substring, true, __FILE__, __LINE__)
+
+class DwarfTest : public CommonRuntimeTest {
+ public:
+ static constexpr bool kPrintObjdumpOutput = false; // Set to true for debugging.
+
+ struct ExpectedLine {
+ std::string substring;
+ bool next;
+ const char* at_file;
+ int at_line;
+ };
+
+ // Check that the objdump output contains the given substring.
+ // If next is true, it must be the next line. Otherwise lines are skipped.
+ void Check(const char* substr, bool next, const char* at_file, int at_line) {
+ expected_lines_.push_back(ExpectedLine {substr, next, at_file, at_line});
+ }
+
+ static std::string GetObjdumpPath() {
+ const char* android_build_top = getenv("ANDROID_BUILD_TOP");
+ if (android_build_top != nullptr) {
+ std::string host_prebuilts = std::string(android_build_top) +
+ "/prebuilts/gcc/linux-x86/host/";
+ // Read the content of the directory.
+ std::set<std::string> entries;
+ DIR* dir = opendir(host_prebuilts.c_str());
+ if (dir != nullptr) {
+ struct dirent* entry;
+ while ((entry = readdir(dir)) != nullptr) {
+ if (strstr(entry->d_name, "linux-glibc")) {
+ entries.insert(host_prebuilts + entry->d_name);
+ }
+ }
+ closedir(dir);
+ }
+ // Strings are sorted so the last one should be the most recent version.
+ if (!entries.empty()) {
+ std::string path = *entries.rbegin() + "/x86_64-linux/bin/objdump";
+ struct stat st;
+ if (stat(path.c_str(), &st) == 0) {
+ return path; // File exists.
+ }
+ }
+ }
+ ADD_FAILURE() << "Cannot find prebuilt objdump.";
+ return "objdump"; // Use the system objdump as a fallback.
+ }
+
+ // Pretty-print the generated DWARF data using objdump.
+ template<typename Elf_Word, typename Elf_Sword, typename Elf_Addr, typename Elf_Dyn,
+ typename Elf_Sym, typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr>
+ std::vector<std::string> Objdump(bool is64bit, const char* args) {
+ // Write simple elf file with just the DWARF sections.
+ class NoCode : public CodeOutput {
+ virtual void SetCodeOffset(size_t) { }
+ virtual bool Write(OutputStream*) { return true; }
+ } code;
+ ScratchFile file;
+ InstructionSet isa = is64bit ? kX86_64 : kX86;
+ ElfBuilder<Elf_Word, Elf_Sword, Elf_Addr, Elf_Dyn,
+ Elf_Sym, Elf_Ehdr, Elf_Phdr, Elf_Shdr> builder(
+ &code, file.GetFile(), isa, 0, 0, 0, 0, 0, 0, false, false);
+ typedef ElfRawSectionBuilder<Elf_Word, Elf_Sword, Elf_Shdr> Section;
+ if (!debug_info_data_.empty()) {
+ Section debug_info(".debug_info", SHT_PROGBITS, 0, nullptr, 0, 1, 0);
+ debug_info.SetBuffer(debug_info_data_);
+ builder.RegisterRawSection(debug_info);
+ }
+ if (!debug_abbrev_data_.empty()) {
+ Section debug_abbrev(".debug_abbrev", SHT_PROGBITS, 0, nullptr, 0, 1, 0);
+ debug_abbrev.SetBuffer(debug_abbrev_data_);
+ builder.RegisterRawSection(debug_abbrev);
+ }
+ if (!debug_str_data_.empty()) {
+ Section debug_str(".debug_str", SHT_PROGBITS, 0, nullptr, 0, 1, 0);
+ debug_str.SetBuffer(debug_str_data_);
+ builder.RegisterRawSection(debug_str);
+ }
+ if (!debug_line_data_.empty()) {
+ Section debug_line(".debug_line", SHT_PROGBITS, 0, nullptr, 0, 1, 0);
+ debug_line.SetBuffer(debug_line_data_);
+ builder.RegisterRawSection(debug_line);
+ }
+ if (!eh_frame_data_.empty()) {
+ Section eh_frame(".eh_frame", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, 4, 0);
+ eh_frame.SetBuffer(eh_frame_data_);
+ builder.RegisterRawSection(eh_frame);
+ }
+ builder.Init();
+ builder.Write();
+
+ // Read the elf file back using objdump.
+ std::vector<std::string> lines;
+ std::string cmd = GetObjdumpPath();
+ cmd = cmd + " " + args + " " + file.GetFilename() + " 2>&1";
+ FILE* output = popen(cmd.data(), "r");
+ char buffer[1024];
+ const char* line;
+ while ((line = fgets(buffer, sizeof(buffer), output)) != nullptr) {
+ if (kPrintObjdumpOutput) {
+ printf("%s", line);
+ }
+ if (line[0] != '\0' && line[0] != '\n') {
+ EXPECT_TRUE(strstr(line, "objdump: Error:") == nullptr) << line;
+ EXPECT_TRUE(strstr(line, "objdump: Warning:") == nullptr) << line;
+ std::string str(line);
+ if (str.back() == '\n') {
+ str.pop_back();
+ }
+ lines.push_back(str);
+ }
+ }
+ pclose(output);
+ return lines;
+ }
+
+ std::vector<std::string> Objdump(bool is64bit, const char* args) {
+ if (is64bit) {
+ return Objdump<Elf64_Word, Elf64_Sword, Elf64_Addr, Elf64_Dyn,
+ Elf64_Sym, Elf64_Ehdr, Elf64_Phdr, Elf64_Shdr>(is64bit, args);
+ } else {
+ return Objdump<Elf32_Word, Elf32_Sword, Elf32_Addr, Elf32_Dyn,
+ Elf32_Sym, Elf32_Ehdr, Elf32_Phdr, Elf32_Shdr>(is64bit, args);
+ }
+ }
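+  // Example (the flags are an assumption, not fixed by this header):
+  //   std::vector<std::string> lines = Objdump(false, "--dwarf=frames");
+  // would pretty-print the CFI of the generated 32-bit ELF file.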
+
+ // Compare objdump output to the recorded checks.
+ void CheckObjdumpOutput(bool is64bit, const char* args) {
+ std::vector<std::string> actual_lines = Objdump(is64bit, args);
+ auto actual_line = actual_lines.begin();
+ for (const ExpectedLine& expected_line : expected_lines_) {
+ const std::string& substring = expected_line.substring;
+ if (actual_line == actual_lines.end()) {
+ ADD_FAILURE_AT(expected_line.at_file, expected_line.at_line) <<
+ "Expected '" << substring << "'.\n" <<
+ "Seen end of output.";
+ } else if (expected_line.next) {
+ if (actual_line->find(substring) == std::string::npos) {
+ ADD_FAILURE_AT(expected_line.at_file, expected_line.at_line) <<
+ "Expected '" << substring << "'.\n" <<
+ "Seen '" << actual_line->data() << "'.";
+ } else {
+ // printf("Found '%s' in '%s'.\n", substring.data(), actual_line->data());
+ }
+ actual_line++;
+ } else {
+ bool found = false;
+ for (auto it = actual_line; it < actual_lines.end(); it++) {
+ if (it->find(substring) != std::string::npos) {
+ actual_line = it;
+ found = true;
+ break;
+ }
+ }
+ if (!found) {
+ ADD_FAILURE_AT(expected_line.at_file, expected_line.at_line) <<
+ "Expected '" << substring << "'.\n" <<
+ "Not found anywhere in the rest of the output.";
+ } else {
+ // printf("Found '%s' in '%s'.\n", substring.data(), actual_line->data());
+ actual_line++;
+ }
+ }
+ }
+ }
+
+  // Buffers which are going to be assembled into an ELF file and passed to objdump.
+ std::vector<uint8_t> eh_frame_data_;
+ std::vector<uint8_t> debug_info_data_;
+ std::vector<uint8_t> debug_abbrev_data_;
+ std::vector<uint8_t> debug_str_data_;
+ std::vector<uint8_t> debug_line_data_;
+
+ // The expected output of objdump.
+ std::vector<ExpectedLine> expected_lines_;
+};
+
+} // namespace dwarf
+} // namespace art
+
+#endif // ART_COMPILER_DWARF_DWARF_TEST_H_
diff --git a/compiler/dwarf/headers.h b/compiler/dwarf/headers.h
new file mode 100644
index 0000000000..d866b91ae7
--- /dev/null
+++ b/compiler/dwarf/headers.h
@@ -0,0 +1,167 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DWARF_HEADERS_H_
+#define ART_COMPILER_DWARF_HEADERS_H_
+
+#include "debug_frame_opcode_writer.h"
+#include "debug_info_entry_writer.h"
+#include "debug_line_opcode_writer.h"
+#include "register.h"
+#include "writer.h"
+
+namespace art {
+namespace dwarf {
+
+// Write common information entry (CIE) to .eh_frame section.
+template<typename Allocator>
+void WriteEhFrameCIE(bool is64bit, Reg return_address_register,
+ const DebugFrameOpCodeWriter<Allocator>& opcodes,
+ std::vector<uint8_t>* eh_frame) {
+ Writer<> writer(eh_frame);
+  size_t cie_header_start = writer.data()->size();
+ if (is64bit) {
+ // TODO: This is not related to being 64bit.
+ writer.PushUint32(0xffffffff);
+ writer.PushUint64(0); // Length placeholder.
+ writer.PushUint64(0); // CIE id.
+ } else {
+ writer.PushUint32(0); // Length placeholder.
+ writer.PushUint32(0); // CIE id.
+ }
+ writer.PushUint8(1); // Version.
+ writer.PushString("zR");
+ writer.PushUleb128(DebugFrameOpCodeWriter<Allocator>::kCodeAlignmentFactor);
+ writer.PushSleb128(DebugFrameOpCodeWriter<Allocator>::kDataAlignmentFactor);
+ writer.PushUleb128(return_address_register.num()); // ubyte in DWARF2.
+ writer.PushUleb128(1); // z: Augmentation data size.
+ if (is64bit) {
+ writer.PushUint8(0x04); // R: ((DW_EH_PE_absptr << 4) | DW_EH_PE_udata8).
+ } else {
+ writer.PushUint8(0x03); // R: ((DW_EH_PE_absptr << 4) | DW_EH_PE_udata4).
+ }
+ writer.PushData(opcodes.data());
+ writer.Pad(is64bit ? 8 : 4);
+ if (is64bit) {
+    writer.UpdateUint64(cie_header_start + 4, writer.data()->size() - cie_header_start - 12);
+  } else {
+    writer.UpdateUint32(cie_header_start, writer.data()->size() - cie_header_start - 4);
+ }
+}
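+// Illustrative use (hypothetical register numbers): emit a 32-bit CIE whose
+// initial opcodes just define the CFA, and remember where it starts so that
+// the FDEs written afterwards can point back at it.
+//
+//   DebugFrameOpCodeWriter<> opcodes;
+//   opcodes.DefCFA(Reg(4), 4);             // CFA = reg 4 + 4 bytes.
+//   std::vector<uint8_t> eh_frame;
+//   size_t cie_offset = eh_frame.size();
+//   WriteEhFrameCIE(false /* is64bit */, Reg(8), opcodes, &eh_frame);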
+
+// Write frame description entry (FDE) to .eh_frame section.
+template<typename Allocator>
+void WriteEhFrameFDE(bool is64bit, size_t cie_offset,
+ uint64_t initial_address, uint64_t address_range,
+ const std::vector<uint8_t, Allocator>* opcodes,
+ std::vector<uint8_t>* eh_frame) {
+ Writer<> writer(eh_frame);
+ size_t fde_header_start = writer.data()->size();
+ if (is64bit) {
+ // TODO: This is not related to being 64bit.
+ writer.PushUint32(0xffffffff);
+ writer.PushUint64(0); // Length placeholder.
+ uint64_t cie_pointer = writer.data()->size() - cie_offset;
+ writer.PushUint64(cie_pointer);
+ } else {
+ writer.PushUint32(0); // Length placeholder.
+ uint32_t cie_pointer = writer.data()->size() - cie_offset;
+ writer.PushUint32(cie_pointer);
+ }
+ if (is64bit) {
+ writer.PushUint64(initial_address);
+ writer.PushUint64(address_range);
+ } else {
+ writer.PushUint32(initial_address);
+ writer.PushUint32(address_range);
+ }
+ writer.PushUleb128(0); // Augmentation data size.
+ writer.PushData(opcodes);
+ writer.Pad(is64bit ? 8 : 4);
+ if (is64bit) {
+ writer.UpdateUint64(fde_header_start + 4, writer.data()->size() - fde_header_start - 12);
+ } else {
+ writer.UpdateUint32(fde_header_start, writer.data()->size() - fde_header_start - 4);
+ }
+}
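+// Note that in .eh_frame (unlike .debug_frame) the CIE pointer holds the
+// distance from the pointer field back to the start of the CIE, which is why
+// it is computed from the current write position rather than stored verbatim.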
+
+// Write compilation unit (CU) to .debug_info section.
+template<typename Allocator>
+void WriteDebugInfoCU(uint32_t debug_abbrev_offset,
+ const DebugInfoEntryWriter<Allocator>& entries,
+ std::vector<uint8_t>* debug_info) {
+ Writer<> writer(debug_info);
+ size_t start = writer.data()->size();
+ writer.PushUint32(0); // Length placeholder.
+ writer.PushUint16(3); // Version.
+ writer.PushUint32(debug_abbrev_offset);
+ writer.PushUint8(entries.is64bit() ? 8 : 4);
+ writer.PushData(entries.data());
+ writer.UpdateUint32(start, writer.data()->size() - start - 4);
+}
+
+struct FileEntry {
+ std::string file_name;
+ int directory_index;
+ int modification_time;
+ int file_size;
+};
+
+// Write line table to .debug_line section.
+template<typename Allocator>
+void WriteDebugLineTable(const std::vector<std::string>& include_directories,
+ const std::vector<FileEntry>& files,
+ const DebugLineOpCodeWriter<Allocator>& opcodes,
+ std::vector<uint8_t>* debug_line) {
+ Writer<> writer(debug_line);
+ size_t header_start = writer.data()->size();
+ writer.PushUint32(0); // Section-length placeholder.
+ // Claim DWARF-2 version even though we use some DWARF-3 features.
+ // DWARF-2 consumers will ignore the unknown opcodes.
+ // This is what clang currently does.
+ writer.PushUint16(2); // .debug_line version.
+ size_t header_length_pos = writer.data()->size();
+ writer.PushUint32(0); // Header-length placeholder.
+ writer.PushUint8(1 << opcodes.GetCodeFactorBits());
+ writer.PushUint8(DebugLineOpCodeWriter<Allocator>::kDefaultIsStmt ? 1 : 0);
+ writer.PushInt8(DebugLineOpCodeWriter<Allocator>::kLineBase);
+ writer.PushUint8(DebugLineOpCodeWriter<Allocator>::kLineRange);
+ writer.PushUint8(DebugLineOpCodeWriter<Allocator>::kOpcodeBase);
+ static const int opcode_lengths[DebugLineOpCodeWriter<Allocator>::kOpcodeBase] = {
+ 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1 };
+ for (int i = 1; i < DebugLineOpCodeWriter<Allocator>::kOpcodeBase; i++) {
+ writer.PushUint8(opcode_lengths[i]);
+ }
+ for (const std::string& directory : include_directories) {
+ writer.PushData(directory.data(), directory.size() + 1);
+ }
+ writer.PushUint8(0); // Terminate include_directories list.
+ for (const FileEntry& file : files) {
+ writer.PushData(file.file_name.data(), file.file_name.size() + 1);
+ writer.PushUleb128(file.directory_index);
+ writer.PushUleb128(file.modification_time);
+ writer.PushUleb128(file.file_size);
+ }
+ writer.PushUint8(0); // Terminate file list.
+ writer.UpdateUint32(header_length_pos, writer.data()->size() - header_length_pos - 4);
+ writer.PushData(opcodes.data()->data(), opcodes.data()->size());
+ writer.UpdateUint32(header_start, writer.data()->size() - header_start - 4);
+}
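+// A minimal sketch of driving the writer above (names and values made up):
+//
+//   std::vector<std::string> dirs = { "java/lang" };
+//   std::vector<FileEntry> files = { { "Object.java", 1, 0, 0 } };
+//   DebugLineOpCodeWriter<> opcodes(false /* 32bit */, 0);
+//   opcodes.AddRow(0x1000, 1);  // Map address 0x1000 to line 1 of file 1.
+//   opcodes.EndSequence();
+//   std::vector<uint8_t> debug_line;
+//   WriteDebugLineTable(dirs, files, opcodes, &debug_line);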
+
+} // namespace dwarf
+} // namespace art
+
+#endif // ART_COMPILER_DWARF_HEADERS_H_
diff --git a/compiler/dwarf/register.h b/compiler/dwarf/register.h
new file mode 100644
index 0000000000..fa666dffa9
--- /dev/null
+++ b/compiler/dwarf/register.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DWARF_REGISTER_H_
+#define ART_COMPILER_DWARF_REGISTER_H_
+
+namespace art {
+namespace dwarf {
+
+// Represents a DWARF register.
+class Reg {
+ public:
+ explicit Reg(int reg_num) : num_(reg_num) { }
+ int num() const { return num_; }
+
+ // TODO: Arm S0–S31 register mapping is obsolescent.
+ // We should use VFP-v3/Neon D0-D31 mapping instead.
+  // However, D0 is aliased to the pair of S0 and S1, so using that
+  // mapping we cannot easily say that S0 is spilled and S1 is not.
+ // There are ways around this in DWARF but they are complex.
+ // It would be much simpler to always spill whole D registers.
+ // Arm64 mapping is correct since we already do this there.
+
+ static Reg ArmCore(int num) { return Reg(num); }
+ static Reg ArmFp(int num) { return Reg(64 + num); } // S0–S31.
+ static Reg Arm64Core(int num) { return Reg(num); }
+ static Reg Arm64Fp(int num) { return Reg(64 + num); } // V0-V31.
+ static Reg MipsCore(int num) { return Reg(num); }
+ static Reg Mips64Core(int num) { return Reg(num); }
+ static Reg X86Core(int num) { return Reg(num); }
+ static Reg X86Fp(int num) { return Reg(21 + num); }
+ static Reg X86_64Core(int num) {
+ static const int map[8] = {0, 2, 1, 3, 7, 6, 4, 5};
+ return Reg(num < 8 ? map[num] : num);
+ }
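+  // E.g., ART numbers the stack pointer 4 (ESP), whereas the x86-64 DWARF
+  // ABI assigns RSP the number 7, so X86_64Core(4) returns Reg(7).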
+ static Reg X86_64Fp(int num) { return Reg(17 + num); }
+
+ private:
+ int num_;
+};
+
+} // namespace dwarf
+} // namespace art
+
+#endif // ART_COMPILER_DWARF_REGISTER_H_
diff --git a/compiler/dwarf/writer.h b/compiler/dwarf/writer.h
new file mode 100644
index 0000000000..3b9c55866a
--- /dev/null
+++ b/compiler/dwarf/writer.h
@@ -0,0 +1,173 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DWARF_WRITER_H_
+#define ART_COMPILER_DWARF_WRITER_H_
+
+#include <vector>
+#include "leb128.h"
+#include "base/logging.h"
+#include "utils.h"
+
+namespace art {
+namespace dwarf {
+
+// The base class for all DWARF writers.
+template<typename Allocator = std::allocator<uint8_t>>
+class Writer {
+ public:
+ void PushUint8(int value) {
+ DCHECK_GE(value, 0);
+ DCHECK_LE(value, UINT8_MAX);
+ data_->push_back(value & 0xff);
+ }
+
+ void PushUint16(int value) {
+ DCHECK_GE(value, 0);
+ DCHECK_LE(value, UINT16_MAX);
+ data_->push_back((value >> 0) & 0xff);
+ data_->push_back((value >> 8) & 0xff);
+ }
+
+ void PushUint32(uint32_t value) {
+ data_->push_back((value >> 0) & 0xff);
+ data_->push_back((value >> 8) & 0xff);
+ data_->push_back((value >> 16) & 0xff);
+ data_->push_back((value >> 24) & 0xff);
+ }
+
+ void PushUint32(int value) {
+ DCHECK_GE(value, 0);
+ PushUint32(static_cast<uint32_t>(value));
+ }
+
+ void PushUint32(uint64_t value) {
+ DCHECK_LE(value, UINT32_MAX);
+ PushUint32(static_cast<uint32_t>(value));
+ }
+
+ void PushUint64(uint64_t value) {
+ data_->push_back((value >> 0) & 0xff);
+ data_->push_back((value >> 8) & 0xff);
+ data_->push_back((value >> 16) & 0xff);
+ data_->push_back((value >> 24) & 0xff);
+ data_->push_back((value >> 32) & 0xff);
+ data_->push_back((value >> 40) & 0xff);
+ data_->push_back((value >> 48) & 0xff);
+ data_->push_back((value >> 56) & 0xff);
+ }
+
+ void PushInt8(int value) {
+ DCHECK_GE(value, INT8_MIN);
+ DCHECK_LE(value, INT8_MAX);
+ PushUint8(static_cast<uint8_t>(value));
+ }
+
+ void PushInt16(int value) {
+ DCHECK_GE(value, INT16_MIN);
+ DCHECK_LE(value, INT16_MAX);
+ PushUint16(static_cast<uint16_t>(value));
+ }
+
+ void PushInt32(int value) {
+ PushUint32(static_cast<uint32_t>(value));
+ }
+
+ void PushInt64(int64_t value) {
+ PushUint64(static_cast<uint64_t>(value));
+ }
+
+ // Variable-length encoders.
+
+ void PushUleb128(uint32_t value) {
+ EncodeUnsignedLeb128(data_, value);
+ }
+
+ void PushUleb128(int value) {
+ DCHECK_GE(value, 0);
+ EncodeUnsignedLeb128(data_, value);
+ }
+
+ void PushSleb128(int value) {
+ EncodeSignedLeb128(data_, value);
+ }
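+  // For example, PushUleb128(300) appends the bytes 0xAC 0x02, and
+  // PushSleb128(-2) appends the single byte 0x7E.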
+
+ // Miscellaneous functions.
+
+ void PushString(const char* value) {
+ data_->insert(data_->end(), value, value + strlen(value) + 1);
+ }
+
+ void PushData(const void* ptr, size_t size) {
+ const char* p = reinterpret_cast<const char*>(ptr);
+ data_->insert(data_->end(), p, p + size);
+ }
+
+ template<typename Allocator2>
+ void PushData(const std::vector<uint8_t, Allocator2>* buffer) {
+ data_->insert(data_->end(), buffer->begin(), buffer->end());
+ }
+
+ void UpdateUint32(size_t offset, uint32_t value) {
+ DCHECK_LT(offset + 3, data_->size());
+ (*data_)[offset + 0] = (value >> 0) & 0xFF;
+ (*data_)[offset + 1] = (value >> 8) & 0xFF;
+ (*data_)[offset + 2] = (value >> 16) & 0xFF;
+ (*data_)[offset + 3] = (value >> 24) & 0xFF;
+ }
+
+ void UpdateUint64(size_t offset, uint64_t value) {
+ DCHECK_LT(offset + 7, data_->size());
+ (*data_)[offset + 0] = (value >> 0) & 0xFF;
+ (*data_)[offset + 1] = (value >> 8) & 0xFF;
+ (*data_)[offset + 2] = (value >> 16) & 0xFF;
+ (*data_)[offset + 3] = (value >> 24) & 0xFF;
+ (*data_)[offset + 4] = (value >> 32) & 0xFF;
+ (*data_)[offset + 5] = (value >> 40) & 0xFF;
+ (*data_)[offset + 6] = (value >> 48) & 0xFF;
+ (*data_)[offset + 7] = (value >> 56) & 0xFF;
+ }
+
+ void UpdateUleb128(size_t offset, uint32_t value) {
+ DCHECK_LE(offset + UnsignedLeb128Size(value), data_->size());
+ UpdateUnsignedLeb128(data_->data() + offset, value);
+ }
+
+ void Pop() {
+    data_->pop_back();
+ }
+
+ void Pad(int alignment) {
+ DCHECK_NE(alignment, 0);
+ data_->resize(RoundUp(data_->size(), alignment), 0);
+ }
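+  // E.g., Pad(4) grows a 6-byte buffer to 8 bytes with zeros; in CFI
+  // sections the zero byte conveniently doubles as DW_CFA_nop padding.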
+
+ const std::vector<uint8_t, Allocator>* data() const {
+ return data_;
+ }
+
+ explicit Writer(std::vector<uint8_t, Allocator>* buffer) : data_(buffer) { }
+
+ private:
+ std::vector<uint8_t, Allocator>* data_;
+
+ DISALLOW_COPY_AND_ASSIGN(Writer);
+};
+
+} // namespace dwarf
+} // namespace art
+
+#endif // ART_COMPILER_DWARF_WRITER_H_
diff --git a/compiler/elf_builder.h b/compiler/elf_builder.h
index 9ab3602606..124ed03c21 100644
--- a/compiler/elf_builder.h
+++ b/compiler/elf_builder.h
@@ -40,6 +40,7 @@ class ElfSectionBuilder : public ValueObject {
section_.sh_addralign = align;
section_.sh_entsize = entsize;
}
+ ElfSectionBuilder(const ElfSectionBuilder&) = default;
~ElfSectionBuilder() {}
@@ -144,6 +145,7 @@ class ElfRawSectionBuilder FINAL : public ElfSectionBuilder<Elf_Word, Elf_Sword,
: ElfSectionBuilder<Elf_Word, Elf_Sword, Elf_Shdr>(sec_name, type, flags, link, info, align,
entsize) {
}
+ ElfRawSectionBuilder(const ElfRawSectionBuilder&) = default;
~ElfRawSectionBuilder() {}
diff --git a/compiler/elf_writer_debug.cc b/compiler/elf_writer_debug.cc
new file mode 100644
index 0000000000..5e8e24b035
--- /dev/null
+++ b/compiler/elf_writer_debug.cc
@@ -0,0 +1,360 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "elf_writer_debug.h"
+
+#include "compiled_method.h"
+#include "driver/compiler_driver.h"
+#include "dex_file-inl.h"
+#include "dwarf/headers.h"
+#include "dwarf/register.h"
+#include "oat_writer.h"
+
+namespace art {
+namespace dwarf {
+
+static void WriteEhFrameCIE(InstructionSet isa, std::vector<uint8_t>* eh_frame) {
+ // Scratch registers should be marked as undefined. This tells the
+ // debugger that its value in the previous frame is not recoverable.
+ bool is64bit = Is64BitInstructionSet(isa);
+ switch (isa) {
+ case kArm:
+ case kThumb2: {
+ DebugFrameOpCodeWriter<> opcodes;
+ opcodes.DefCFA(Reg::ArmCore(13), 0); // R13(SP).
+ // core registers.
+ for (int reg = 0; reg < 13; reg++) {
+ if (reg < 4 || reg == 12) {
+ opcodes.Undefined(Reg::ArmCore(reg));
+ } else {
+ opcodes.SameValue(Reg::ArmCore(reg));
+ }
+ }
+ // fp registers.
+ for (int reg = 0; reg < 32; reg++) {
+ if (reg < 16) {
+ opcodes.Undefined(Reg::ArmFp(reg));
+ } else {
+ opcodes.SameValue(Reg::ArmFp(reg));
+ }
+ }
+ auto return_address_reg = Reg::ArmCore(14); // R14(LR).
+ WriteEhFrameCIE(is64bit, return_address_reg, opcodes, eh_frame);
+ return;
+ }
+ case kArm64: {
+ DebugFrameOpCodeWriter<> opcodes;
+ opcodes.DefCFA(Reg::Arm64Core(31), 0); // R31(SP).
+ // core registers.
+ for (int reg = 0; reg < 30; reg++) {
+ if (reg < 8 || reg == 16 || reg == 17) {
+ opcodes.Undefined(Reg::Arm64Core(reg));
+ } else {
+ opcodes.SameValue(Reg::Arm64Core(reg));
+ }
+ }
+ // fp registers.
+ for (int reg = 0; reg < 32; reg++) {
+ if (reg < 8 || reg >= 16) {
+ opcodes.Undefined(Reg::Arm64Fp(reg));
+ } else {
+ opcodes.SameValue(Reg::Arm64Fp(reg));
+ }
+ }
+ auto return_address_reg = Reg::Arm64Core(30); // R30(LR).
+ WriteEhFrameCIE(is64bit, return_address_reg, opcodes, eh_frame);
+ return;
+ }
+ case kMips:
+ case kMips64: {
+ DebugFrameOpCodeWriter<> opcodes;
+ opcodes.DefCFA(Reg::MipsCore(29), 0); // R29(SP).
+ // core registers.
+ for (int reg = 1; reg < 26; reg++) {
+ if (reg < 16 || reg == 24 || reg == 25) { // AT, V*, A*, T*.
+ opcodes.Undefined(Reg::MipsCore(reg));
+ } else {
+ opcodes.SameValue(Reg::MipsCore(reg));
+ }
+ }
+ auto return_address_reg = Reg::MipsCore(31); // R31(RA).
+ WriteEhFrameCIE(is64bit, return_address_reg, opcodes, eh_frame);
+ return;
+ }
+ case kX86: {
+ DebugFrameOpCodeWriter<> opcodes;
+ opcodes.DefCFA(Reg::X86Core(4), 4); // R4(ESP).
+ opcodes.Offset(Reg::X86Core(8), -4); // R8(EIP).
+ // core registers.
+ for (int reg = 0; reg < 8; reg++) {
+ if (reg <= 3) {
+ opcodes.Undefined(Reg::X86Core(reg));
+ } else if (reg == 4) {
+ // Stack pointer.
+ } else {
+ opcodes.SameValue(Reg::X86Core(reg));
+ }
+ }
+ // fp registers.
+ for (int reg = 0; reg < 8; reg++) {
+ opcodes.Undefined(Reg::X86Fp(reg));
+ }
+ auto return_address_reg = Reg::X86Core(8); // R8(EIP).
+ WriteEhFrameCIE(is64bit, return_address_reg, opcodes, eh_frame);
+ return;
+ }
+ case kX86_64: {
+ DebugFrameOpCodeWriter<> opcodes;
+ opcodes.DefCFA(Reg::X86_64Core(4), 8); // R4(RSP).
+ opcodes.Offset(Reg::X86_64Core(16), -8); // R16(RIP).
+ // core registers.
+ for (int reg = 0; reg < 16; reg++) {
+ if (reg == 4) {
+ // Stack pointer.
+        } else if (reg < 12 && reg != 3 && reg != 5) {  // All except RBX and RBP.
+ opcodes.Undefined(Reg::X86_64Core(reg));
+ } else {
+ opcodes.SameValue(Reg::X86_64Core(reg));
+ }
+ }
+ // fp registers.
+ for (int reg = 0; reg < 16; reg++) {
+ if (reg < 12) {
+ opcodes.Undefined(Reg::X86_64Fp(reg));
+ } else {
+ opcodes.SameValue(Reg::X86_64Fp(reg));
+ }
+ }
+ auto return_address_reg = Reg::X86_64Core(16); // R16(RIP).
+ WriteEhFrameCIE(is64bit, return_address_reg, opcodes, eh_frame);
+ return;
+ }
+ case kNone:
+ break;
+ }
+  LOG(FATAL) << "Cannot write CIE frame for ISA " << isa;
+ UNREACHABLE();
+}
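+// Reading the x86 case above as an example: on function entry the CFA is
+// ESP + 4 and the return address (EIP) lives at CFA - 4, i.e. exactly the
+// state right after a 'call' instruction has pushed the return address.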
+
+/*
+ * @brief Generate the DWARF sections.
+ * @param compiler The compiler driver.
+ * @param oat_writer The Oat file writer.
+ * @param text_section_offset Offset of the .text section in the output file.
+ * @param eh_frame Call Frame Information.
+ * @param debug_info Compilation unit information.
+ * @param debug_abbrev Abbreviations used to generate debug_info.
+ * @param debug_str Debug strings.
+ * @param debug_line Line number table.
+ */
+void WriteDebugSections(const CompilerDriver* compiler,
+ const OatWriter* oat_writer,
+ uint32_t text_section_offset,
+ std::vector<uint8_t>* eh_frame,
+ std::vector<uint8_t>* debug_info,
+ std::vector<uint8_t>* debug_abbrev,
+ std::vector<uint8_t>* debug_str,
+ std::vector<uint8_t>* debug_line) {
+ const std::vector<OatWriter::DebugInfo>& method_infos = oat_writer->GetMethodDebugInfo();
+ const InstructionSet isa = compiler->GetInstructionSet();
+ uint32_t cunit_low_pc = static_cast<uint32_t>(-1);
+ uint32_t cunit_high_pc = 0;
+  for (const auto& method_info : method_infos) {
+ cunit_low_pc = std::min(cunit_low_pc, method_info.low_pc_);
+ cunit_high_pc = std::max(cunit_high_pc, method_info.high_pc_);
+ }
+
+ // Write .eh_frame section.
+ size_t cie_offset = eh_frame->size();
+ WriteEhFrameCIE(isa, eh_frame);
+ for (const OatWriter::DebugInfo& mi : method_infos) {
+ const SwapVector<uint8_t>* opcodes = mi.compiled_method_->GetCFIInfo();
+ if (opcodes != nullptr) {
+ WriteEhFrameFDE(Is64BitInstructionSet(isa), cie_offset,
+ text_section_offset + mi.low_pc_, mi.high_pc_ - mi.low_pc_,
+ opcodes, eh_frame);
+ }
+ }
+
+ // Write .debug_info section.
+ size_t debug_abbrev_offset = debug_abbrev->size();
+ DebugInfoEntryWriter<> info(false /* 32 bit */, debug_abbrev);
+ info.StartTag(DW_TAG_compile_unit, DW_CHILDREN_yes);
+ info.WriteStrp(DW_AT_producer, "Android dex2oat", debug_str);
+ info.WriteData1(DW_AT_language, DW_LANG_Java);
+ info.WriteAddr(DW_AT_low_pc, cunit_low_pc + text_section_offset);
+ info.WriteAddr(DW_AT_high_pc, cunit_high_pc + text_section_offset);
+ info.WriteData4(DW_AT_stmt_list, debug_line->size());
+  for (const auto& method_info : method_infos) {
+ std::string method_name = PrettyMethod(method_info.dex_method_index_,
+ *method_info.dex_file_, true);
+ if (method_info.deduped_) {
+      // TODO: We should place the DEDUPED tag on the first instance of a deduplicated symbol
+ // so that it will show up in a debuggerd crash report.
+ method_name += " [ DEDUPED ]";
+ }
+ info.StartTag(DW_TAG_subprogram, DW_CHILDREN_no);
+ info.WriteStrp(DW_AT_name, method_name.data(), debug_str);
+ info.WriteAddr(DW_AT_low_pc, method_info.low_pc_ + text_section_offset);
+ info.WriteAddr(DW_AT_high_pc, method_info.high_pc_ + text_section_offset);
+ info.EndTag(); // DW_TAG_subprogram
+ }
+ info.EndTag(); // DW_TAG_compile_unit
+ WriteDebugInfoCU(debug_abbrev_offset, info, debug_info);
+
+ // TODO: in gdb info functions <regexp> - reports Java functions, but
+ // source file is <unknown> because .debug_line is formed as one
+ // compilation unit. To fix this it is possible to generate
+ // a separate compilation unit for every distinct Java source.
+  // Each of these compilation units can have several non-adjacent
+ // method ranges.
+
+ // Write .debug_line section.
+ std::vector<FileEntry> files;
+ std::unordered_map<std::string, size_t> files_map;
+ std::vector<std::string> directories;
+ std::unordered_map<std::string, size_t> directories_map;
+  int code_factor_bits = 0;
+  int dwarf_isa = -1;
+  switch (isa) {
+    case kArm:  // arm actually means thumb2.
+    case kThumb2:
+      code_factor_bits = 1;  // 16-bit instructions.
+      dwarf_isa = 1;  // DW_ISA_ARM_thumb.
+      break;
+    case kArm64:
+    case kMips:
+    case kMips64:
+      code_factor_bits = 2;  // 32-bit instructions.
+      break;
+    case kNone:
+    case kX86:
+    case kX86_64:
+      break;
+  }
+  DebugLineOpCodeWriter<> opcodes(false /* 32bit */, code_factor_bits);
+ opcodes.SetAddress(text_section_offset + cunit_low_pc);
+ if (dwarf_isa != -1) {
+ opcodes.SetISA(dwarf_isa);
+ }
+ for (const OatWriter::DebugInfo& mi : method_infos) {
+ // Addresses in the line table should be unique and increasing.
+ if (mi.deduped_) {
+ continue;
+ }
+
+ struct DebugInfoCallbacks {
+ static bool NewPosition(void* ctx, uint32_t address, uint32_t line) {
+ auto* context = reinterpret_cast<DebugInfoCallbacks*>(ctx);
+ context->dex2line_.push_back({address, static_cast<int32_t>(line)});
+ return false;
+ }
+ DefaultSrcMap dex2line_;
+ } debug_info_callbacks;
+
+ const DexFile* dex = mi.dex_file_;
+ if (mi.code_item_ != nullptr) {
+ dex->DecodeDebugInfo(mi.code_item_,
+ (mi.access_flags_ & kAccStatic) != 0,
+ mi.dex_method_index_,
+ DebugInfoCallbacks::NewPosition,
+ nullptr,
+ &debug_info_callbacks);
+ }
+
+ // Get and deduplicate directory and filename.
+ int file_index = 0; // 0 - primary source file of the compilation.
+ auto& dex_class_def = dex->GetClassDef(mi.class_def_index_);
+ const char* source_file = dex->GetSourceFile(dex_class_def);
+ if (source_file != nullptr) {
+ std::string file_name(source_file);
+ size_t file_name_slash = file_name.find_last_of('/');
+ std::string class_name(dex->GetClassDescriptor(dex_class_def));
+ size_t class_name_slash = class_name.find_last_of('/');
+ std::string full_path(file_name);
+
+ // Guess directory from package name.
+ int directory_index = 0; // 0 - current directory of the compilation.
+ if (file_name_slash == std::string::npos && // Just filename.
+ class_name.front() == 'L' && // Type descriptor for a class.
+ class_name_slash != std::string::npos) { // Has package name.
+ std::string package_name = class_name.substr(1, class_name_slash - 1);
+ auto it = directories_map.find(package_name);
+ if (it == directories_map.end()) {
+ directory_index = 1 + directories.size();
+ directories_map.emplace(package_name, directory_index);
+ directories.push_back(package_name);
+ } else {
+ directory_index = it->second;
+ }
+ full_path = package_name + "/" + file_name;
+ }
+
+ // Add file entry.
+ auto it2 = files_map.find(full_path);
+ if (it2 == files_map.end()) {
+ file_index = 1 + files.size();
+ files_map.emplace(full_path, file_index);
+ files.push_back(FileEntry {
+ file_name,
+ directory_index,
+ 0, // Modification time - NA.
+ 0, // File size - NA.
+ });
+ } else {
+ file_index = it2->second;
+ }
+ }
+ opcodes.SetFile(file_index);
+
+ // Generate mapping opcodes from PC to Java lines.
+ const DefaultSrcMap& dex2line_map = debug_info_callbacks.dex2line_;
+ uint32_t low_pc = text_section_offset + mi.low_pc_;
+ if (file_index != 0 && !dex2line_map.empty()) {
+ bool first = true;
+ for (SrcMapElem pc2dex : mi.compiled_method_->GetSrcMappingTable()) {
+ uint32_t pc = pc2dex.from_;
+ int dex_pc = pc2dex.to_;
+ auto dex2line = dex2line_map.Find(static_cast<uint32_t>(dex_pc));
+ if (dex2line.first) {
+ int line = dex2line.second;
+ if (first) {
+ first = false;
+ if (pc > 0) {
+ // Assume that any preceding code is prologue.
+ int first_line = dex2line_map.front().to_;
+ // Prologue is not a sensible place for a breakpoint.
+ opcodes.NegateStmt();
+ opcodes.AddRow(low_pc, first_line);
+ opcodes.NegateStmt();
+ opcodes.SetPrologueEnd();
+ }
+ opcodes.AddRow(low_pc + pc, line);
+ } else if (line != opcodes.CurrentLine()) {
+ opcodes.AddRow(low_pc + pc, line);
+ }
+ }
+ }
+ } else {
+ // line 0 - instruction cannot be attributed to any source line.
+ opcodes.AddRow(low_pc, 0);
+ }
+ }
+ opcodes.AdvancePC(text_section_offset + cunit_high_pc);
+ opcodes.EndSequence();
+ WriteDebugLineTable(directories, files, opcodes, debug_line);
+}
+
+} // namespace dwarf
+} // namespace art
diff --git a/compiler/elf_writer_debug.h b/compiler/elf_writer_debug.h
new file mode 100644
index 0000000000..39a99d6d38
--- /dev/null
+++ b/compiler/elf_writer_debug.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_ELF_WRITER_DEBUG_H_
+#define ART_COMPILER_ELF_WRITER_DEBUG_H_
+
+#include <vector>
+
+#include "oat_writer.h"
+
+namespace art {
+namespace dwarf {
+
+void WriteDebugSections(const CompilerDriver* compiler,
+ const OatWriter* oat_writer,
+ uint32_t text_section_offset,
+ std::vector<uint8_t>* eh_frame_data,
+ std::vector<uint8_t>* debug_info_data,
+ std::vector<uint8_t>* debug_abbrev_data,
+ std::vector<uint8_t>* debug_str_data,
+ std::vector<uint8_t>* debug_line_data);
+
+} // namespace dwarf
+} // namespace art
+
+#endif // ART_COMPILER_ELF_WRITER_DEBUG_H_
diff --git a/compiler/elf_writer_quick.cc b/compiler/elf_writer_quick.cc
index a822b24cde..e9af25f293 100644
--- a/compiler/elf_writer_quick.cc
+++ b/compiler/elf_writer_quick.cc
@@ -21,11 +21,14 @@
#include "base/logging.h"
#include "base/unix_file/fd_file.h"
#include "buffered_output_stream.h"
+#include "compiled_method.h"
+#include "dex_file-inl.h"
#include "driver/compiler_driver.h"
-#include "dwarf.h"
+#include "driver/compiler_options.h"
#include "elf_builder.h"
#include "elf_file.h"
#include "elf_utils.h"
+#include "elf_writer_debug.h"
#include "file_output_stream.h"
#include "globals.h"
#include "leb128.h"
@@ -35,42 +38,6 @@
namespace art {
-static void PushByte(std::vector<uint8_t>* buf, int data) {
- buf->push_back(data & 0xff);
-}
-
-static uint32_t PushStr(std::vector<uint8_t>* buf, const char* str, const char* def = nullptr) {
- if (str == nullptr) {
- str = def;
- }
-
- uint32_t offset = buf->size();
- for (size_t i = 0; str[i] != '\0'; ++i) {
- buf->push_back(str[i]);
- }
- buf->push_back('\0');
- return offset;
-}
-
-static uint32_t PushStr(std::vector<uint8_t>* buf, const std::string &str) {
- uint32_t offset = buf->size();
- buf->insert(buf->end(), str.begin(), str.end());
- buf->push_back('\0');
- return offset;
-}
-
-static void UpdateWord(std::vector<uint8_t>* buf, int offset, int data) {
- (*buf)[offset+0] = data;
- (*buf)[offset+1] = data >> 8;
- (*buf)[offset+2] = data >> 16;
- (*buf)[offset+3] = data >> 24;
-}
-
-static void PushHalf(std::vector<uint8_t>* buf, int data) {
- buf->push_back(data & 0xff);
- buf->push_back((data >> 8) & 0xff);
-}
-
template <typename Elf_Word, typename Elf_Sword, typename Elf_Addr,
typename Elf_Dyn, typename Elf_Sym, typename Elf_Ehdr,
typename Elf_Phdr, typename Elf_Shdr>
@@ -85,116 +52,6 @@ bool ElfWriterQuick<Elf_Word, Elf_Sword, Elf_Addr, Elf_Dyn,
return elf_writer.Write(oat_writer, dex_files, android_root, is_host);
}
-std::vector<uint8_t>* ConstructCIEFrameX86(bool is_x86_64) {
- std::vector<uint8_t>* cfi_info = new std::vector<uint8_t>;
-
- // Length (will be filled in later in this routine).
- if (is_x86_64) {
- Push32(cfi_info, 0xffffffff); // Indicates 64bit
- Push32(cfi_info, 0);
- Push32(cfi_info, 0);
- } else {
- Push32(cfi_info, 0);
- }
-
- // CIE id: always 0.
- if (is_x86_64) {
- Push32(cfi_info, 0);
- Push32(cfi_info, 0);
- } else {
- Push32(cfi_info, 0);
- }
-
- // Version: always 1.
- cfi_info->push_back(0x01);
-
- // Augmentation: 'zR\0'
- cfi_info->push_back(0x7a);
- cfi_info->push_back(0x52);
- cfi_info->push_back(0x0);
-
- // Code alignment: 1.
- EncodeUnsignedLeb128(1, cfi_info);
-
- // Data alignment.
- if (is_x86_64) {
- EncodeSignedLeb128(-8, cfi_info);
- } else {
- EncodeSignedLeb128(-4, cfi_info);
- }
-
- // Return address register.
- if (is_x86_64) {
- // R16(RIP)
- cfi_info->push_back(0x10);
- } else {
- // R8(EIP)
- cfi_info->push_back(0x08);
- }
-
- // Augmentation length: 1.
- cfi_info->push_back(1);
-
- // Augmentation data.
- if (is_x86_64) {
- // 0x04 ((DW_EH_PE_absptr << 4) | DW_EH_PE_udata8).
- cfi_info->push_back(0x04);
- } else {
- // 0x03 ((DW_EH_PE_absptr << 4) | DW_EH_PE_udata4).
- cfi_info->push_back(0x03);
- }
-
- // Initial instructions.
- if (is_x86_64) {
- // DW_CFA_def_cfa R7(RSP) 8.
- cfi_info->push_back(0x0c);
- cfi_info->push_back(0x07);
- cfi_info->push_back(0x08);
-
- // DW_CFA_offset R16(RIP) 1 (* -8).
- cfi_info->push_back(0x90);
- cfi_info->push_back(0x01);
- } else {
- // DW_CFA_def_cfa R4(ESP) 4.
- cfi_info->push_back(0x0c);
- cfi_info->push_back(0x04);
- cfi_info->push_back(0x04);
-
- // DW_CFA_offset R8(EIP) 1 (* -4).
- cfi_info->push_back(0x88);
- cfi_info->push_back(0x01);
- }
-
- // Padding to a multiple of 4
- while ((cfi_info->size() & 3) != 0) {
- // DW_CFA_nop is encoded as 0.
- cfi_info->push_back(0);
- }
-
- // Set the length of the CIE inside the generated bytes.
- if (is_x86_64) {
- uint32_t length = cfi_info->size() - 12;
- UpdateWord(cfi_info, 4, length);
- } else {
- uint32_t length = cfi_info->size() - 4;
- UpdateWord(cfi_info, 0, length);
- }
- return cfi_info;
-}
-
-std::vector<uint8_t>* ConstructCIEFrame(InstructionSet isa) {
- switch (isa) {
- case kX86:
- return ConstructCIEFrameX86(false);
- case kX86_64:
- return ConstructCIEFrameX86(true);
-
- default:
- // Not implemented.
- return nullptr;
- }
-}
-
class OatWriterWrapper FINAL : public CodeOutput {
public:
explicit OatWriterWrapper(OatWriter* oat_writer) : oat_writer_(oat_writer) {}
@@ -253,7 +110,8 @@ bool ElfWriterQuick<Elf_Word, Elf_Sword, Elf_Addr, Elf_Dyn,
return false;
}
- if (compiler_driver_->GetCompilerOptions().GetIncludeDebugSymbols()) {
+ if (compiler_driver_->GetCompilerOptions().GetIncludeDebugSymbols() &&
+ !oat_writer->GetMethodDebugInfo().empty()) {
WriteDebugSymbols(compiler_driver_, builder.get(), oat_writer);
}
@@ -273,402 +131,6 @@ bool ElfWriterQuick<Elf_Word, Elf_Sword, Elf_Addr, Elf_Dyn,
return builder->Write();
}
-class LineTableGenerator FINAL : public Leb128Encoder {
- public:
- LineTableGenerator(int line_base, int line_range, int opcode_base,
- std::vector<uint8_t>* data, uintptr_t current_address,
- size_t current_line)
- : Leb128Encoder(data), line_base_(line_base), line_range_(line_range),
- opcode_base_(opcode_base), current_address_(current_address),
- current_line_(current_line), current_file_index_(0) {}
-
- void PutDelta(unsigned delta_addr, int delta_line) {
- current_line_ += delta_line;
- current_address_ += delta_addr;
-
- if (delta_line >= line_base_ && delta_line < line_base_ + line_range_) {
- unsigned special_opcode = (delta_line - line_base_) +
- (line_range_ * delta_addr) + opcode_base_;
- if (special_opcode <= 255) {
- PushByte(data_, special_opcode);
- return;
- }
- }
-
- // generate standart opcode for address advance
- if (delta_addr != 0) {
- PushByte(data_, DW_LNS_advance_pc);
- PushBackUnsigned(delta_addr);
- }
-
- // generate standart opcode for line delta
- if (delta_line != 0) {
- PushByte(data_, DW_LNS_advance_line);
- PushBackSigned(delta_line);
- }
-
- // generate standart opcode for new LTN entry
- PushByte(data_, DW_LNS_copy);
- }
-
- void SetAddr(uintptr_t addr) {
- if (current_address_ == addr) {
- return;
- }
-
- current_address_ = addr;
-
- PushByte(data_, 0); // extended opcode:
- PushByte(data_, 1 + 4); // length: opcode_size + address_size
- PushByte(data_, DW_LNE_set_address);
- Push32(data_, addr);
- }
-
- void SetLine(unsigned line) {
- int delta_line = line - current_line_;
- if (delta_line) {
- current_line_ = line;
- PushByte(data_, DW_LNS_advance_line);
- PushBackSigned(delta_line);
- }
- }
-
- void SetFile(unsigned file_index) {
- if (current_file_index_ != file_index) {
- current_file_index_ = file_index;
- PushByte(data_, DW_LNS_set_file);
- PushBackUnsigned(file_index);
- }
- }
-
- void EndSequence() {
- // End of Line Table Program
- // 0(=ext), 1(len), DW_LNE_end_sequence
- PushByte(data_, 0);
- PushByte(data_, 1);
- PushByte(data_, DW_LNE_end_sequence);
- }
-
- private:
- const int line_base_;
- const int line_range_;
- const int opcode_base_;
- uintptr_t current_address_;
- size_t current_line_;
- unsigned current_file_index_;
-
- DISALLOW_COPY_AND_ASSIGN(LineTableGenerator);
-};
-
-// TODO: rewriting it using DexFile::DecodeDebugInfo needs unneeded stuff.
-static void GetLineInfoForJava(const uint8_t* dbgstream, const SwapSrcMap& pc2dex,
- DefaultSrcMap* result, uint32_t start_pc = 0) {
- if (dbgstream == nullptr) {
- return;
- }
-
- int adjopcode;
- uint32_t dex_offset = 0;
- uint32_t java_line = DecodeUnsignedLeb128(&dbgstream);
-
- // skip parameters
- for (uint32_t param_count = DecodeUnsignedLeb128(&dbgstream); param_count != 0; --param_count) {
- DecodeUnsignedLeb128(&dbgstream);
- }
-
- for (bool is_end = false; is_end == false; ) {
- uint8_t opcode = *dbgstream;
- dbgstream++;
- switch (opcode) {
- case DexFile::DBG_END_SEQUENCE:
- is_end = true;
- break;
-
- case DexFile::DBG_ADVANCE_PC:
- dex_offset += DecodeUnsignedLeb128(&dbgstream);
- break;
-
- case DexFile::DBG_ADVANCE_LINE:
- java_line += DecodeSignedLeb128(&dbgstream);
- break;
-
- case DexFile::DBG_START_LOCAL:
- case DexFile::DBG_START_LOCAL_EXTENDED:
- DecodeUnsignedLeb128(&dbgstream);
- DecodeUnsignedLeb128(&dbgstream);
- DecodeUnsignedLeb128(&dbgstream);
-
- if (opcode == DexFile::DBG_START_LOCAL_EXTENDED) {
- DecodeUnsignedLeb128(&dbgstream);
- }
- break;
-
- case DexFile::DBG_END_LOCAL:
- case DexFile::DBG_RESTART_LOCAL:
- DecodeUnsignedLeb128(&dbgstream);
- break;
-
- case DexFile::DBG_SET_PROLOGUE_END:
- case DexFile::DBG_SET_EPILOGUE_BEGIN:
- case DexFile::DBG_SET_FILE:
- break;
-
- default:
- adjopcode = opcode - DexFile::DBG_FIRST_SPECIAL;
- dex_offset += adjopcode / DexFile::DBG_LINE_RANGE;
- java_line += DexFile::DBG_LINE_BASE + (adjopcode % DexFile::DBG_LINE_RANGE);
-
- for (SwapSrcMap::const_iterator found = pc2dex.FindByTo(dex_offset);
- found != pc2dex.end() && found->to_ == static_cast<int32_t>(dex_offset);
- found++) {
- result->push_back({found->from_ + start_pc, static_cast<int32_t>(java_line)});
- }
- break;
- }
- }
-}
-
-/*
- * @brief Generate the DWARF debug_info and debug_abbrev sections
- * @param oat_writer The Oat file Writer.
- * @param dbg_info Compilation unit information.
- * @param dbg_abbrev Abbreviations used to generate dbg_info.
- * @param dbg_str Debug strings.
- */
-static void FillInCFIInformation(OatWriter* oat_writer,
- std::vector<uint8_t>* dbg_info,
- std::vector<uint8_t>* dbg_abbrev,
- std::vector<uint8_t>* dbg_str,
- std::vector<uint8_t>* dbg_line,
- uint32_t text_section_offset) {
- const std::vector<OatWriter::DebugInfo>& method_info = oat_writer->GetCFIMethodInfo();
-
- uint32_t producer_str_offset = PushStr(dbg_str, "Android dex2oat");
-
- // Create the debug_abbrev section with boilerplate information.
- // We only care about low_pc and high_pc right now for the compilation
- // unit and methods.
-
- // Tag 1: Compilation unit: DW_TAG_compile_unit.
- PushByte(dbg_abbrev, 1);
- PushByte(dbg_abbrev, DW_TAG_compile_unit);
-
- // There are children (the methods).
- PushByte(dbg_abbrev, DW_CHILDREN_yes);
-
- // DW_AT_producer DW_FORM_data1.
- // REVIEW: we can get rid of dbg_str section if
- // DW_FORM_string (immediate string) was used everywhere instead of
- // DW_FORM_strp (ref to string from .debug_str section).
- // DW_FORM_strp makes sense only if we reuse the strings.
- PushByte(dbg_abbrev, DW_AT_producer);
- PushByte(dbg_abbrev, DW_FORM_strp);
-
- // DW_LANG_Java DW_FORM_data1.
- PushByte(dbg_abbrev, DW_AT_language);
- PushByte(dbg_abbrev, DW_FORM_data1);
-
- // DW_AT_low_pc DW_FORM_addr.
- PushByte(dbg_abbrev, DW_AT_low_pc);
- PushByte(dbg_abbrev, DW_FORM_addr);
-
- // DW_AT_high_pc DW_FORM_addr.
- PushByte(dbg_abbrev, DW_AT_high_pc);
- PushByte(dbg_abbrev, DW_FORM_addr);
-
- if (dbg_line != nullptr) {
- // DW_AT_stmt_list DW_FORM_sec_offset.
- PushByte(dbg_abbrev, DW_AT_stmt_list);
- PushByte(dbg_abbrev, DW_FORM_sec_offset);
- }
-
- // End of DW_TAG_compile_unit.
- PushHalf(dbg_abbrev, 0);
-
- // Tag 2: Compilation unit: DW_TAG_subprogram.
- PushByte(dbg_abbrev, 2);
- PushByte(dbg_abbrev, DW_TAG_subprogram);
-
- // There are no children.
- PushByte(dbg_abbrev, DW_CHILDREN_no);
-
- // Name of the method.
- PushByte(dbg_abbrev, DW_AT_name);
- PushByte(dbg_abbrev, DW_FORM_strp);
-
- // DW_AT_low_pc DW_FORM_addr.
- PushByte(dbg_abbrev, DW_AT_low_pc);
- PushByte(dbg_abbrev, DW_FORM_addr);
-
- // DW_AT_high_pc DW_FORM_addr.
- PushByte(dbg_abbrev, DW_AT_high_pc);
- PushByte(dbg_abbrev, DW_FORM_addr);
-
- // End of DW_TAG_subprogram.
- PushHalf(dbg_abbrev, 0);
-
- // Start the debug_info section with the header information
- // 'unit_length' will be filled in later.
- int cunit_length = dbg_info->size();
- Push32(dbg_info, 0);
-
- // 'version' - 3.
- PushHalf(dbg_info, 3);
-
- // Offset into .debug_abbrev section (always 0).
- Push32(dbg_info, 0);
-
- // Address size: 4.
- PushByte(dbg_info, 4);
-
- // Start the description for the compilation unit.
- // This uses tag 1.
- PushByte(dbg_info, 1);
-
- // The producer is Android dex2oat.
- Push32(dbg_info, producer_str_offset);
-
- // The language is Java.
- PushByte(dbg_info, DW_LANG_Java);
-
- // low_pc and high_pc.
- uint32_t cunit_low_pc = 0 - 1;
- uint32_t cunit_high_pc = 0;
- int cunit_low_pc_pos = dbg_info->size();
- Push32(dbg_info, 0);
- Push32(dbg_info, 0);
-
- if (dbg_line == nullptr) {
- for (size_t i = 0; i < method_info.size(); ++i) {
- const OatWriter::DebugInfo &dbg = method_info[i];
-
- cunit_low_pc = std::min(cunit_low_pc, dbg.low_pc_);
- cunit_high_pc = std::max(cunit_high_pc, dbg.high_pc_);
-
- // Start a new TAG: subroutine (2).
- PushByte(dbg_info, 2);
-
- // Enter name, low_pc, high_pc.
- Push32(dbg_info, PushStr(dbg_str, dbg.method_name_));
- Push32(dbg_info, dbg.low_pc_ + text_section_offset);
- Push32(dbg_info, dbg.high_pc_ + text_section_offset);
- }
- } else {
- // TODO: in gdb info functions <regexp> - reports Java functions, but
- // source file is <unknown> because .debug_line is formed as one
- // compilation unit. To fix this it is possible to generate
- // a separate compilation unit for every distinct Java source.
- // Each of the these compilation units can have several non-adjacent
- // method ranges.
-
- // Line number table offset
- Push32(dbg_info, dbg_line->size());
-
- size_t lnt_length = dbg_line->size();
- Push32(dbg_line, 0);
-
- PushHalf(dbg_line, 4); // LNT Version DWARF v4 => 4
-
- size_t lnt_hdr_length = dbg_line->size();
- Push32(dbg_line, 0); // TODO: 64-bit uses 8-byte here
-
- PushByte(dbg_line, 1); // minimum_instruction_length (ubyte)
- PushByte(dbg_line, 1); // maximum_operations_per_instruction (ubyte) = always 1
- PushByte(dbg_line, 1); // default_is_stmt (ubyte)
-
- const int8_t LINE_BASE = -5;
- PushByte(dbg_line, LINE_BASE); // line_base (sbyte)
-
- const uint8_t LINE_RANGE = 14;
- PushByte(dbg_line, LINE_RANGE); // line_range (ubyte)
-
- const uint8_t OPCODE_BASE = 13;
- PushByte(dbg_line, OPCODE_BASE); // opcode_base (ubyte)
-
- // Standard_opcode_lengths (array of ubyte).
- PushByte(dbg_line, 0); PushByte(dbg_line, 1); PushByte(dbg_line, 1);
- PushByte(dbg_line, 1); PushByte(dbg_line, 1); PushByte(dbg_line, 0);
- PushByte(dbg_line, 0); PushByte(dbg_line, 0); PushByte(dbg_line, 1);
- PushByte(dbg_line, 0); PushByte(dbg_line, 0); PushByte(dbg_line, 1);
-
- PushByte(dbg_line, 0); // include_directories (sequence of path names) = EMPTY
-
- // File_names (sequence of file entries).
- std::unordered_map<const char*, size_t> files;
- for (size_t i = 0; i < method_info.size(); ++i) {
- const OatWriter::DebugInfo &dbg = method_info[i];
- // TODO: add package directory to the file name
- const char* file_name = dbg.src_file_name_ == nullptr ? "null" : dbg.src_file_name_;
- auto found = files.find(file_name);
- if (found == files.end()) {
- size_t file_index = 1 + files.size();
- files[file_name] = file_index;
- PushStr(dbg_line, file_name);
- PushByte(dbg_line, 0); // include directory index = LEB128(0) - no directory
- PushByte(dbg_line, 0); // modification time = LEB128(0) - NA
- PushByte(dbg_line, 0); // file length = LEB128(0) - NA
- }
- }
- PushByte(dbg_line, 0); // End of file_names.
-
- // Set lnt header length.
- UpdateWord(dbg_line, lnt_hdr_length, dbg_line->size() - lnt_hdr_length - 4);
-
- // Generate Line Number Program code, one long program for all methods.
- LineTableGenerator line_table_generator(LINE_BASE, LINE_RANGE, OPCODE_BASE,
- dbg_line, 0, 1);
-
- DefaultSrcMap pc2java_map;
- for (size_t i = 0; i < method_info.size(); ++i) {
- const OatWriter::DebugInfo &dbg = method_info[i];
- const char* file_name = (dbg.src_file_name_ == nullptr) ? "null" : dbg.src_file_name_;
- size_t file_index = files[file_name];
- DCHECK_NE(file_index, 0U) << file_name;
-
- cunit_low_pc = std::min(cunit_low_pc, dbg.low_pc_);
- cunit_high_pc = std::max(cunit_high_pc, dbg.high_pc_);
-
- // Start a new TAG: subroutine (2).
- PushByte(dbg_info, 2);
-
- // Enter name, low_pc, high_pc.
- Push32(dbg_info, PushStr(dbg_str, dbg.method_name_));
- Push32(dbg_info, dbg.low_pc_ + text_section_offset);
- Push32(dbg_info, dbg.high_pc_ + text_section_offset);
-
- GetLineInfoForJava(dbg.dbgstream_, dbg.compiled_method_->GetSrcMappingTable(),
- &pc2java_map, dbg.low_pc_);
- pc2java_map.DeltaFormat({dbg.low_pc_, 1}, dbg.high_pc_);
- if (!pc2java_map.empty()) {
- line_table_generator.SetFile(file_index);
- line_table_generator.SetAddr(dbg.low_pc_ + text_section_offset);
- line_table_generator.SetLine(1);
- for (auto& src_map_elem : pc2java_map) {
- line_table_generator.PutDelta(src_map_elem.from_, src_map_elem.to_);
- }
- pc2java_map.clear();
- }
- }
-
- // End Sequence should have the highest address set.
- line_table_generator.SetAddr(cunit_high_pc + text_section_offset);
- line_table_generator.EndSequence();
-
- // set lnt length
- UpdateWord(dbg_line, lnt_length, dbg_line->size() - lnt_length - 4);
- }
-
- // One byte terminator
- PushByte(dbg_info, 0);
-
- // Fill in cunit's low_pc and high_pc.
- UpdateWord(dbg_info, cunit_low_pc_pos, cunit_low_pc + text_section_offset);
- UpdateWord(dbg_info, cunit_low_pc_pos + 4, cunit_high_pc + text_section_offset);
-
- // We have now walked all the methods. Fill in lengths.
- UpdateWord(dbg_info, cunit_length, dbg_info->size() - cunit_length - 4);
-}
-
template <typename Elf_Word, typename Elf_Sword, typename Elf_Addr,
typename Elf_Dyn, typename Elf_Sym, typename Elf_Ehdr,
typename Elf_Phdr, typename Elf_Shdr>
@@ -678,18 +140,23 @@ static void WriteDebugSymbols(const CompilerDriver* compiler_driver,
ElfBuilder<Elf_Word, Elf_Sword, Elf_Addr, Elf_Dyn,
Elf_Sym, Elf_Ehdr, Elf_Phdr, Elf_Shdr>* builder,
OatWriter* oat_writer) {
- std::unique_ptr<std::vector<uint8_t>> cfi_info(
- ConstructCIEFrame(compiler_driver->GetInstructionSet()));
-
- Elf_Addr text_section_address = builder->GetTextBuilder().GetSection()->sh_addr;
-
// Iterate over the compiled methods.
- const std::vector<OatWriter::DebugInfo>& method_info = oat_writer->GetCFIMethodInfo();
+ const std::vector<OatWriter::DebugInfo>& method_info = oat_writer->GetMethodDebugInfo();
ElfSymtabBuilder<Elf_Word, Elf_Sword, Elf_Addr, Elf_Sym, Elf_Shdr>* symtab =
builder->GetSymtabBuilder();
for (auto it = method_info.begin(); it != method_info.end(); ++it) {
- symtab->AddSymbol(it->method_name_, &builder->GetTextBuilder(), it->low_pc_, true,
- it->high_pc_ - it->low_pc_, STB_GLOBAL, STT_FUNC);
+ std::string name = PrettyMethod(it->dex_method_index_, *it->dex_file_, true);
+ if (it->deduped_) {
+      // TODO: We should place the DEDUPED tag on the first instance of a deduplicated symbol
+ // so that it will show up in a debuggerd crash report.
+ name += " [ DEDUPED ]";
+ }
+
+ uint32_t low_pc = it->low_pc_;
+ // Add in code delta, e.g., thumb bit 0 for Thumb2 code.
+ low_pc += it->compiled_method_->CodeDelta();
+ symtab->AddSymbol(name, &builder->GetTextBuilder(), low_pc,
+ true, it->high_pc_ - it->low_pc_, STB_GLOBAL, STT_FUNC);
// Conforming to aaelf, add $t mapping symbol to indicate start of a sequence of thumb2
// instructions, so that disassembler tools can correctly disassemble.
@@ -697,109 +164,29 @@ static void WriteDebugSymbols(const CompilerDriver* compiler_driver,
symtab->AddSymbol("$t", &builder->GetTextBuilder(), it->low_pc_ & ~1, true,
0, STB_LOCAL, STT_NOTYPE);
}
-
- // Include CFI for compiled method, if possible.
- if (cfi_info.get() != nullptr) {
- DCHECK(it->compiled_method_ != nullptr);
-
- // Copy in the FDE, if present
- const SwapVector<uint8_t>* fde = it->compiled_method_->GetCFIInfo();
- if (fde != nullptr) {
- // Copy the information into cfi_info and then fix the address in the new copy.
- int cur_offset = cfi_info->size();
- cfi_info->insert(cfi_info->end(), fde->begin(), fde->end());
-
- bool is_64bit = *(reinterpret_cast<const uint32_t*>(fde->data())) == 0xffffffff;
-
- // Set the 'CIE_pointer' field.
- uint64_t CIE_pointer = cur_offset + (is_64bit ? 12 : 4);
- uint64_t offset_to_update = CIE_pointer;
- if (is_64bit) {
- (*cfi_info)[offset_to_update+0] = CIE_pointer;
- (*cfi_info)[offset_to_update+1] = CIE_pointer >> 8;
- (*cfi_info)[offset_to_update+2] = CIE_pointer >> 16;
- (*cfi_info)[offset_to_update+3] = CIE_pointer >> 24;
- (*cfi_info)[offset_to_update+4] = CIE_pointer >> 32;
- (*cfi_info)[offset_to_update+5] = CIE_pointer >> 40;
- (*cfi_info)[offset_to_update+6] = CIE_pointer >> 48;
- (*cfi_info)[offset_to_update+7] = CIE_pointer >> 56;
- } else {
- (*cfi_info)[offset_to_update+0] = CIE_pointer;
- (*cfi_info)[offset_to_update+1] = CIE_pointer >> 8;
- (*cfi_info)[offset_to_update+2] = CIE_pointer >> 16;
- (*cfi_info)[offset_to_update+3] = CIE_pointer >> 24;
- }
-
- // Set the 'initial_location' field.
- offset_to_update += is_64bit ? 8 : 4;
- if (is_64bit) {
- const uint64_t quick_code_start = it->low_pc_ + text_section_address;
- (*cfi_info)[offset_to_update+0] = quick_code_start;
- (*cfi_info)[offset_to_update+1] = quick_code_start >> 8;
- (*cfi_info)[offset_to_update+2] = quick_code_start >> 16;
- (*cfi_info)[offset_to_update+3] = quick_code_start >> 24;
- (*cfi_info)[offset_to_update+4] = quick_code_start >> 32;
- (*cfi_info)[offset_to_update+5] = quick_code_start >> 40;
- (*cfi_info)[offset_to_update+6] = quick_code_start >> 48;
- (*cfi_info)[offset_to_update+7] = quick_code_start >> 56;
- } else {
- const uint32_t quick_code_start = it->low_pc_ + text_section_address;
- (*cfi_info)[offset_to_update+0] = quick_code_start;
- (*cfi_info)[offset_to_update+1] = quick_code_start >> 8;
- (*cfi_info)[offset_to_update+2] = quick_code_start >> 16;
- (*cfi_info)[offset_to_update+3] = quick_code_start >> 24;
- }
- }
- }
- }
-
- bool hasCFI = (cfi_info.get() != nullptr);
- bool hasLineInfo = false;
- for (auto& dbg_info : oat_writer->GetCFIMethodInfo()) {
- if (dbg_info.dbgstream_ != nullptr &&
- !dbg_info.compiled_method_->GetSrcMappingTable().empty()) {
- hasLineInfo = true;
- break;
- }
}
- if (hasLineInfo || hasCFI) {
- ElfRawSectionBuilder<Elf_Word, Elf_Sword, Elf_Shdr> debug_info(".debug_info",
- SHT_PROGBITS,
- 0, nullptr, 0, 1, 0);
- ElfRawSectionBuilder<Elf_Word, Elf_Sword, Elf_Shdr> debug_abbrev(".debug_abbrev",
- SHT_PROGBITS,
- 0, nullptr, 0, 1, 0);
- ElfRawSectionBuilder<Elf_Word, Elf_Sword, Elf_Shdr> debug_str(".debug_str",
- SHT_PROGBITS,
- 0, nullptr, 0, 1, 0);
- ElfRawSectionBuilder<Elf_Word, Elf_Sword, Elf_Shdr> debug_line(".debug_line",
- SHT_PROGBITS,
- 0, nullptr, 0, 1, 0);
-
- FillInCFIInformation(oat_writer, debug_info.GetBuffer(),
- debug_abbrev.GetBuffer(), debug_str.GetBuffer(),
- hasLineInfo ? debug_line.GetBuffer() : nullptr,
- text_section_address);
-
- builder->RegisterRawSection(debug_info);
- builder->RegisterRawSection(debug_abbrev);
-
- if (hasCFI) {
- ElfRawSectionBuilder<Elf_Word, Elf_Sword, Elf_Shdr> eh_frame(".eh_frame",
- SHT_PROGBITS,
- SHF_ALLOC,
- nullptr, 0, 4, 0);
- eh_frame.SetBuffer(std::move(*cfi_info.get()));
- builder->RegisterRawSection(eh_frame);
- }
-
- if (hasLineInfo) {
- builder->RegisterRawSection(debug_line);
- }
-
- builder->RegisterRawSection(debug_str);
- }
+ typedef ElfRawSectionBuilder<Elf_Word, Elf_Sword, Elf_Shdr> Section;
+ Section eh_frame(".eh_frame", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, 4, 0);
+ Section debug_info(".debug_info", SHT_PROGBITS, 0, nullptr, 0, 1, 0);
+ Section debug_abbrev(".debug_abbrev", SHT_PROGBITS, 0, nullptr, 0, 1, 0);
+ Section debug_str(".debug_str", SHT_PROGBITS, 0, nullptr, 0, 1, 0);
+ Section debug_line(".debug_line", SHT_PROGBITS, 0, nullptr, 0, 1, 0);
+
+ dwarf::WriteDebugSections(compiler_driver,
+ oat_writer,
+ builder->GetTextBuilder().GetSection()->sh_addr,
+ eh_frame.GetBuffer(),
+ debug_info.GetBuffer(),
+ debug_abbrev.GetBuffer(),
+ debug_str.GetBuffer(),
+ debug_line.GetBuffer());
+
+ builder->RegisterRawSection(eh_frame);
+ builder->RegisterRawSection(debug_info);
+ builder->RegisterRawSection(debug_abbrev);
+ builder->RegisterRawSection(debug_str);
+ builder->RegisterRawSection(debug_line);
}
// Explicit instantiations
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index c1555aa523..1ede228c4f 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -19,6 +19,7 @@
#include <sys/stat.h>
#include <memory>
+#include <numeric>
#include <vector>
#include "base/logging.h"
@@ -54,8 +55,7 @@
#include "runtime.h"
#include "scoped_thread_state_change.h"
#include "handle_scope-inl.h"
-
-#include <numeric>
+#include "utils/dex_cache_arrays_layout-inl.h"
using ::art::mirror::ArtField;
using ::art::mirror::ArtMethod;
@@ -238,7 +238,7 @@ void ImageWriter::AssignImageOffset(mirror::Object* object, ImageWriter::BinSlot
DCHECK(object != nullptr);
DCHECK_NE(image_objects_offset_begin_, 0u);
- size_t previous_bin_sizes = GetBinSizeSum(bin_slot.GetBin()); // sum sizes in [0..bin#)
+ size_t previous_bin_sizes = bin_slot_previous_sizes_[bin_slot.GetBin()];
size_t new_offset = image_objects_offset_begin_ + previous_bin_sizes + bin_slot.GetIndex();
DCHECK_ALIGNED(new_offset, kObjectAlignment);
@@ -293,6 +293,28 @@ void ImageWriter::SetImageBinSlot(mirror::Object* object, BinSlot bin_slot) {
DCHECK(IsImageBinSlotAssigned(object));
}
+void ImageWriter::PrepareDexCacheArraySlots() {
+ ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+ ReaderMutexLock mu(Thread::Current(), *class_linker->DexLock());
+ size_t dex_cache_count = class_linker->GetDexCacheCount();
+ uint32_t size = 0u;
+ for (size_t idx = 0; idx < dex_cache_count; ++idx) {
+ DexCache* dex_cache = class_linker->GetDexCache(idx);
+ const DexFile* dex_file = dex_cache->GetDexFile();
+ dex_cache_array_starts_.Put(dex_file, size);
+ DexCacheArraysLayout layout(dex_file);
+ DCHECK(layout.Valid());
+ dex_cache_array_indexes_.Put(dex_cache->GetResolvedTypes(), size + layout.TypesOffset());
+ dex_cache_array_indexes_.Put(dex_cache->GetResolvedMethods(), size + layout.MethodsOffset());
+ dex_cache_array_indexes_.Put(dex_cache->GetResolvedFields(), size + layout.FieldsOffset());
+ dex_cache_array_indexes_.Put(dex_cache->GetStrings(), size + layout.StringsOffset());
+ size += layout.Size();
+ }
+ // Set the slot size early to avoid DCHECK() failures in IsImageBinSlotAssigned()
+  // when AssignImageBinSlot() assigns their indexes out of order.
+ bin_slot_sizes_[kBinDexCacheArray] = size;
+}
+
void ImageWriter::AssignImageBinSlot(mirror::Object* object) {
DCHECK(object != nullptr);
size_t object_size = object->SizeOf();
@@ -307,6 +329,7 @@ void ImageWriter::AssignImageBinSlot(mirror::Object* object) {
// This means more pages will stay either clean or shared dirty (with zygote) and
// the app will use less of its own (private) memory.
Bin bin = kBinRegular;
+ size_t current_offset = 0u;
if (kBinObjects) {
//
@@ -316,6 +339,12 @@ void ImageWriter::AssignImageBinSlot(mirror::Object* object) {
// Memory analysis has determined that the following types of objects get dirtied
// the most:
//
+ // * Dex cache arrays are stored in a special bin. The arrays for each dex cache have
+ // a fixed layout which helps improve generated code (using PC-relative addressing),
+ // so we pre-calculate their offsets separately in PrepareDexCacheArraySlots().
+ // Since these arrays are huge, most pages do not overlap other objects and it's not
+ // really important where they are for the clean/dirty separation. Due to their
+ // special PC-relative addressing, we arbitrarily keep them at the beginning.
// * Class'es which are verified [their clinit runs only at runtime]
// - classes in general [because their static fields get overwritten]
// - initialized classes with all-final statics are unlikely to be ever dirty,
@@ -376,13 +405,21 @@ void ImageWriter::AssignImageBinSlot(mirror::Object* object) {
}
} else if (object->GetClass<kVerifyNone>()->IsStringClass()) {
bin = kBinString; // Strings are almost always immutable (except for object header).
+ } else if (object->IsObjectArray()) {
+ auto it = dex_cache_array_indexes_.find(object);
+ if (it != dex_cache_array_indexes_.end()) {
+ bin = kBinDexCacheArray;
+ current_offset = it->second; // Use the prepared offset defined by the DexCacheArraysLayout.
+ } // else bin = kBinRegular
} // else bin = kBinRegular
}
- size_t current_offset = bin_slot_sizes_[bin]; // How many bytes the current bin is at (aligned).
- // Move the current bin size up to accomodate the object we just assigned a bin slot.
size_t offset_delta = RoundUp(object_size, kObjectAlignment); // 64-bit alignment
- bin_slot_sizes_[bin] += offset_delta;
+ if (bin != kBinDexCacheArray) {
+ current_offset = bin_slot_sizes_[bin]; // How many bytes the current bin is at (aligned).
+ // Move the current bin size up to accommodate the object we just assigned a bin slot.
+ bin_slot_sizes_[bin] += offset_delta;
+ }
BinSlot new_bin_slot(bin, current_offset);
SetImageBinSlot(object, new_bin_slot);
@@ -887,8 +924,17 @@ void ImageWriter::CalculateNewObjectOffsets() {
// TODO: Image spaces only?
DCHECK_LT(image_end_, image_->Size());
image_objects_offset_begin_ = image_end_;
+ // Prepare bin slots for dex cache arrays.
+ PrepareDexCacheArraySlots();
// Clear any pre-existing monitors which may have been in the monitor words, assign bin slots.
heap->VisitObjects(WalkFieldsCallback, this);
+ // Calculate cumulative bin slot sizes.
+ size_t previous_sizes = 0u;
+ for (size_t i = 0; i != kBinSize; ++i) {
+ bin_slot_previous_sizes_[i] = previous_sizes;
+ previous_sizes += bin_slot_sizes_[i];
+ }
+ DCHECK_EQ(previous_sizes, GetBinSizeSum());
// Transform each object's bin slot into an offset which will be used to do the final copy.
heap->VisitObjects(UnbinObjectsIntoOffsetCallback, this);
DCHECK(saved_hashes_map_.empty()); // All binslot hashes should've been put into vector by now.
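The cumulative-size loop above is an exclusive prefix sum over the bin sizes. Assuming plain arrays, the same result could be produced with std::exclusive_scan from <numeric> (the header this change moves to the top of the include list); this is only an equivalence sketch, not the patch's code:

    #include <cstddef>
    #include <numeric>  // std::exclusive_scan (C++17).

    constexpr size_t kNumBins = 12;  // Stand-in for kBinSize.

    void ComputePreviousSizes(const size_t (&sizes)[kNumBins], size_t (&previous)[kNumBins]) {
      // previous[i] = sizes[0] + ... + sizes[i-1], with previous[0] = 0.
      std::exclusive_scan(sizes, sizes + kNumBins, previous, size_t{0});
    }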
@@ -1187,8 +1233,8 @@ size_t ImageWriter::GetBinSizeSum(ImageWriter::Bin up_to) const {
ImageWriter::BinSlot::BinSlot(uint32_t lockword) : lockword_(lockword) {
// These values may need to get updated if more bins are added to the enum Bin
- static_assert(kBinBits == 3, "wrong number of bin bits");
- static_assert(kBinShift == 29, "wrong number of shift");
+ static_assert(kBinBits == 4, "wrong number of bin bits");
+ static_assert(kBinShift == 28, "wrong number of shift");
static_assert(sizeof(BinSlot) == sizeof(LockWord), "BinSlot/LockWord must have equal sizes");
DCHECK_LT(GetBin(), kBinSize);
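The updated asserts reflect that a BinSlot packs the bin index into the top kBinBits of the 32-bit lock word, leaving 28 bits for the offset within the bin. A small sketch of the packing implied by those constants (constants copied from the asserts; helper names are illustrative):

    #include <cstdint>

    constexpr uint32_t kBinBits = 4;
    constexpr uint32_t kBinShift = 28;  // 32 - kBinBits.
    constexpr uint32_t kIndexMask = (1u << kBinShift) - 1u;

    constexpr uint32_t MakeBinSlot(uint32_t bin, uint32_t index) {
      return (bin << kBinShift) | (index & kIndexMask);
    }
    constexpr uint32_t BinOf(uint32_t lockword) { return lockword >> kBinShift; }
    constexpr uint32_t IndexOf(uint32_t lockword) { return lockword & kIndexMask; }

    static_assert(BinOf(MakeBinSlot(5u, 0x123u)) == 5u, "bin round-trips");
    static_assert(IndexOf(MakeBinSlot(5u, 0x123u)) == 0x123u, "index round-trips");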
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index 53f5ce4545..71044f7b6e 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -52,7 +52,8 @@ class ImageWriter FINAL {
quick_imt_conflict_trampoline_offset_(0), quick_resolution_trampoline_offset_(0),
quick_to_interpreter_bridge_offset_(0), compile_pic_(compile_pic),
target_ptr_size_(InstructionSetPointerSize(compiler_driver_.GetInstructionSet())),
- bin_slot_sizes_(), bin_slot_count_() {
+ bin_slot_sizes_(), bin_slot_previous_sizes_(), bin_slot_count_(),
+ string_data_array_(nullptr) {
CHECK_NE(image_begin, 0U);
}
@@ -80,6 +81,14 @@ class ImageWriter FINAL {
return reinterpret_cast<mirror::Object*>(image_begin_ + GetImageOffset(object));
}
+ mirror::HeapReference<mirror::Object>* GetDexCacheArrayElementImageAddress(
+ const DexFile* dex_file, uint32_t offset) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+ auto it = dex_cache_array_starts_.find(dex_file);
+ DCHECK(it != dex_cache_array_starts_.end());
+ return reinterpret_cast<mirror::HeapReference<mirror::Object>*>(
+ image_begin_ + RoundUp(sizeof(ImageHeader), kObjectAlignment) + it->second + offset);
+ }
+
uint8_t* GetOatFileBegin() const {
return image_begin_ + RoundUp(image_end_, kPageSize);
}
@@ -101,6 +110,10 @@ class ImageWriter FINAL {
// Classify different kinds of bins that objects end up getting packed into during image writing.
enum Bin {
+ // Dex cache arrays have a special slot for PC-relative addressing. Since they are
+ // huge, their dirtiness is not important for the clean/dirty separation, so we
+ // arbitrarily keep them at the beginning.
+ kBinDexCacheArray, // Object arrays belonging to dex cache.
// Likely-clean:
kBinString, // [String] Almost always immutable (except for obj header).
kBinArtMethodsManagedInitialized, // [ArtMethod] Not-native, and initialized. Unlikely to dirty
@@ -113,7 +126,6 @@ class ImageWriter FINAL {
kBinClassVerified, // Class verified, but initializers haven't been run
kBinArtMethodNative, // Art method that is actually native
kBinArtMethodNotInitialized, // Art method with a declaring class that wasn't initialized
- // Don't care about other art methods since they don't dirty
// Add more bins here if we add more segregation code.
kBinSize,
};
@@ -157,6 +169,7 @@ class ImageWriter FINAL {
SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
size_t GetImageOffset(mirror::Object* object) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+ void PrepareDexCacheArraySlots() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
void AssignImageBinSlot(mirror::Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
void SetImageBinSlot(mirror::Object* object, BinSlot bin_slot)
SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -282,6 +295,12 @@ class ImageWriter FINAL {
// Memory mapped for generating the image.
std::unique_ptr<MemMap> image_;
+ // Indexes for dex cache arrays (objects are inside of the image so that they don't move).
+ SafeMap<mirror::Object*, size_t> dex_cache_array_indexes_;
+
+ // The start offsets of the dex cache arrays.
+ SafeMap<const DexFile*, size_t> dex_cache_array_starts_;
+
// Saved hashes (objects are inside of the image so that they don't move).
std::vector<std::pair<mirror::Object*, uint32_t>> saved_hashes_;
@@ -309,6 +328,7 @@ class ImageWriter FINAL {
// Bin slot tracking for dirty object packing
size_t bin_slot_sizes_[kBinSize]; // Number of bytes in a bin
+ size_t bin_slot_previous_sizes_[kBinSize]; // Number of bytes in previous bins.
size_t bin_slot_count_[kBinSize]; // Number of objects in a bin
void* string_data_array_; // The backing for the interned strings.
diff --git a/compiler/jni/jni_cfi_test.cc b/compiler/jni/jni_cfi_test.cc
new file mode 100644
index 0000000000..3a0d520e47
--- /dev/null
+++ b/compiler/jni/jni_cfi_test.cc
@@ -0,0 +1,93 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <memory>
+#include <vector>
+
+#include "arch/instruction_set.h"
+#include "cfi_test.h"
+#include "gtest/gtest.h"
+#include "jni/quick/calling_convention.h"
+#include "utils/assembler.h"
+
+#include "jni/jni_cfi_test_expected.inc"
+
+namespace art {
+
+// Run the tests only on host.
+#ifndef HAVE_ANDROID_OS
+
+class JNICFITest : public CFITest {
+ public:
+ // Enable this flag to generate the expected outputs.
+ static constexpr bool kGenerateExpected = false;
+
+ void TestImpl(InstructionSet isa, const char* isa_str,
+ const std::vector<uint8_t>& expected_asm,
+ const std::vector<uint8_t>& expected_cfi) {
+ // Description of simple method.
+ const bool is_static = true;
+ const bool is_synchronized = false;
+ const char* shorty = "IIFII";
+ std::unique_ptr<JniCallingConvention> jni_conv(
+ JniCallingConvention::Create(is_static, is_synchronized, shorty, isa));
+ std::unique_ptr<ManagedRuntimeCallingConvention> mr_conv(
+ ManagedRuntimeCallingConvention::Create(is_static, is_synchronized, shorty, isa));
+ const int frame_size(jni_conv->FrameSize());
+ const std::vector<ManagedRegister>& callee_save_regs = jni_conv->CalleeSaveRegisters();
+
+ // Assemble the method.
+ std::unique_ptr<Assembler> jni_asm(Assembler::Create(isa));
+ jni_asm->BuildFrame(frame_size, mr_conv->MethodRegister(),
+ callee_save_regs, mr_conv->EntrySpills());
+ jni_asm->IncreaseFrameSize(32);
+ jni_asm->DecreaseFrameSize(32);
+ jni_asm->RemoveFrame(frame_size, callee_save_regs);
+ jni_asm->EmitSlowPaths();
+ std::vector<uint8_t> actual_asm(jni_asm->CodeSize());
+ MemoryRegion code(&actual_asm[0], actual_asm.size());
+ jni_asm->FinalizeInstructions(code);
+ ASSERT_EQ(jni_asm->cfi().GetCurrentCFAOffset(), frame_size);
+ const std::vector<uint8_t>& actual_cfi = *(jni_asm->cfi().data());
+
+ if (kGenerateExpected) {
+ GenerateExpected(stdout, isa, isa_str, actual_asm, actual_cfi);
+ } else {
+ EXPECT_EQ(expected_asm, actual_asm);
+ EXPECT_EQ(expected_cfi, actual_cfi);
+ }
+ }
+};
+
+#define TEST_ISA(isa) \
+ TEST_F(JNICFITest, isa) { \
+ std::vector<uint8_t> expected_asm(expected_asm_##isa, \
+ expected_asm_##isa + arraysize(expected_asm_##isa)); \
+ std::vector<uint8_t> expected_cfi(expected_cfi_##isa, \
+ expected_cfi_##isa + arraysize(expected_cfi_##isa)); \
+ TestImpl(isa, #isa, expected_asm, expected_cfi); \
+ }
+
+TEST_ISA(kThumb2)
+TEST_ISA(kArm64)
+TEST_ISA(kX86)
+TEST_ISA(kX86_64)
+TEST_ISA(kMips)
+TEST_ISA(kMips64)
+
+#endif // HAVE_ANDROID_OS
+
+} // namespace art
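For reference, each TEST_ISA(...) invocation above expands to an ordinary gtest case; for example, TEST_ISA(kX86) is equivalent to:

    TEST_F(JNICFITest, kX86) {
      std::vector<uint8_t> expected_asm(expected_asm_kX86,
                                        expected_asm_kX86 + arraysize(expected_asm_kX86));
      std::vector<uint8_t> expected_cfi(expected_cfi_kX86,
                                        expected_cfi_kX86 + arraysize(expected_cfi_kX86));
      TestImpl(kX86, "kX86", expected_asm, expected_cfi);
    }

Flipping kGenerateExpected to true makes the tests print regenerated arrays to stdout instead of comparing, which is how the expected .inc file below is produced.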
diff --git a/compiler/jni/jni_cfi_test_expected.inc b/compiler/jni/jni_cfi_test_expected.inc
new file mode 100644
index 0000000000..47e6f106ca
--- /dev/null
+++ b/compiler/jni/jni_cfi_test_expected.inc
@@ -0,0 +1,505 @@
+static constexpr uint8_t expected_asm_kThumb2[] = {
+ 0x2D, 0xE9, 0xE0, 0x4D, 0x2D, 0xED, 0x10, 0x8A, 0x89, 0xB0, 0x00, 0x90,
+ 0xCD, 0xF8, 0x84, 0x10, 0x8D, 0xED, 0x22, 0x0A, 0xCD, 0xF8, 0x8C, 0x20,
+ 0xCD, 0xF8, 0x90, 0x30, 0x88, 0xB0, 0x08, 0xB0, 0x09, 0xB0, 0xBD, 0xEC,
+ 0x10, 0x8A, 0xBD, 0xE8, 0xE0, 0x8D,
+};
+static constexpr uint8_t expected_cfi_kThumb2[] = {
+ 0x44, 0x0E, 0x1C, 0x85, 0x07, 0x86, 0x06, 0x87, 0x05, 0x88, 0x04, 0x8A,
+ 0x03, 0x8B, 0x02, 0x8E, 0x01, 0x44, 0x0E, 0x5C, 0x05, 0x50, 0x17, 0x05,
+ 0x51, 0x16, 0x05, 0x52, 0x15, 0x05, 0x53, 0x14, 0x05, 0x54, 0x13, 0x05,
+ 0x55, 0x12, 0x05, 0x56, 0x11, 0x05, 0x57, 0x10, 0x05, 0x58, 0x0F, 0x05,
+ 0x59, 0x0E, 0x05, 0x5A, 0x0D, 0x05, 0x5B, 0x0C, 0x05, 0x5C, 0x0B, 0x05,
+ 0x5D, 0x0A, 0x05, 0x5E, 0x09, 0x05, 0x5F, 0x08, 0x42, 0x0E, 0x80, 0x01,
+ 0x54, 0x0E, 0xA0, 0x01, 0x42, 0x0E, 0x80, 0x01, 0x0A, 0x42, 0x0E, 0x5C,
+ 0x44, 0x0E, 0x1C, 0x06, 0x50, 0x06, 0x51, 0x06, 0x52, 0x06, 0x53, 0x06,
+ 0x54, 0x06, 0x55, 0x06, 0x56, 0x06, 0x57, 0x06, 0x58, 0x06, 0x59, 0x06,
+ 0x5A, 0x06, 0x5B, 0x06, 0x5C, 0x06, 0x5D, 0x06, 0x5E, 0x06, 0x5F, 0x44,
+ 0x0B, 0x0E, 0x80, 0x01,
+};
+// 0x00000000: push {r5, r6, r7, r8, r10, r11, lr}
+// 0x00000004: .cfi_def_cfa_offset: 28
+// 0x00000004: .cfi_offset: r5 at cfa-28
+// 0x00000004: .cfi_offset: r6 at cfa-24
+// 0x00000004: .cfi_offset: r7 at cfa-20
+// 0x00000004: .cfi_offset: r8 at cfa-16
+// 0x00000004: .cfi_offset: r10 at cfa-12
+// 0x00000004: .cfi_offset: r11 at cfa-8
+// 0x00000004: .cfi_offset: r14 at cfa-4
+// 0x00000004: vpush.f32 {s16-s31}
+// 0x00000008: .cfi_def_cfa_offset: 92
+// 0x00000008: .cfi_offset_extended: r80 at cfa-92
+// 0x00000008: .cfi_offset_extended: r81 at cfa-88
+// 0x00000008: .cfi_offset_extended: r82 at cfa-84
+// 0x00000008: .cfi_offset_extended: r83 at cfa-80
+// 0x00000008: .cfi_offset_extended: r84 at cfa-76
+// 0x00000008: .cfi_offset_extended: r85 at cfa-72
+// 0x00000008: .cfi_offset_extended: r86 at cfa-68
+// 0x00000008: .cfi_offset_extended: r87 at cfa-64
+// 0x00000008: .cfi_offset_extended: r88 at cfa-60
+// 0x00000008: .cfi_offset_extended: r89 at cfa-56
+// 0x00000008: .cfi_offset_extended: r90 at cfa-52
+// 0x00000008: .cfi_offset_extended: r91 at cfa-48
+// 0x00000008: .cfi_offset_extended: r92 at cfa-44
+// 0x00000008: .cfi_offset_extended: r93 at cfa-40
+// 0x00000008: .cfi_offset_extended: r94 at cfa-36
+// 0x00000008: .cfi_offset_extended: r95 at cfa-32
+// 0x00000008: sub sp, sp, #36
+// 0x0000000a: .cfi_def_cfa_offset: 128
+// 0x0000000a: str r0, [sp, #0]
+// 0x0000000c: str.w r1, [sp, #132]
+// 0x00000010: vstr.f32 s0, [sp, #136]
+// 0x00000014: str.w r2, [sp, #140]
+// 0x00000018: str.w r3, [sp, #144]
+// 0x0000001c: sub sp, sp, #32
+// 0x0000001e: .cfi_def_cfa_offset: 160
+// 0x0000001e: add sp, sp, #32
+// 0x00000020: .cfi_def_cfa_offset: 128
+// 0x00000020: .cfi_remember_state
+// 0x00000020: add sp, sp, #36
+// 0x00000022: .cfi_def_cfa_offset: 92
+// 0x00000022: vpop.f32 {s16-s31}
+// 0x00000026: .cfi_def_cfa_offset: 28
+// 0x00000026: .cfi_restore_extended: r80
+// 0x00000026: .cfi_restore_extended: r81
+// 0x00000026: .cfi_restore_extended: r82
+// 0x00000026: .cfi_restore_extended: r83
+// 0x00000026: .cfi_restore_extended: r84
+// 0x00000026: .cfi_restore_extended: r85
+// 0x00000026: .cfi_restore_extended: r86
+// 0x00000026: .cfi_restore_extended: r87
+// 0x00000026: .cfi_restore_extended: r88
+// 0x00000026: .cfi_restore_extended: r89
+// 0x00000026: .cfi_restore_extended: r90
+// 0x00000026: .cfi_restore_extended: r91
+// 0x00000026: .cfi_restore_extended: r92
+// 0x00000026: .cfi_restore_extended: r93
+// 0x00000026: .cfi_restore_extended: r94
+// 0x00000026: .cfi_restore_extended: r95
+// 0x00000026: pop {r5, r6, r7, r8, r10, r11, pc}
+// 0x0000002a: .cfi_restore_state
+// 0x0000002a: .cfi_def_cfa_offset: 128
+
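The annotated listing can be read straight off the raw bytes: in the DWARF call-frame encoding used here, 0x44 is DW_CFA_advance_loc with delta 4, 0x0E is DW_CFA_def_cfa_offset with a ULEB128 operand (0x1C = 28), and 0x85 0x07 is DW_CFA_offset for register 5 at 7 data-alignment units (7 * 4 = 28 bytes) below the CFA. A sketch decoder for just this subset, assuming code alignment factor 1 and data alignment factor -4 as in the listing (a real consumer must handle the full opcode set):

    #include <cstdint>
    #include <cstdio>

    static uint32_t ReadUleb128(const uint8_t** p) {
      uint32_t value = 0;
      int shift = 0;
      uint8_t byte;
      do {
        byte = *(*p)++;
        value |= static_cast<uint32_t>(byte & 0x7f) << shift;
        shift += 7;
      } while ((byte & 0x80) != 0);
      return value;
    }

    void DecodeSome(const uint8_t* p, const uint8_t* end) {
      uint32_t pc = 0;
      while (p != end) {
        uint8_t op = *p++;
        if ((op >> 6) == 1) {         // DW_CFA_advance_loc: low 6 bits * code alignment (1).
          pc += op & 0x3f;
          printf("0x%08x:\n", pc);
        } else if (op == 0x0e) {      // DW_CFA_def_cfa_offset.
          printf("  .cfi_def_cfa_offset: %u\n", ReadUleb128(&p));
        } else if ((op >> 6) == 2) {  // DW_CFA_offset: register in the low 6 bits.
          printf("  .cfi_offset: r%u at cfa-%u\n", op & 0x3f, ReadUleb128(&p) * 4u);
        } else {
          break;                      // Opcode outside this sketch's subset.
        }
      }
    }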
+static constexpr uint8_t expected_asm_kArm64[] = {
+ 0xFF, 0x03, 0x03, 0xD1, 0xFE, 0x5F, 0x00, 0xF9, 0xFD, 0x5B, 0x00, 0xF9,
+ 0xFC, 0x57, 0x00, 0xF9, 0xFB, 0x53, 0x00, 0xF9, 0xFA, 0x4F, 0x00, 0xF9,
+ 0xF9, 0x4B, 0x00, 0xF9, 0xF8, 0x47, 0x00, 0xF9, 0xF7, 0x43, 0x00, 0xF9,
+ 0xF6, 0x3F, 0x00, 0xF9, 0xF5, 0x3B, 0x00, 0xF9, 0xF4, 0x37, 0x00, 0xF9,
+ 0xEF, 0x33, 0x00, 0xFD, 0xEE, 0x2F, 0x00, 0xFD, 0xED, 0x2B, 0x00, 0xFD,
+ 0xEC, 0x27, 0x00, 0xFD, 0xEB, 0x23, 0x00, 0xFD, 0xEA, 0x1F, 0x00, 0xFD,
+ 0xE9, 0x1B, 0x00, 0xFD, 0xE8, 0x17, 0x00, 0xFD, 0xF5, 0x03, 0x12, 0xAA,
+ 0xE0, 0x03, 0x00, 0xB9, 0xE1, 0xC7, 0x00, 0xB9, 0xE0, 0xCB, 0x00, 0xBD,
+ 0xE2, 0xCF, 0x00, 0xB9, 0xE3, 0xD3, 0x00, 0xB9, 0xFF, 0x83, 0x00, 0xD1,
+ 0xFF, 0x83, 0x00, 0x91, 0xF2, 0x03, 0x15, 0xAA, 0xFE, 0x5F, 0x40, 0xF9,
+ 0xFD, 0x5B, 0x40, 0xF9, 0xFC, 0x57, 0x40, 0xF9, 0xFB, 0x53, 0x40, 0xF9,
+ 0xFA, 0x4F, 0x40, 0xF9, 0xF9, 0x4B, 0x40, 0xF9, 0xF8, 0x47, 0x40, 0xF9,
+ 0xF7, 0x43, 0x40, 0xF9, 0xF6, 0x3F, 0x40, 0xF9, 0xF5, 0x3B, 0x40, 0xF9,
+ 0xF4, 0x37, 0x40, 0xF9, 0xEF, 0x33, 0x40, 0xFD, 0xEE, 0x2F, 0x40, 0xFD,
+ 0xED, 0x2B, 0x40, 0xFD, 0xEC, 0x27, 0x40, 0xFD, 0xEB, 0x23, 0x40, 0xFD,
+ 0xEA, 0x1F, 0x40, 0xFD, 0xE9, 0x1B, 0x40, 0xFD, 0xE8, 0x17, 0x40, 0xFD,
+ 0xFF, 0x03, 0x03, 0x91, 0xC0, 0x03, 0x5F, 0xD6,
+};
+static constexpr uint8_t expected_cfi_kArm64[] = {
+ 0x44, 0x0E, 0xC0, 0x01, 0x44, 0x9E, 0x02, 0x44, 0x9D, 0x04, 0x44, 0x9C,
+ 0x06, 0x44, 0x9B, 0x08, 0x44, 0x9A, 0x0A, 0x44, 0x99, 0x0C, 0x44, 0x98,
+ 0x0E, 0x44, 0x97, 0x10, 0x44, 0x96, 0x12, 0x44, 0x95, 0x14, 0x44, 0x94,
+ 0x16, 0x44, 0x05, 0x4F, 0x18, 0x44, 0x05, 0x4E, 0x1A, 0x44, 0x05, 0x4D,
+ 0x1C, 0x44, 0x05, 0x4C, 0x1E, 0x44, 0x05, 0x4B, 0x20, 0x44, 0x05, 0x4A,
+ 0x22, 0x44, 0x05, 0x49, 0x24, 0x44, 0x05, 0x48, 0x26, 0x5C, 0x0E, 0xE0,
+ 0x01, 0x44, 0x0E, 0xC0, 0x01, 0x0A, 0x48, 0xDE, 0x44, 0xDD, 0x44, 0xDC,
+ 0x44, 0xDB, 0x44, 0xDA, 0x44, 0xD9, 0x44, 0xD8, 0x44, 0xD7, 0x44, 0xD6,
+ 0x44, 0xD5, 0x44, 0xD4, 0x44, 0x06, 0x4F, 0x44, 0x06, 0x4E, 0x44, 0x06,
+ 0x4D, 0x44, 0x06, 0x4C, 0x44, 0x06, 0x4B, 0x44, 0x06, 0x4A, 0x44, 0x06,
+ 0x49, 0x44, 0x06, 0x48, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, 0xC0, 0x01,
+};
+// 0x00000000: sub sp, sp, #0xc0 (192)
+// 0x00000004: .cfi_def_cfa_offset: 192
+// 0x00000004: str lr, [sp, #184]
+// 0x00000008: .cfi_offset: r30 at cfa-8
+// 0x00000008: str x29, [sp, #176]
+// 0x0000000c: .cfi_offset: r29 at cfa-16
+// 0x0000000c: str x28, [sp, #168]
+// 0x00000010: .cfi_offset: r28 at cfa-24
+// 0x00000010: str x27, [sp, #160]
+// 0x00000014: .cfi_offset: r27 at cfa-32
+// 0x00000014: str x26, [sp, #152]
+// 0x00000018: .cfi_offset: r26 at cfa-40
+// 0x00000018: str x25, [sp, #144]
+// 0x0000001c: .cfi_offset: r25 at cfa-48
+// 0x0000001c: str x24, [sp, #136]
+// 0x00000020: .cfi_offset: r24 at cfa-56
+// 0x00000020: str x23, [sp, #128]
+// 0x00000024: .cfi_offset: r23 at cfa-64
+// 0x00000024: str x22, [sp, #120]
+// 0x00000028: .cfi_offset: r22 at cfa-72
+// 0x00000028: str x21, [sp, #112]
+// 0x0000002c: .cfi_offset: r21 at cfa-80
+// 0x0000002c: str x20, [sp, #104]
+// 0x00000030: .cfi_offset: r20 at cfa-88
+// 0x00000030: str d15, [sp, #96]
+// 0x00000034: .cfi_offset_extended: r79 at cfa-96
+// 0x00000034: str d14, [sp, #88]
+// 0x00000038: .cfi_offset_extended: r78 at cfa-104
+// 0x00000038: str d13, [sp, #80]
+// 0x0000003c: .cfi_offset_extended: r77 at cfa-112
+// 0x0000003c: str d12, [sp, #72]
+// 0x00000040: .cfi_offset_extended: r76 at cfa-120
+// 0x00000040: str d11, [sp, #64]
+// 0x00000044: .cfi_offset_extended: r75 at cfa-128
+// 0x00000044: str d10, [sp, #56]
+// 0x00000048: .cfi_offset_extended: r74 at cfa-136
+// 0x00000048: str d9, [sp, #48]
+// 0x0000004c: .cfi_offset_extended: r73 at cfa-144
+// 0x0000004c: str d8, [sp, #40]
+// 0x00000050: .cfi_offset_extended: r72 at cfa-152
+// 0x00000050: mov x21, tr
+// 0x00000054: str w0, [sp]
+// 0x00000058: str w1, [sp, #196]
+// 0x0000005c: str s0, [sp, #200]
+// 0x00000060: str w2, [sp, #204]
+// 0x00000064: str w3, [sp, #208]
+// 0x00000068: sub sp, sp, #0x20 (32)
+// 0x0000006c: .cfi_def_cfa_offset: 224
+// 0x0000006c: add sp, sp, #0x20 (32)
+// 0x00000070: .cfi_def_cfa_offset: 192
+// 0x00000070: .cfi_remember_state
+// 0x00000070: mov tr, x21
+// 0x00000074: ldr lr, [sp, #184]
+// 0x00000078: .cfi_restore: r30
+// 0x00000078: ldr x29, [sp, #176]
+// 0x0000007c: .cfi_restore: r29
+// 0x0000007c: ldr x28, [sp, #168]
+// 0x00000080: .cfi_restore: r28
+// 0x00000080: ldr x27, [sp, #160]
+// 0x00000084: .cfi_restore: r27
+// 0x00000084: ldr x26, [sp, #152]
+// 0x00000088: .cfi_restore: r26
+// 0x00000088: ldr x25, [sp, #144]
+// 0x0000008c: .cfi_restore: r25
+// 0x0000008c: ldr x24, [sp, #136]
+// 0x00000090: .cfi_restore: r24
+// 0x00000090: ldr x23, [sp, #128]
+// 0x00000094: .cfi_restore: r23
+// 0x00000094: ldr x22, [sp, #120]
+// 0x00000098: .cfi_restore: r22
+// 0x00000098: ldr x21, [sp, #112]
+// 0x0000009c: .cfi_restore: r21
+// 0x0000009c: ldr x20, [sp, #104]
+// 0x000000a0: .cfi_restore: r20
+// 0x000000a0: ldr d15, [sp, #96]
+// 0x000000a4: .cfi_restore_extended: r79
+// 0x000000a4: ldr d14, [sp, #88]
+// 0x000000a8: .cfi_restore_extended: r78
+// 0x000000a8: ldr d13, [sp, #80]
+// 0x000000ac: .cfi_restore_extended: r77
+// 0x000000ac: ldr d12, [sp, #72]
+// 0x000000b0: .cfi_restore_extended: r76
+// 0x000000b0: ldr d11, [sp, #64]
+// 0x000000b4: .cfi_restore_extended: r75
+// 0x000000b4: ldr d10, [sp, #56]
+// 0x000000b8: .cfi_restore_extended: r74
+// 0x000000b8: ldr d9, [sp, #48]
+// 0x000000bc: .cfi_restore_extended: r73
+// 0x000000bc: ldr d8, [sp, #40]
+// 0x000000c0: .cfi_restore_extended: r72
+// 0x000000c0: add sp, sp, #0xc0 (192)
+// 0x000000c4: .cfi_def_cfa_offset: 0
+// 0x000000c4: ret
+// 0x000000c8: .cfi_restore_state
+// 0x000000c8: .cfi_def_cfa_offset: 192
+
+static constexpr uint8_t expected_asm_kX86[] = {
+ 0x57, 0x56, 0x55, 0x83, 0xC4, 0xE4, 0x50, 0x89, 0x4C, 0x24, 0x34, 0xF3,
+ 0x0F, 0x11, 0x44, 0x24, 0x38, 0x89, 0x54, 0x24, 0x3C, 0x89, 0x5C, 0x24,
+ 0x40, 0x83, 0xC4, 0xE0, 0x83, 0xC4, 0x20, 0x83, 0xC4, 0x20, 0x5D, 0x5E,
+ 0x5F, 0xC3,
+};
+static constexpr uint8_t expected_cfi_kX86[] = {
+ 0x41, 0x0E, 0x08, 0x87, 0x02, 0x41, 0x0E, 0x0C, 0x86, 0x03, 0x41, 0x0E,
+ 0x10, 0x85, 0x04, 0x43, 0x0E, 0x2C, 0x41, 0x0E, 0x30, 0x55, 0x0E, 0x50,
+ 0x43, 0x0E, 0x30, 0x0A, 0x43, 0x0E, 0x10, 0x41, 0x0E, 0x0C, 0xC5, 0x41,
+ 0x0E, 0x08, 0xC6, 0x41, 0x0E, 0x04, 0xC7, 0x41, 0x0B, 0x0E, 0x30,
+};
+// 0x00000000: push edi
+// 0x00000001: .cfi_def_cfa_offset: 8
+// 0x00000001: .cfi_offset: r7 at cfa-8
+// 0x00000001: push esi
+// 0x00000002: .cfi_def_cfa_offset: 12
+// 0x00000002: .cfi_offset: r6 at cfa-12
+// 0x00000002: push ebp
+// 0x00000003: .cfi_def_cfa_offset: 16
+// 0x00000003: .cfi_offset: r5 at cfa-16
+// 0x00000003: add esp, -28
+// 0x00000006: .cfi_def_cfa_offset: 44
+// 0x00000006: push eax
+// 0x00000007: .cfi_def_cfa_offset: 48
+// 0x00000007: mov [esp + 52], ecx
+// 0x0000000b: movss [esp + 56], xmm0
+// 0x00000011: mov [esp + 60], edx
+// 0x00000015: mov [esp + 64], ebx
+// 0x00000019: add esp, -32
+// 0x0000001c: .cfi_def_cfa_offset: 80
+// 0x0000001c: add esp, 32
+// 0x0000001f: .cfi_def_cfa_offset: 48
+// 0x0000001f: .cfi_remember_state
+// 0x0000001f: add esp, 32
+// 0x00000022: .cfi_def_cfa_offset: 16
+// 0x00000022: pop ebp
+// 0x00000023: .cfi_def_cfa_offset: 12
+// 0x00000023: .cfi_restore: r5
+// 0x00000023: pop esi
+// 0x00000024: .cfi_def_cfa_offset: 8
+// 0x00000024: .cfi_restore: r6
+// 0x00000024: pop edi
+// 0x00000025: .cfi_def_cfa_offset: 4
+// 0x00000025: .cfi_restore: r7
+// 0x00000025: ret
+// 0x00000026: .cfi_restore_state
+// 0x00000026: .cfi_def_cfa_offset: 48
+
+static constexpr uint8_t expected_asm_kX86_64[] = {
+ 0x41, 0x57, 0x41, 0x56, 0x41, 0x55, 0x41, 0x54, 0x55, 0x53, 0x48, 0x83,
+ 0xEC, 0x48, 0xF2, 0x44, 0x0F, 0x11, 0x7C, 0x24, 0x40, 0xF2, 0x44, 0x0F,
+ 0x11, 0x74, 0x24, 0x38, 0xF2, 0x44, 0x0F, 0x11, 0x6C, 0x24, 0x30, 0xF2,
+ 0x44, 0x0F, 0x11, 0x64, 0x24, 0x28, 0x89, 0x3C, 0x24, 0x89, 0xB4, 0x24,
+ 0x84, 0x00, 0x00, 0x00, 0xF3, 0x0F, 0x11, 0x84, 0x24, 0x88, 0x00, 0x00,
+ 0x00, 0x89, 0x94, 0x24, 0x8C, 0x00, 0x00, 0x00, 0x89, 0x8C, 0x24, 0x90,
+ 0x00, 0x00, 0x00, 0x48, 0x83, 0xC4, 0xE0, 0x48, 0x83, 0xC4, 0x20, 0xF2,
+ 0x44, 0x0F, 0x10, 0x64, 0x24, 0x28, 0xF2, 0x44, 0x0F, 0x10, 0x6C, 0x24,
+ 0x30, 0xF2, 0x44, 0x0F, 0x10, 0x74, 0x24, 0x38, 0xF2, 0x44, 0x0F, 0x10,
+ 0x7C, 0x24, 0x40, 0x48, 0x83, 0xC4, 0x48, 0x5B, 0x5D, 0x41, 0x5C, 0x41,
+ 0x5D, 0x41, 0x5E, 0x41, 0x5F, 0xC3,
+};
+static constexpr uint8_t expected_cfi_kX86_64[] = {
+ 0x42, 0x0E, 0x10, 0x8F, 0x04, 0x42, 0x0E, 0x18, 0x8E, 0x06, 0x42, 0x0E,
+ 0x20, 0x8D, 0x08, 0x42, 0x0E, 0x28, 0x8C, 0x0A, 0x41, 0x0E, 0x30, 0x86,
+ 0x0C, 0x41, 0x0E, 0x38, 0x83, 0x0E, 0x44, 0x0E, 0x80, 0x01, 0x47, 0xA0,
+ 0x10, 0x47, 0x9F, 0x12, 0x47, 0x9E, 0x14, 0x47, 0x9D, 0x16, 0x65, 0x0E,
+ 0xA0, 0x01, 0x44, 0x0E, 0x80, 0x01, 0x0A, 0x47, 0xDD, 0x47, 0xDE, 0x47,
+ 0xDF, 0x47, 0xE0, 0x44, 0x0E, 0x38, 0x41, 0x0E, 0x30, 0xC3, 0x41, 0x0E,
+ 0x28, 0xC6, 0x42, 0x0E, 0x20, 0xCC, 0x42, 0x0E, 0x18, 0xCD, 0x42, 0x0E,
+ 0x10, 0xCE, 0x42, 0x0E, 0x08, 0xCF, 0x41, 0x0B, 0x0E, 0x80, 0x01,
+};
+// 0x00000000: push r15
+// 0x00000002: .cfi_def_cfa_offset: 16
+// 0x00000002: .cfi_offset: r15 at cfa-16
+// 0x00000002: push r14
+// 0x00000004: .cfi_def_cfa_offset: 24
+// 0x00000004: .cfi_offset: r14 at cfa-24
+// 0x00000004: push r13
+// 0x00000006: .cfi_def_cfa_offset: 32
+// 0x00000006: .cfi_offset: r13 at cfa-32
+// 0x00000006: push r12
+// 0x00000008: .cfi_def_cfa_offset: 40
+// 0x00000008: .cfi_offset: r12 at cfa-40
+// 0x00000008: push rbp
+// 0x00000009: .cfi_def_cfa_offset: 48
+// 0x00000009: .cfi_offset: r6 at cfa-48
+// 0x00000009: push rbx
+// 0x0000000a: .cfi_def_cfa_offset: 56
+// 0x0000000a: .cfi_offset: r3 at cfa-56
+// 0x0000000a: subq rsp, 72
+// 0x0000000e: .cfi_def_cfa_offset: 128
+// 0x0000000e: movsd [rsp + 64], xmm15
+// 0x00000015: .cfi_offset: r32 at cfa-64
+// 0x00000015: movsd [rsp + 56], xmm14
+// 0x0000001c: .cfi_offset: r31 at cfa-72
+// 0x0000001c: movsd [rsp + 48], xmm13
+// 0x00000023: .cfi_offset: r30 at cfa-80
+// 0x00000023: movsd [rsp + 40], xmm12
+// 0x0000002a: .cfi_offset: r29 at cfa-88
+// 0x0000002a: mov [rsp], edi
+// 0x0000002d: mov [rsp + 132], esi
+// 0x00000034: movss [rsp + 136], xmm0
+// 0x0000003d: mov [rsp + 140], edx
+// 0x00000044: mov [rsp + 144], ecx
+// 0x0000004b: addq rsp, -32
+// 0x0000004f: .cfi_def_cfa_offset: 160
+// 0x0000004f: addq rsp, 32
+// 0x00000053: .cfi_def_cfa_offset: 128
+// 0x00000053: .cfi_remember_state
+// 0x00000053: movsd xmm12, [rsp + 40]
+// 0x0000005a: .cfi_restore: r29
+// 0x0000005a: movsd xmm13, [rsp + 48]
+// 0x00000061: .cfi_restore: r30
+// 0x00000061: movsd xmm14, [rsp + 56]
+// 0x00000068: .cfi_restore: r31
+// 0x00000068: movsd xmm15, [rsp + 64]
+// 0x0000006f: .cfi_restore: r32
+// 0x0000006f: addq rsp, 72
+// 0x00000073: .cfi_def_cfa_offset: 56
+// 0x00000073: pop rbx
+// 0x00000074: .cfi_def_cfa_offset: 48
+// 0x00000074: .cfi_restore: r3
+// 0x00000074: pop rbp
+// 0x00000075: .cfi_def_cfa_offset: 40
+// 0x00000075: .cfi_restore: r6
+// 0x00000075: pop r12
+// 0x00000077: .cfi_def_cfa_offset: 32
+// 0x00000077: .cfi_restore: r12
+// 0x00000077: pop r13
+// 0x00000079: .cfi_def_cfa_offset: 24
+// 0x00000079: .cfi_restore: r13
+// 0x00000079: pop r14
+// 0x0000007b: .cfi_def_cfa_offset: 16
+// 0x0000007b: .cfi_restore: r14
+// 0x0000007b: pop r15
+// 0x0000007d: .cfi_def_cfa_offset: 8
+// 0x0000007d: .cfi_restore: r15
+// 0x0000007d: ret
+// 0x0000007e: .cfi_restore_state
+// 0x0000007e: .cfi_def_cfa_offset: 128
+
+static constexpr uint8_t expected_asm_kMips[] = {
+ 0xC0, 0xFF, 0xBD, 0x27, 0x3C, 0x00, 0xBF, 0xAF, 0x38, 0x00, 0xB8, 0xAF,
+ 0x34, 0x00, 0xAF, 0xAF, 0x30, 0x00, 0xAE, 0xAF, 0x2C, 0x00, 0xAD, 0xAF,
+ 0x28, 0x00, 0xAC, 0xAF, 0x24, 0x00, 0xAB, 0xAF, 0x20, 0x00, 0xAA, 0xAF,
+ 0x1C, 0x00, 0xA9, 0xAF, 0x18, 0x00, 0xA8, 0xAF, 0x00, 0x00, 0xA4, 0xAF,
+ 0x44, 0x00, 0xA5, 0xAF, 0x48, 0x00, 0xA6, 0xAF, 0x4C, 0x00, 0xA7, 0xAF,
+ 0xE0, 0xFF, 0xBD, 0x27, 0x20, 0x00, 0xBD, 0x27, 0x18, 0x00, 0xA8, 0x8F,
+ 0x1C, 0x00, 0xA9, 0x8F, 0x20, 0x00, 0xAA, 0x8F, 0x24, 0x00, 0xAB, 0x8F,
+ 0x28, 0x00, 0xAC, 0x8F, 0x2C, 0x00, 0xAD, 0x8F, 0x30, 0x00, 0xAE, 0x8F,
+ 0x34, 0x00, 0xAF, 0x8F, 0x38, 0x00, 0xB8, 0x8F, 0x3C, 0x00, 0xBF, 0x8F,
+ 0x40, 0x00, 0xBD, 0x27, 0x09, 0x00, 0xE0, 0x03, 0x00, 0x00, 0x00, 0x00,
+};
+static constexpr uint8_t expected_cfi_kMips[] = {
+ 0x44, 0x0E, 0x40, 0x44, 0x9F, 0x01, 0x44, 0x98, 0x02, 0x44, 0x8F, 0x03,
+ 0x44, 0x8E, 0x04, 0x44, 0x8D, 0x05, 0x44, 0x8C, 0x06, 0x44, 0x8B, 0x07,
+ 0x44, 0x8A, 0x08, 0x44, 0x89, 0x09, 0x44, 0x88, 0x0A, 0x54, 0x0E, 0x60,
+ 0x44, 0x0E, 0x40, 0x0A, 0x44, 0xC8, 0x44, 0xC9, 0x44, 0xCA, 0x44, 0xCB,
+ 0x44, 0xCC, 0x44, 0xCD, 0x44, 0xCE, 0x44, 0xCF, 0x44, 0xD8, 0x44, 0xDF,
+ 0x44, 0x0E, 0x00, 0x48, 0x0B, 0x0E, 0x40,
+};
+// 0x00000000: addiu r29, r29, -64
+// 0x00000004: .cfi_def_cfa_offset: 64
+// 0x00000004: sw r31, +60(r29)
+// 0x00000008: .cfi_offset: r31 at cfa-4
+// 0x00000008: sw r24, +56(r29)
+// 0x0000000c: .cfi_offset: r24 at cfa-8
+// 0x0000000c: sw r15, +52(r29)
+// 0x00000010: .cfi_offset: r15 at cfa-12
+// 0x00000010: sw r14, +48(r29)
+// 0x00000014: .cfi_offset: r14 at cfa-16
+// 0x00000014: sw r13, +44(r29)
+// 0x00000018: .cfi_offset: r13 at cfa-20
+// 0x00000018: sw r12, +40(r29)
+// 0x0000001c: .cfi_offset: r12 at cfa-24
+// 0x0000001c: sw r11, +36(r29)
+// 0x00000020: .cfi_offset: r11 at cfa-28
+// 0x00000020: sw r10, +32(r29)
+// 0x00000024: .cfi_offset: r10 at cfa-32
+// 0x00000024: sw r9, +28(r29)
+// 0x00000028: .cfi_offset: r9 at cfa-36
+// 0x00000028: sw r8, +24(r29)
+// 0x0000002c: .cfi_offset: r8 at cfa-40
+// 0x0000002c: sw r4, +0(r29)
+// 0x00000030: sw r5, +68(r29)
+// 0x00000034: sw r6, +72(r29)
+// 0x00000038: sw r7, +76(r29)
+// 0x0000003c: addiu r29, r29, -32
+// 0x00000040: .cfi_def_cfa_offset: 96
+// 0x00000040: addiu r29, r29, 32
+// 0x00000044: .cfi_def_cfa_offset: 64
+// 0x00000044: .cfi_remember_state
+// 0x00000044: lw r8, +24(r29)
+// 0x00000048: .cfi_restore: r8
+// 0x00000048: lw r9, +28(r29)
+// 0x0000004c: .cfi_restore: r9
+// 0x0000004c: lw r10, +32(r29)
+// 0x00000050: .cfi_restore: r10
+// 0x00000050: lw r11, +36(r29)
+// 0x00000054: .cfi_restore: r11
+// 0x00000054: lw r12, +40(r29)
+// 0x00000058: .cfi_restore: r12
+// 0x00000058: lw r13, +44(r29)
+// 0x0000005c: .cfi_restore: r13
+// 0x0000005c: lw r14, +48(r29)
+// 0x00000060: .cfi_restore: r14
+// 0x00000060: lw r15, +52(r29)
+// 0x00000064: .cfi_restore: r15
+// 0x00000064: lw r24, +56(r29)
+// 0x00000068: .cfi_restore: r24
+// 0x00000068: lw r31, +60(r29)
+// 0x0000006c: .cfi_restore: r31
+// 0x0000006c: addiu r29, r29, 64
+// 0x00000070: .cfi_def_cfa_offset: 0
+// 0x00000070: jalr r0, r31
+// 0x00000074: nop
+// 0x00000078: .cfi_restore_state
+// 0x00000078: .cfi_def_cfa_offset: 64
+
+static constexpr uint8_t expected_asm_kMips64[] = {
+ 0xA0, 0xFF, 0xBD, 0x67, 0x58, 0x00, 0xBF, 0xFF, 0x50, 0x00, 0xBE, 0xFF,
+ 0x48, 0x00, 0xBC, 0xFF, 0x40, 0x00, 0xB7, 0xFF, 0x38, 0x00, 0xB6, 0xFF,
+ 0x30, 0x00, 0xB5, 0xFF, 0x28, 0x00, 0xB4, 0xFF, 0x20, 0x00, 0xB3, 0xFF,
+ 0x18, 0x00, 0xB2, 0xFF, 0x00, 0x00, 0xA4, 0xAF, 0x64, 0x00, 0xA5, 0xAF,
+ 0x68, 0x00, 0xAE, 0xE7, 0x6C, 0x00, 0xA7, 0xAF, 0x70, 0x00, 0xA8, 0xAF,
+ 0xE0, 0xFF, 0xBD, 0x67, 0x20, 0x00, 0xBD, 0x67, 0x18, 0x00, 0xB2, 0xDF,
+ 0x20, 0x00, 0xB3, 0xDF, 0x28, 0x00, 0xB4, 0xDF, 0x30, 0x00, 0xB5, 0xDF,
+ 0x38, 0x00, 0xB6, 0xDF, 0x40, 0x00, 0xB7, 0xDF, 0x48, 0x00, 0xBC, 0xDF,
+ 0x50, 0x00, 0xBE, 0xDF, 0x58, 0x00, 0xBF, 0xDF, 0x60, 0x00, 0xBD, 0x67,
+ 0x09, 0x00, 0xE0, 0x03, 0x00, 0x00, 0x00, 0x00,
+};
+static constexpr uint8_t expected_cfi_kMips64[] = {
+ 0x44, 0x0E, 0x60, 0x44, 0x9F, 0x02, 0x44, 0x9E, 0x04, 0x44, 0x9C, 0x06,
+ 0x44, 0x97, 0x08, 0x44, 0x96, 0x0A, 0x44, 0x95, 0x0C, 0x44, 0x94, 0x0E,
+ 0x44, 0x93, 0x10, 0x44, 0x92, 0x12, 0x58, 0x0E, 0x80, 0x01, 0x44, 0x0E,
+ 0x60, 0x0A, 0x44, 0xD2, 0x44, 0xD3, 0x44, 0xD4, 0x44, 0xD5, 0x44, 0xD6,
+ 0x44, 0xD7, 0x44, 0xDC, 0x44, 0xDE, 0x44, 0xDF, 0x44, 0x0E, 0x00, 0x48,
+ 0x0B, 0x0E, 0x60,
+};
+// 0x00000000: daddiu r29, r29, -96
+// 0x00000004: .cfi_def_cfa_offset: 96
+// 0x00000004: sd r31, +88(r29)
+// 0x00000008: .cfi_offset: r31 at cfa-8
+// 0x00000008: sd r30, +80(r29)
+// 0x0000000c: .cfi_offset: r30 at cfa-16
+// 0x0000000c: sd r28, +72(r29)
+// 0x00000010: .cfi_offset: r28 at cfa-24
+// 0x00000010: sd r23, +64(r29)
+// 0x00000014: .cfi_offset: r23 at cfa-32
+// 0x00000014: sd r22, +56(r29)
+// 0x00000018: .cfi_offset: r22 at cfa-40
+// 0x00000018: sd r21, +48(r29)
+// 0x0000001c: .cfi_offset: r21 at cfa-48
+// 0x0000001c: sd r20, +40(r29)
+// 0x00000020: .cfi_offset: r20 at cfa-56
+// 0x00000020: sd r19, +32(r29)
+// 0x00000024: .cfi_offset: r19 at cfa-64
+// 0x00000024: sd r18, +24(r29)
+// 0x00000028: .cfi_offset: r18 at cfa-72
+// 0x00000028: sw r4, +0(r29)
+// 0x0000002c: sw r5, +100(r29)
+// 0x00000030: swc1 f14, +104(r29)
+// 0x00000034: sw r7, +108(r29)
+// 0x00000038: sw r8, +112(r29)
+// 0x0000003c: daddiu r29, r29, -32
+// 0x00000040: .cfi_def_cfa_offset: 128
+// 0x00000040: daddiu r29, r29, 32
+// 0x00000044: .cfi_def_cfa_offset: 96
+// 0x00000044: .cfi_remember_state
+// 0x00000044: ld r18, +24(r29)
+// 0x00000048: .cfi_restore: r18
+// 0x00000048: ld r19, +32(r29)
+// 0x0000004c: .cfi_restore: r19
+// 0x0000004c: ld r20, +40(r29)
+// 0x00000050: .cfi_restore: r20
+// 0x00000050: ld r21, +48(r29)
+// 0x00000054: .cfi_restore: r21
+// 0x00000054: ld r22, +56(r29)
+// 0x00000058: .cfi_restore: r22
+// 0x00000058: ld r23, +64(r29)
+// 0x0000005c: .cfi_restore: r23
+// 0x0000005c: ld r28, +72(r29)
+// 0x00000060: .cfi_restore: r28
+// 0x00000060: ld r30, +80(r29)
+// 0x00000064: .cfi_restore: r30
+// 0x00000064: ld r31, +88(r29)
+// 0x00000068: .cfi_restore: r31
+// 0x00000068: daddiu r29, r29, 96
+// 0x0000006c: .cfi_def_cfa_offset: 0
+// 0x0000006c: jr r31
+// 0x00000070: nop
+// 0x00000074: .cfi_restore_state
+// 0x00000074: .cfi_def_cfa_offset: 96
+
diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc
index 2d9e03a718..8a14038074 100644
--- a/compiler/jni/quick/jni_compiler.cc
+++ b/compiler/jni/quick/jni_compiler.cc
@@ -28,6 +28,7 @@
#include "compiled_method.h"
#include "dex_file-inl.h"
#include "driver/compiler_driver.h"
+#include "driver/compiler_options.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "jni_env_ext.h"
#include "mirror/art_method.h"
@@ -93,7 +94,7 @@ CompiledMethod* ArtJniCompileMethodInternal(CompilerDriver* driver,
// Assembler that holds generated instructions
std::unique_ptr<Assembler> jni_asm(Assembler::Create(instruction_set));
- jni_asm->InitializeFrameDescriptionEntry();
+ jni_asm->cfi().SetEnabled(driver->GetCompilerOptions().GetIncludeDebugSymbols());
// Offsets into data structures
// TODO: if cross compiling these offsets are for the host not the target
@@ -105,6 +106,7 @@ CompiledMethod* ArtJniCompileMethodInternal(CompilerDriver* driver,
const size_t frame_size(main_jni_conv->FrameSize());
const std::vector<ManagedRegister>& callee_save_regs = main_jni_conv->CalleeSaveRegisters();
__ BuildFrame(frame_size, mr_conv->MethodRegister(), callee_save_regs, mr_conv->EntrySpills());
+ DCHECK_EQ(jni_asm->cfi().GetCurrentCFAOffset(), static_cast<int>(frame_size));
// 2. Set up the HandleScope
mr_conv->ResetIterator(FrameOffset(frame_size));
@@ -424,7 +426,9 @@ CompiledMethod* ArtJniCompileMethodInternal(CompilerDriver* driver,
// 16. Remove activation - need to restore callee save registers since the GC may have changed
// them.
+ DCHECK_EQ(jni_asm->cfi().GetCurrentCFAOffset(), static_cast<int>(frame_size));
__ RemoveFrame(frame_size, callee_save_regs);
+ DCHECK_EQ(jni_asm->cfi().GetCurrentCFAOffset(), static_cast<int>(frame_size));
// 17. Finalize code generation
__ EmitSlowPaths();
@@ -432,19 +436,19 @@ CompiledMethod* ArtJniCompileMethodInternal(CompilerDriver* driver,
std::vector<uint8_t> managed_code(cs);
MemoryRegion code(&managed_code[0], managed_code.size());
__ FinalizeInstructions(code);
- jni_asm->FinalizeFrameDescriptionEntry();
- std::vector<uint8_t>* fde(jni_asm->GetFrameDescriptionEntry());
- ArrayRef<const uint8_t> cfi_ref;
- if (fde != nullptr) {
- cfi_ref = ArrayRef<const uint8_t>(*fde);
- }
- return CompiledMethod::SwapAllocCompiledMethodCFI(driver,
- instruction_set,
- ArrayRef<const uint8_t>(managed_code),
- frame_size,
- main_jni_conv->CoreSpillMask(),
- main_jni_conv->FpSpillMask(),
- cfi_ref);
+
+ return CompiledMethod::SwapAllocCompiledMethod(driver,
+ instruction_set,
+ ArrayRef<const uint8_t>(managed_code),
+ frame_size,
+ main_jni_conv->CoreSpillMask(),
+ main_jni_conv->FpSpillMask(),
+ nullptr, // src_mapping_table.
+ ArrayRef<const uint8_t>(), // mapping_table.
+ ArrayRef<const uint8_t>(), // vmap_table.
+ ArrayRef<const uint8_t>(), // native_gc_map.
+ ArrayRef<const uint8_t>(*jni_asm->cfi().data()),
+ ArrayRef<const LinkerPatch>());
}
// Copy a single parameter from the managed to the JNI calling convention
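With SetEnabled() the CFI writer becomes opt-in: when debug symbols are off it can discard the opcodes while still tracking the current CFA offset, so the DCHECKs above stay meaningful either way. A generic sketch of that gating pattern (an illustration of the idea, not ART's actual DebugFrameOpCodeWriter):

    #include <cstdint>
    #include <vector>

    class GatedCfiWriter {
     public:
      void SetEnabled(bool enabled) { enabled_ = enabled; }

      void DefCFAOffset(int offset) {
        current_cfa_offset_ = offset;  // Always track state for assertions.
        if (enabled_) {
          data_.push_back(0x0e);       // DW_CFA_def_cfa_offset.
          PushUleb128(static_cast<uint32_t>(offset));
        }
      }

      int GetCurrentCFAOffset() const { return current_cfa_offset_; }
      const std::vector<uint8_t>* data() const { return &data_; }  // Empty when disabled.

     private:
      void PushUleb128(uint32_t value) {
        do {
          uint8_t byte = value & 0x7f;
          value >>= 7;
          data_.push_back(value != 0 ? (byte | 0x80) : byte);
        } while (value != 0);
      }

      bool enabled_ = false;
      int current_cfa_offset_ = 0;
      std::vector<uint8_t> data_;
    };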
diff --git a/compiler/jni/quick/mips64/calling_convention_mips64.cc b/compiler/jni/quick/mips64/calling_convention_mips64.cc
index 17325d6d49..d446867d32 100644
--- a/compiler/jni/quick/mips64/calling_convention_mips64.cc
+++ b/compiler/jni/quick/mips64/calling_convention_mips64.cc
@@ -126,25 +126,20 @@ const ManagedRegisterEntrySpills& Mips64ManagedRuntimeCallingConvention::EntrySp
Mips64JniCallingConvention::Mips64JniCallingConvention(bool is_static, bool is_synchronized,
const char* shorty)
: JniCallingConvention(is_static, is_synchronized, shorty, kFramePointerSize) {
- callee_save_regs_.push_back(Mips64ManagedRegister::FromGpuRegister(S0));
- callee_save_regs_.push_back(Mips64ManagedRegister::FromGpuRegister(S1));
callee_save_regs_.push_back(Mips64ManagedRegister::FromGpuRegister(S2));
callee_save_regs_.push_back(Mips64ManagedRegister::FromGpuRegister(S3));
callee_save_regs_.push_back(Mips64ManagedRegister::FromGpuRegister(S4));
callee_save_regs_.push_back(Mips64ManagedRegister::FromGpuRegister(S5));
callee_save_regs_.push_back(Mips64ManagedRegister::FromGpuRegister(S6));
callee_save_regs_.push_back(Mips64ManagedRegister::FromGpuRegister(S7));
-
callee_save_regs_.push_back(Mips64ManagedRegister::FromGpuRegister(GP));
- callee_save_regs_.push_back(Mips64ManagedRegister::FromGpuRegister(SP));
callee_save_regs_.push_back(Mips64ManagedRegister::FromGpuRegister(S8));
}
uint32_t Mips64JniCallingConvention::CoreSpillMask() const {
// Compute spill mask to agree with callee saves initialized in the constructor
uint32_t result = 0;
- result = 1 << S0 | 1 << S1 | 1 << S2 | 1 << S3 | 1 << S4 | 1 << S5 | 1 << S6 |
- 1 << S7 | 1 << GP | 1 << SP | 1 << S8;
+ result = 1 << S2 | 1 << S3 | 1 << S4 | 1 << S5 | 1 << S6 | 1 << S7 | 1 << GP | 1 << S8 | 1 << RA;
return result;
}
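Dropping S0/S1 and SP from the callee-save list while adding RA to the mask means the list and the mask must now be kept in sync by hand. One hedged way to derive the mask mechanically from the list (the AsMips64()/AsGpuRegister() accessors are assumed here, mirroring the FromGpuRegister() calls above):

    uint32_t ComputeCoreSpillMask(const std::vector<ManagedRegister>& callee_save_regs) {
      uint32_t result = 1u << RA;  // RA is spilled by BuildFrame() but not in the list.
      for (ManagedRegister reg : callee_save_regs) {
        result |= 1u << reg.AsMips64().AsGpuRegister();
      }
      return result;
    }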
diff --git a/compiler/linker/arm/relative_patcher_arm_base.cc b/compiler/linker/arm/relative_patcher_arm_base.cc
new file mode 100644
index 0000000000..ceace824ea
--- /dev/null
+++ b/compiler/linker/arm/relative_patcher_arm_base.cc
@@ -0,0 +1,182 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "linker/arm/relative_patcher_arm_base.h"
+
+#include "compiled_method.h"
+#include "oat.h"
+#include "output_stream.h"
+
+namespace art {
+namespace linker {
+
+uint32_t ArmBaseRelativePatcher::ReserveSpace(uint32_t offset,
+ const CompiledMethod* compiled_method,
+ MethodReference method_ref) {
+ return ReserveSpaceInternal(offset, compiled_method, method_ref, 0u);
+}
+
+uint32_t ArmBaseRelativePatcher::ReserveSpaceEnd(uint32_t offset) {
+ // NOTE: The final thunk can be reserved from InitCodeMethodVisitor::EndClass() while it
+ // may be written early by WriteCodeMethodVisitor::VisitMethod() for a deduplicated chunk
+ // of code. To avoid any alignment discrepancies for the final chunk, we always align the
+ // offset after reserving or writing any chunk.
+ uint32_t aligned_offset = CompiledMethod::AlignCode(offset, instruction_set_);
+ bool needs_thunk = ReserveSpaceProcessPatches(aligned_offset, MethodReference(nullptr, 0u),
+ aligned_offset);
+ if (needs_thunk) {
+ thunk_locations_.push_back(aligned_offset);
+ offset = CompiledMethod::AlignCode(aligned_offset + thunk_code_.size(), instruction_set_);
+ }
+ return offset;
+}
+
+uint32_t ArmBaseRelativePatcher::WriteThunks(OutputStream* out, uint32_t offset) {
+ if (current_thunk_to_write_ == thunk_locations_.size()) {
+ return offset;
+ }
+ uint32_t aligned_offset = CompiledMethod::AlignCode(offset, instruction_set_);
+ if (UNLIKELY(aligned_offset == thunk_locations_[current_thunk_to_write_])) {
+ ++current_thunk_to_write_;
+ uint32_t aligned_code_delta = aligned_offset - offset;
+ if (aligned_code_delta != 0u && !WriteCodeAlignment(out, aligned_code_delta)) {
+ return 0u;
+ }
+ if (UNLIKELY(!WriteRelCallThunk(out, ArrayRef<const uint8_t>(thunk_code_)))) {
+ return 0u;
+ }
+ uint32_t thunk_end_offset = aligned_offset + thunk_code_.size();
+ // Align after writing the chunk; see the NOTE in ReserveSpaceEnd() above.
+ offset = CompiledMethod::AlignCode(thunk_end_offset, instruction_set_);
+ aligned_code_delta = offset - thunk_end_offset;
+ if (aligned_code_delta != 0u && !WriteCodeAlignment(out, aligned_code_delta)) {
+ return 0u;
+ }
+ }
+ return offset;
+}
+
+ArmBaseRelativePatcher::ArmBaseRelativePatcher(RelativePatcherTargetProvider* provider,
+ InstructionSet instruction_set,
+ std::vector<uint8_t> thunk_code,
+ uint32_t max_positive_displacement,
+ uint32_t max_negative_displacement)
+ : provider_(provider), instruction_set_(instruction_set), thunk_code_(thunk_code),
+ max_positive_displacement_(max_positive_displacement),
+ max_negative_displacement_(max_negative_displacement),
+ thunk_locations_(), current_thunk_to_write_(0u), unprocessed_patches_() {
+}
+
+uint32_t ArmBaseRelativePatcher::ReserveSpaceInternal(uint32_t offset,
+ const CompiledMethod* compiled_method,
+ MethodReference method_ref,
+ uint32_t max_extra_space) {
+ DCHECK(compiled_method->GetQuickCode() != nullptr);
+ uint32_t quick_code_size = compiled_method->GetQuickCode()->size();
+ uint32_t quick_code_offset = compiled_method->AlignCode(offset) + sizeof(OatQuickMethodHeader);
+ uint32_t next_aligned_offset = compiled_method->AlignCode(quick_code_offset + quick_code_size);
+ // Adjust for extra space required by the subclass.
+ next_aligned_offset = compiled_method->AlignCode(next_aligned_offset + max_extra_space);
+ // TODO: ignore unprocessed patches targeting this method if they can reach quick_code_offset.
+ // We need the MethodReference for that.
+ if (!unprocessed_patches_.empty() &&
+ next_aligned_offset - unprocessed_patches_.front().second > max_positive_displacement_) {
+ bool needs_thunk = ReserveSpaceProcessPatches(quick_code_offset, method_ref,
+ next_aligned_offset);
+ if (needs_thunk) {
+ // A single thunk will cover all pending patches.
+ unprocessed_patches_.clear();
+ uint32_t thunk_location = compiled_method->AlignCode(offset);
+ thunk_locations_.push_back(thunk_location);
+ offset = CompiledMethod::AlignCode(thunk_location + thunk_code_.size(), instruction_set_);
+ }
+ }
+ for (const LinkerPatch& patch : compiled_method->GetPatches()) {
+ if (patch.Type() == kLinkerPatchCallRelative) {
+ unprocessed_patches_.emplace_back(patch.TargetMethod(),
+ quick_code_offset + patch.LiteralOffset());
+ }
+ }
+ return offset;
+}
+
+uint32_t ArmBaseRelativePatcher::CalculateDisplacement(uint32_t patch_offset,
+ uint32_t target_offset) {
+ // Unsigned arithmetic with its well-defined overflow behavior is just fine here.
+ uint32_t displacement = target_offset - patch_offset;
+ // NOTE: With unsigned arithmetic we do mean to use && rather than || below.
+ if (displacement > max_positive_displacement_ && displacement < -max_negative_displacement_) {
+ // Unwritten thunks have higher offsets; check if the next one is within range.
+ DCHECK(current_thunk_to_write_ == thunk_locations_.size() ||
+ thunk_locations_[current_thunk_to_write_] > patch_offset);
+ if (current_thunk_to_write_ != thunk_locations_.size() &&
+ thunk_locations_[current_thunk_to_write_] - patch_offset < max_positive_displacement_) {
+ displacement = thunk_locations_[current_thunk_to_write_] - patch_offset;
+ } else {
+ // We must have a previous thunk then.
+ DCHECK_NE(current_thunk_to_write_, 0u);
+ DCHECK_LT(thunk_locations_[current_thunk_to_write_ - 1], patch_offset);
+ displacement = thunk_locations_[current_thunk_to_write_ - 1] - patch_offset;
+ DCHECK(displacement >= -max_negative_displacement_);
+ }
+ }
+ return displacement;
+}
+
+bool ArmBaseRelativePatcher::ReserveSpaceProcessPatches(uint32_t quick_code_offset,
+ MethodReference method_ref,
+ uint32_t next_aligned_offset) {
+ // Process as many patches as possible, stop only on unresolved targets or calls too far back.
+ while (!unprocessed_patches_.empty()) {
+ MethodReference patch_ref = unprocessed_patches_.front().first;
+ uint32_t patch_offset = unprocessed_patches_.front().second;
+ DCHECK(thunk_locations_.empty() || thunk_locations_.back() <= patch_offset);
+ if (patch_ref.dex_file == method_ref.dex_file &&
+ patch_ref.dex_method_index == method_ref.dex_method_index) {
+ DCHECK_GT(quick_code_offset, patch_offset);
+ if (quick_code_offset - patch_offset > max_positive_displacement_) {
+ return true;
+ }
+ } else {
+ auto result = provider_->FindMethodOffset(patch_ref);
+ if (!result.first) {
+ // If still unresolved, check if we have a thunk within range.
+ if (thunk_locations_.empty() ||
+ patch_offset - thunk_locations_.back() > max_negative_displacement_) {
+ return next_aligned_offset - patch_offset > max_positive_displacement_;
+ }
+ } else {
+ uint32_t target_offset = result.second - CompiledCode::CodeDelta(instruction_set_);
+ if (target_offset >= patch_offset) {
+ DCHECK_LE(target_offset - patch_offset, max_positive_displacement_);
+ } else {
+ // When calling back, check if we have a thunk that's closer than the actual target.
+ if (!thunk_locations_.empty()) {
+ target_offset = std::max(target_offset, thunk_locations_.back());
+ }
+ if (patch_offset - target_offset > max_negative_displacement_) {
+ return true;
+ }
+ }
+ }
+ }
+ unprocessed_patches_.pop_front();
+ }
+ return false;
+}
+
+} // namespace linker
+} // namespace art
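The expected calling protocol, inferred from the comments above, is two passes: a layout pass that interleaves ReserveSpace() with the caller's own method layout and finishes with ReserveSpaceEnd(), and a write pass that calls WriteThunks() before emitting each method so pending thunks land at their reserved offsets. A rough sketch under assumed surrounding types (LayOutMethod and WriteMethod are hypothetical caller helpers):

    uint32_t LinkMethods(RelativePatcher* patcher, OutputStream* out, uint32_t offset,
                         const std::vector<std::pair<MethodReference,
                                                     const CompiledMethod*>>& methods) {
      // Pass 1: reserve space, letting the patcher slot in thunks where needed.
      for (const auto& entry : methods) {
        offset = patcher->ReserveSpace(offset, entry.second, entry.first);
        offset = LayOutMethod(offset, entry.second);  // Hypothetical helper.
      }
      offset = patcher->ReserveSpaceEnd(offset);

      // Pass 2: write any thunk that falls before each method, then the method.
      uint32_t out_offset = 0u;  // Start of the code section in the output.
      for (const auto& entry : methods) {
        out_offset = patcher->WriteThunks(out, out_offset);
        if (out_offset == 0u) {
          return 0u;  // Propagate the write error, matching WriteThunks().
        }
        out_offset = WriteMethod(out, out_offset, entry.second);  // Hypothetical helper.
      }
      return offset;
    }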
diff --git a/compiler/linker/arm/relative_patcher_arm_base.h b/compiler/linker/arm/relative_patcher_arm_base.h
new file mode 100644
index 0000000000..f80dd962ce
--- /dev/null
+++ b/compiler/linker/arm/relative_patcher_arm_base.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_LINKER_ARM_RELATIVE_PATCHER_ARM_BASE_H_
+#define ART_COMPILER_LINKER_ARM_RELATIVE_PATCHER_ARM_BASE_H_
+
+#include <deque>
+
+#include "linker/relative_patcher.h"
+#include "method_reference.h"
+
+namespace art {
+namespace linker {
+
+class ArmBaseRelativePatcher : public RelativePatcher {
+ public:
+ uint32_t ReserveSpace(uint32_t offset, const CompiledMethod* compiled_method,
+ MethodReference method_ref) OVERRIDE;
+ uint32_t ReserveSpaceEnd(uint32_t offset) OVERRIDE;
+ uint32_t WriteThunks(OutputStream* out, uint32_t offset) OVERRIDE;
+
+ protected:
+ ArmBaseRelativePatcher(RelativePatcherTargetProvider* provider,
+ InstructionSet instruction_set, std::vector<uint8_t> thunk_code,
+ uint32_t max_positive_displacement, uint32_t max_negative_displacement);
+
+ uint32_t ReserveSpaceInternal(uint32_t offset, const CompiledMethod* compiled_method,
+ MethodReference method_ref, uint32_t max_extra_space);
+ uint32_t CalculateDisplacement(uint32_t patch_offset, uint32_t target_offset);
+
+ private:
+ bool ReserveSpaceProcessPatches(uint32_t quick_code_offset, MethodReference method_ref,
+ uint32_t next_aligned_offset);
+
+ RelativePatcherTargetProvider* const provider_;
+ const InstructionSet instruction_set_;
+ const std::vector<uint8_t> thunk_code_;
+ const uint32_t max_positive_displacement_;
+ const uint32_t max_negative_displacement_;
+ std::vector<uint32_t> thunk_locations_;
+ size_t current_thunk_to_write_;
+
+ // ReserveSpace() tracks unprocessed patches.
+ typedef std::pair<MethodReference, uint32_t> UnprocessedPatch;
+ std::deque<UnprocessedPatch> unprocessed_patches_;
+
+ friend class Arm64RelativePatcherTest;
+ friend class Thumb2RelativePatcherTest;
+
+ DISALLOW_COPY_AND_ASSIGN(ArmBaseRelativePatcher);
+};
+
+} // namespace linker
+} // namespace art
+
+#endif // ART_COMPILER_LINKER_ARM_RELATIVE_PATCHER_ARM_BASE_H_
diff --git a/compiler/linker/arm/relative_patcher_thumb2.cc b/compiler/linker/arm/relative_patcher_thumb2.cc
new file mode 100644
index 0000000000..b17cbca2d2
--- /dev/null
+++ b/compiler/linker/arm/relative_patcher_thumb2.cc
@@ -0,0 +1,118 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "linker/arm/relative_patcher_thumb2.h"
+
+#include "compiled_method.h"
+#include "mirror/art_method.h"
+#include "utils/arm/assembler_thumb2.h"
+
+namespace art {
+namespace linker {
+
+Thumb2RelativePatcher::Thumb2RelativePatcher(RelativePatcherTargetProvider* provider)
+ : ArmBaseRelativePatcher(provider, kThumb2, CompileThunkCode(),
+ kMaxPositiveDisplacement, kMaxNegativeDisplacement) {
+}
+
+void Thumb2RelativePatcher::PatchCall(std::vector<uint8_t>* code, uint32_t literal_offset,
+ uint32_t patch_offset, uint32_t target_offset) {
+ DCHECK_LE(literal_offset + 4u, code->size());
+ DCHECK_EQ(literal_offset & 1u, 0u);
+ DCHECK_EQ(patch_offset & 1u, 0u);
+ DCHECK_EQ(target_offset & 1u, 1u); // Thumb2 mode bit.
+ uint32_t displacement = CalculateDisplacement(patch_offset, target_offset & ~1u);
+ displacement -= kPcDisplacement; // The base PC is at the end of the 4-byte patch.
+ DCHECK_EQ(displacement & 1u, 0u);
+ DCHECK((displacement >> 24) == 0u || (displacement >> 24) == 255u); // 25-bit signed.
+ uint32_t signbit = (displacement >> 31) & 0x1;
+ uint32_t i1 = (displacement >> 23) & 0x1;
+ uint32_t i2 = (displacement >> 22) & 0x1;
+ uint32_t imm10 = (displacement >> 12) & 0x03ff;
+ uint32_t imm11 = (displacement >> 1) & 0x07ff;
+ uint32_t j1 = i1 ^ (signbit ^ 1);
+ uint32_t j2 = i2 ^ (signbit ^ 1);
+ uint32_t value = (signbit << 26) | (j1 << 13) | (j2 << 11) | (imm10 << 16) | imm11;
+ value |= 0xf000d000; // BL
+
+ // Check that we're just overwriting an existing BL.
+ DCHECK_EQ(GetInsn32(code, literal_offset) & 0xf800d000, 0xf000d000);
+ // Write the new BL.
+ SetInsn32(code, literal_offset, value);
+}
+
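The field shuffling above is the Thumb2 BL (T1) immediate encoding: I1/I2 are recovered from J1/J2 and the sign bit, then concatenated with imm10 and imm11 to form a signed 25-bit, halfword-aligned displacement. A small inverse sketch, handy for checking the encoder in tests (illustrative helper, not part of this patch):

    #include <cstdint>

    // Decode the signed displacement back out of a logical 32-bit BL value,
    // inverting the packing in PatchCall() above.
    int32_t DecodeBlDisplacement(uint32_t value) {
      uint32_t signbit = (value >> 26) & 0x1;
      uint32_t imm10 = (value >> 16) & 0x03ff;
      uint32_t j1 = (value >> 13) & 0x1;
      uint32_t j2 = (value >> 11) & 0x1;
      uint32_t imm11 = value & 0x07ff;
      uint32_t i1 = j1 ^ signbit ^ 1u;  // Inverse of j1 = i1 ^ (signbit ^ 1).
      uint32_t i2 = j2 ^ signbit ^ 1u;
      uint32_t displacement =
          (signbit << 24) | (i1 << 23) | (i2 << 22) | (imm10 << 12) | (imm11 << 1);
      // Sign-extend from bit 24 (arithmetic right shift assumed for negatives).
      return static_cast<int32_t>(displacement << 7) >> 7;
    }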
+void Thumb2RelativePatcher::PatchDexCacheReference(std::vector<uint8_t>* code,
+ const LinkerPatch& patch,
+ uint32_t patch_offset,
+ uint32_t target_offset) {
+ uint32_t literal_offset = patch.LiteralOffset();
+ uint32_t pc_literal_offset = patch.PcInsnOffset();
+ uint32_t pc_base = patch_offset + (pc_literal_offset - literal_offset) + 4u /* PC adjustment */;
+ uint32_t diff = target_offset - pc_base;
+
+ uint32_t insn = GetInsn32(code, literal_offset);
+ DCHECK_EQ(insn & 0xff7ff0ffu, 0xf2400000u); // MOVW/MOVT, unpatched (imm16 == 0).
+ uint32_t diff16 = ((insn & 0x00800000u) != 0u) ? (diff >> 16) : (diff & 0xffffu);
+ uint32_t imm4 = (diff16 >> 12) & 0xfu;
+ uint32_t imm = (diff16 >> 11) & 0x1u;
+ uint32_t imm3 = (diff16 >> 8) & 0x7u;
+ uint32_t imm8 = diff16 & 0xffu;
+ insn = (insn & 0xfbf08f00u) | (imm << 26) | (imm4 << 16) | (imm3 << 12) | imm8;
+ SetInsn32(code, literal_offset, insn);
+}
+
+std::vector<uint8_t> Thumb2RelativePatcher::CompileThunkCode() {
+ // The thunk just uses the entry point in the ArtMethod. This works even for calls
+ // to the generic JNI and interpreter trampolines.
+ arm::Thumb2Assembler assembler;
+ assembler.LoadFromOffset(
+ arm::kLoadWord, arm::PC, arm::R0,
+ mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value());
+ assembler.bkpt(0);
+ std::vector<uint8_t> thunk_code(assembler.CodeSize());
+ MemoryRegion code(thunk_code.data(), thunk_code.size());
+ assembler.FinalizeInstructions(code);
+ return thunk_code;
+}
+
+void Thumb2RelativePatcher::SetInsn32(std::vector<uint8_t>* code, uint32_t offset, uint32_t value) {
+ DCHECK_LE(offset + 4u, code->size());
+ DCHECK_EQ(offset & 1u, 0u);
+ uint8_t* addr = &(*code)[offset];
+ addr[0] = (value >> 16) & 0xff;
+ addr[1] = (value >> 24) & 0xff;
+ addr[2] = (value >> 0) & 0xff;
+ addr[3] = (value >> 8) & 0xff;
+}
+
+uint32_t Thumb2RelativePatcher::GetInsn32(ArrayRef<const uint8_t> code, uint32_t offset) {
+ DCHECK_LE(offset + 4u, code.size());
+ DCHECK_EQ(offset & 1u, 0u);
+ const uint8_t* addr = &code[offset];
+ return
+ (static_cast<uint32_t>(addr[0]) << 16) +
+ (static_cast<uint32_t>(addr[1]) << 24) +
+ (static_cast<uint32_t>(addr[2]) << 0) +
+ (static_cast<uint32_t>(addr[3]) << 8);
+}
+
+template <typename Alloc>
+uint32_t Thumb2RelativePatcher::GetInsn32(std::vector<uint8_t, Alloc>* code, uint32_t offset) {
+ return GetInsn32(ArrayRef<const uint8_t>(*code), offset);
+}
+
+} // namespace linker
+} // namespace art
diff --git a/compiler/linker/arm/relative_patcher_thumb2.h b/compiler/linker/arm/relative_patcher_thumb2.h
new file mode 100644
index 0000000000..2d474c2db0
--- /dev/null
+++ b/compiler/linker/arm/relative_patcher_thumb2.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_LINKER_ARM_RELATIVE_PATCHER_THUMB2_H_
+#define ART_COMPILER_LINKER_ARM_RELATIVE_PATCHER_THUMB2_H_
+
+#include "linker/arm/relative_patcher_arm_base.h"
+
+namespace art {
+namespace linker {
+
+class Thumb2RelativePatcher FINAL : public ArmBaseRelativePatcher {
+ public:
+ explicit Thumb2RelativePatcher(RelativePatcherTargetProvider* provider);
+
+ void PatchCall(std::vector<uint8_t>* code, uint32_t literal_offset,
+ uint32_t patch_offset, uint32_t target_offset) OVERRIDE;
+ void PatchDexCacheReference(std::vector<uint8_t>* code, const LinkerPatch& patch,
+ uint32_t patch_offset, uint32_t target_offset) OVERRIDE;
+
+ private:
+ static std::vector<uint8_t> CompileThunkCode();
+
+ void SetInsn32(std::vector<uint8_t>* code, uint32_t offset, uint32_t value);
+ static uint32_t GetInsn32(ArrayRef<const uint8_t> code, uint32_t offset);
+
+ template <typename Alloc>
+ static uint32_t GetInsn32(std::vector<uint8_t, Alloc>* code, uint32_t offset);
+
+ // PC displacement from patch location; Thumb2 PC is always at instruction address + 4.
+ static constexpr int32_t kPcDisplacement = 4;
+
+ // Maximum positive and negative displacement measured from the patch location.
+ // (Signed 25 bit displacement with the last bit 0 has range [-2^24, 2^24-2] measured from
+ // the Thumb2 PC pointing right after the BL, i.e. 4 bytes later than the patch location.)
+ static constexpr uint32_t kMaxPositiveDisplacement = (1u << 24) - 2 + kPcDisplacement;
+ static constexpr uint32_t kMaxNegativeDisplacement = (1u << 24) - kPcDisplacement;
+
+ DISALLOW_COPY_AND_ASSIGN(Thumb2RelativePatcher);
+};
+
+} // namespace linker
+} // namespace art
+
+#endif // ART_COMPILER_LINKER_ARM_RELATIVE_PATCHER_THUMB2_H_
diff --git a/compiler/linker/arm/relative_patcher_thumb2_test.cc b/compiler/linker/arm/relative_patcher_thumb2_test.cc
new file mode 100644
index 0000000000..a057a4cf16
--- /dev/null
+++ b/compiler/linker/arm/relative_patcher_thumb2_test.cc
@@ -0,0 +1,351 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "linker/relative_patcher_test.h"
+#include "linker/arm/relative_patcher_thumb2.h"
+
+namespace art {
+namespace linker {
+
+class Thumb2RelativePatcherTest : public RelativePatcherTest {
+ public:
+ Thumb2RelativePatcherTest() : RelativePatcherTest(kThumb2, "default") { }
+
+ protected:
+ static const uint8_t kCallRawCode[];
+ static const ArrayRef<const uint8_t> kCallCode;
+ static const uint8_t kNopRawCode[];
+ static const ArrayRef<const uint8_t> kNopCode;
+
+ // Branches within range [-256, 256) can be created from these by adding the low 8 bits.
+ static constexpr uint32_t kBlPlus0 = 0xf000f800;
+ static constexpr uint32_t kBlMinus256 = 0xf7ffff00;
+
+ // Special BL values.
+ static constexpr uint32_t kBlPlusMax = 0xf3ffd7ff;
+ static constexpr uint32_t kBlMinusMax = 0xf400d000;
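+ // (kBlPlusMax encodes BL +(2^24 - 2) and kBlMinusMax encodes BL -2^24 in the
+ // Thumb2 BL encoding, where I1 = NOT(J1 ^ S) and I2 = NOT(J2 ^ S).)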
+
+ bool Create2MethodsWithGap(const ArrayRef<const uint8_t>& method1_code,
+ const ArrayRef<const LinkerPatch>& method1_patches,
+ const ArrayRef<const uint8_t>& method3_code,
+ const ArrayRef<const LinkerPatch>& method3_patches,
+ uint32_t distance_without_thunks) {
+ CHECK_EQ(distance_without_thunks % kArmAlignment, 0u);
+ const uint32_t method1_offset =
+ CompiledCode::AlignCode(kTrampolineSize, kThumb2) + sizeof(OatQuickMethodHeader);
+ AddCompiledMethod(MethodRef(1u), method1_code, method1_patches);
+
+ // We want to put method3 at a very precise offset.
+ const uint32_t method3_offset = method1_offset + distance_without_thunks;
+ CHECK(IsAligned<kArmAlignment>(method3_offset - sizeof(OatQuickMethodHeader)));
+
+ // Calculate size of method2 so that we put method3 at the correct place.
+ const uint32_t method2_offset =
+ CompiledCode::AlignCode(method1_offset + method1_code.size(), kThumb2) +
+ sizeof(OatQuickMethodHeader);
+ const uint32_t method2_size = (method3_offset - sizeof(OatQuickMethodHeader) - method2_offset);
+ std::vector<uint8_t> method2_raw_code(method2_size);
+ ArrayRef<const uint8_t> method2_code(method2_raw_code);
+ AddCompiledMethod(MethodRef(2u), method2_code, ArrayRef<const LinkerPatch>());
+
+ AddCompiledMethod(MethodRef(3u), method3_code, method3_patches);
+
+ Link();
+
+ // Check assumptions.
+ CHECK_EQ(GetMethodOffset(1), method1_offset);
+ CHECK_EQ(GetMethodOffset(2), method2_offset);
+ auto result3 = method_offset_map_.FindMethodOffset(MethodRef(3));
+ CHECK(result3.first);
+ // There may be a thunk before method3.
+ if (result3.second == method3_offset + 1 /* thumb mode */) {
+ return false; // No thunk.
+ } else {
+ uint32_t aligned_thunk_size = CompiledCode::AlignCode(ThunkSize(), kThumb2);
+ CHECK_EQ(result3.second, method3_offset + aligned_thunk_size + 1 /* thumb mode */);
+ return true; // Thunk present.
+ }
+ }
+
+ uint32_t GetMethodOffset(uint32_t method_idx) {
+ auto result = method_offset_map_.FindMethodOffset(MethodRef(method_idx));
+ CHECK(result.first);
+ CHECK_NE(result.second & 1u, 0u);
+ return result.second - 1 /* thumb mode */;
+ }
+
+ uint32_t ThunkSize() {
+ return static_cast<Thumb2RelativePatcher*>(patcher_.get())->thunk_code_.size();
+ }
+
+ bool CheckThunk(uint32_t thunk_offset) {
+ Thumb2RelativePatcher* patcher = static_cast<Thumb2RelativePatcher*>(patcher_.get());
+ ArrayRef<const uint8_t> expected_code(patcher->thunk_code_);
+ if (output_.size() < thunk_offset + expected_code.size()) {
+ LOG(ERROR) << "output_.size() == " << output_.size() << " < "
+ << "thunk_offset + expected_code.size() == " << (thunk_offset + expected_code.size());
+ return false;
+ }
+ ArrayRef<const uint8_t> linked_code(&output_[thunk_offset], expected_code.size());
+ if (linked_code == expected_code) {
+ return true;
+ }
+ // Log failure info.
+ DumpDiff(expected_code, linked_code);
+ return false;
+ }
+
+ std::vector<uint8_t> GenNopsAndBl(size_t num_nops, uint32_t bl) {
+ std::vector<uint8_t> result;
+ result.reserve(num_nops * 2u + 4u);
+ for (size_t i = 0; i != num_nops; ++i) {
+ result.push_back(0x00);
+ result.push_back(0xbf);
+ }
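+ // A 32-bit Thumb2 instruction is two 16-bit halfwords, each stored
+ // little-endian, with the high halfword first; hence the byte order below.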
+ result.push_back(static_cast<uint8_t>(bl >> 16));
+ result.push_back(static_cast<uint8_t>(bl >> 24));
+ result.push_back(static_cast<uint8_t>(bl));
+ result.push_back(static_cast<uint8_t>(bl >> 8));
+ return result;
+ }
+
+ void TestDexCacheReference(uint32_t dex_cache_arrays_begin, uint32_t element_offset) {
+ dex_cache_arrays_begin_ = dex_cache_arrays_begin;
+ static const uint8_t raw_code[] = {
+ 0x40, 0xf2, 0x00, 0x00, // MOVW r0, #0 (placeholder)
+ 0xc0, 0xf2, 0x00, 0x00, // MOVT r0, #0 (placeholder)
+ 0x78, 0x44, // ADD r0, pc
+ };
+ constexpr uint32_t pc_insn_offset = 8u;
+ const ArrayRef<const uint8_t> code(raw_code);
+ LinkerPatch patches[] = {
+ LinkerPatch::DexCacheArrayPatch(0u, nullptr, pc_insn_offset, element_offset),
+ LinkerPatch::DexCacheArrayPatch(4u, nullptr, pc_insn_offset, element_offset),
+ };
+ AddCompiledMethod(MethodRef(1u), code, ArrayRef<const LinkerPatch>(patches));
+ Link();
+
+ uint32_t method1_offset = GetMethodOffset(1u);
+ uint32_t pc_base_offset = method1_offset + pc_insn_offset + 4u /* PC adjustment */;
+ uint32_t diff = dex_cache_arrays_begin_ + element_offset - pc_base_offset;
+ // Distribute the bits of the diff between the MOVW and MOVT:
+ uint32_t diffw = diff & 0xffffu;
+ uint32_t difft = diff >> 16;
+ uint32_t movw = 0xf2400000u | // MOVW r0, #0 (placeholder),
+ ((diffw & 0xf000u) << (16 - 12)) | // move imm4 from bits 12-15 to bits 16-19,
+ ((diffw & 0x0800u) << (26 - 11)) | // move imm from bit 11 to bit 26,
+ ((diffw & 0x0700u) << (12 - 8)) | // move imm3 from bits 8-10 to bits 12-14,
+ ((diffw & 0x00ffu)); // keep imm8 at bits 0-7.
+ uint32_t movt = 0xf2c00000u | // MOVT r0, #0 (placeholder),
+ ((difft & 0xf000u) << (16 - 12)) | // move imm4 from bits 12-15 to bits 16-19,
+ ((difft & 0x0800u) << (26 - 11)) | // move imm from bit 11 to bit 26,
+ ((difft & 0x0700u) << (12 - 8)) | // move imm3 from bits 8-10 to bits 12-14,
+ ((difft & 0x00ffu)); // keep imm8 at bits 0-7.
+ const uint8_t expected_code[] = {
+ static_cast<uint8_t>(movw >> 16), static_cast<uint8_t>(movw >> 24),
+ static_cast<uint8_t>(movw >> 0), static_cast<uint8_t>(movw >> 8),
+ static_cast<uint8_t>(movt >> 16), static_cast<uint8_t>(movt >> 24),
+ static_cast<uint8_t>(movt >> 0), static_cast<uint8_t>(movt >> 8),
+ 0x78, 0x44,
+ };
+ EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
+ }
+};
+
+const uint8_t Thumb2RelativePatcherTest::kCallRawCode[] = {
+ 0x00, 0xf0, 0x00, 0xf8
+};
+
+const ArrayRef<const uint8_t> Thumb2RelativePatcherTest::kCallCode(kCallRawCode);
+
+const uint8_t Thumb2RelativePatcherTest::kNopRawCode[] = {
+ 0x00, 0xbf
+};
+
+const ArrayRef<const uint8_t> Thumb2RelativePatcherTest::kNopCode(kNopRawCode);
+
+TEST_F(Thumb2RelativePatcherTest, CallSelf) {
+ LinkerPatch patches[] = {
+ LinkerPatch::RelativeCodePatch(0u, nullptr, 1u),
+ };
+ AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches));
+ Link();
+
+ static const uint8_t expected_code[] = {
+ 0xff, 0xf7, 0xfe, 0xff
+ };
+ EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
+}
+
+TEST_F(Thumb2RelativePatcherTest, CallOther) {
+ LinkerPatch method1_patches[] = {
+ LinkerPatch::RelativeCodePatch(0u, nullptr, 2u),
+ };
+ AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(method1_patches));
+ LinkerPatch method2_patches[] = {
+ LinkerPatch::RelativeCodePatch(0u, nullptr, 1u),
+ };
+ AddCompiledMethod(MethodRef(2u), kCallCode, ArrayRef<const LinkerPatch>(method2_patches));
+ Link();
+
+ uint32_t method1_offset = GetMethodOffset(1u);
+ uint32_t method2_offset = GetMethodOffset(2u);
+ uint32_t diff_after = method2_offset - (method1_offset + 4u /* PC adjustment */);
+ ASSERT_EQ(diff_after & 1u, 0u);
+ ASSERT_LT(diff_after >> 1, 1u << 8); // Simple encoding, (diff_after >> 1) fits into 8 bits.
+ static const uint8_t method1_expected_code[] = {
+ 0x00, 0xf0, static_cast<uint8_t>(diff_after >> 1), 0xf8
+ };
+ EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(method1_expected_code)));
+ uint32_t diff_before = method1_offset - (method2_offset + 4u /* PC adjustment */);
+ ASSERT_EQ(diff_before & 1u, 0u);
+ ASSERT_GE(diff_before, -1u << 9); // Simple encoding, -256 <= (diff >> 1) < 0 (checked as unsigned).
+ auto method2_expected_code = GenNopsAndBl(0u, kBlMinus256 | ((diff_before >> 1) & 0xffu));
+ EXPECT_TRUE(CheckLinkedMethod(MethodRef(2u), ArrayRef<const uint8_t>(method2_expected_code)));
+}
+
+TEST_F(Thumb2RelativePatcherTest, CallTrampoline) {
+ LinkerPatch patches[] = {
+ LinkerPatch::RelativeCodePatch(0u, nullptr, 2u),
+ };
+ AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches));
+ Link();
+
+ uint32_t method1_offset = GetMethodOffset(1u);
+ uint32_t diff = kTrampolineOffset - (method1_offset + 4u);
+ ASSERT_EQ(diff & 1u, 0u);
+ ASSERT_GE(diff, -1u << 9); // Simple encoding, -256 <= (diff >> 1) < 0 (checked as unsigned).
+ auto expected_code = GenNopsAndBl(0u, kBlMinus256 | ((diff >> 1) & 0xffu));
+ EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
+}
+
+TEST_F(Thumb2RelativePatcherTest, CallOtherAlmostTooFarAfter) {
+ auto method1_raw_code = GenNopsAndBl(3u, kBlPlus0);
+ constexpr uint32_t bl_offset_in_method1 = 3u * 2u; // After NOPs.
+ ArrayRef<const uint8_t> method1_code(method1_raw_code);
+ ASSERT_EQ(bl_offset_in_method1 + 4u, method1_code.size());
+ LinkerPatch method1_patches[] = {
+ LinkerPatch::RelativeCodePatch(bl_offset_in_method1, nullptr, 3u),
+ };
+
+ constexpr uint32_t max_positive_disp = 16 * MB - 2u + 4u /* PC adjustment */;
+ bool thunk_in_gap = Create2MethodsWithGap(method1_code, method1_patches,
+ kNopCode, ArrayRef<const LinkerPatch>(),
+ bl_offset_in_method1 + max_positive_disp);
+ ASSERT_FALSE(thunk_in_gap); // There should be no thunk.
+
+ // Check linked code.
+ auto expected_code = GenNopsAndBl(3u, kBlPlusMax);
+ EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
+}
+
+TEST_F(Thumb2RelativePatcherTest, CallOtherAlmostTooFarBefore) {
+ auto method3_raw_code = GenNopsAndBl(2u, kBlPlus0);
+ constexpr uint32_t bl_offset_in_method3 = 2u * 2u; // After NOPs.
+ ArrayRef<const uint8_t> method3_code(method3_raw_code);
+ ASSERT_EQ(bl_offset_in_method3 + 4u, method3_code.size());
+ LinkerPatch method3_patches[] = {
+ LinkerPatch::RelativeCodePatch(bl_offset_in_method3, nullptr, 1u),
+ };
+
+ constexpr uint32_t max_negative_disp = 16 * MB - 4u /* PC adjustment */;
+ bool thunk_in_gap = Create2MethodsWithGap(kNopCode, ArrayRef<const LinkerPatch>(),
+ method3_code, method3_patches,
+ max_negative_disp - bl_offset_in_method3);
+ ASSERT_FALSE(thunk_in_gap); // There should be no thunk.
+
+ // Check linked code.
+ auto expected_code = GenNopsAndBl(2u, kBlMinusMax);
+ EXPECT_TRUE(CheckLinkedMethod(MethodRef(3u), ArrayRef<const uint8_t>(expected_code)));
+}
+
+TEST_F(Thumb2RelativePatcherTest, CallOtherJustTooFarAfter) {
+ auto method1_raw_code = GenNopsAndBl(2u, kBlPlus0);
+ constexpr uint32_t bl_offset_in_method1 = 2u * 2u; // After NOPs.
+ ArrayRef<const uint8_t> method1_code(method1_raw_code);
+ ASSERT_EQ(bl_offset_in_method1 + 4u, method1_code.size());
+ LinkerPatch method1_patches[] = {
+ LinkerPatch::RelativeCodePatch(bl_offset_in_method1, nullptr, 3u),
+ };
+
+ constexpr uint32_t just_over_max_positive_disp = 16 * MB + 4u /* PC adjustment */;
+ bool thunk_in_gap = Create2MethodsWithGap(method1_code, method1_patches,
+ kNopCode, ArrayRef<const LinkerPatch>(),
+ bl_offset_in_method1 + just_over_max_positive_disp);
+ ASSERT_TRUE(thunk_in_gap);
+
+ uint32_t method1_offset = GetMethodOffset(1u);
+ uint32_t method3_offset = GetMethodOffset(3u);
+ uint32_t method3_header_offset = method3_offset - sizeof(OatQuickMethodHeader);
+ ASSERT_TRUE(IsAligned<kArmAlignment>(method3_header_offset));
+ uint32_t thunk_offset = method3_header_offset - CompiledCode::AlignCode(ThunkSize(), kThumb2);
+ ASSERT_TRUE(IsAligned<kArmAlignment>(thunk_offset));
+ uint32_t diff = thunk_offset - (method1_offset + bl_offset_in_method1 + 4u /* PC adjustment */);
+ ASSERT_EQ(diff & 1u, 0u);
+ ASSERT_GE(diff, 16 * MB - (1u << 9)); // Simple encoding, unknown bits fit into the low 8 bits.
+ auto expected_code = GenNopsAndBl(2u, 0xf3ffd700 | ((diff >> 1) & 0xffu));
+ EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
+ CheckThunk(thunk_offset);
+}
+
+TEST_F(Thumb2RelativePatcherTest, CallOtherJustTooFarBefore) {
+ auto method3_raw_code = GenNopsAndBl(3u, kBlPlus0);
+ constexpr uint32_t bl_offset_in_method3 = 3u * 2u; // After NOPs.
+ ArrayRef<const uint8_t> method3_code(method3_raw_code);
+ ASSERT_EQ(bl_offset_in_method3 + 4u, method3_code.size());
+ LinkerPatch method3_patches[] = {
+ LinkerPatch::RelativeCodePatch(bl_offset_in_method3, nullptr, 1u),
+ };
+
+ constexpr uint32_t just_over_max_negative_disp = 16 * MB + 2 - 4u /* PC adjustment */;
+ bool thunk_in_gap = Create2MethodsWithGap(kNopCode, ArrayRef<const LinkerPatch>(),
+ method3_code, method3_patches,
+ just_over_max_negative_disp - bl_offset_in_method3);
+ ASSERT_FALSE(thunk_in_gap); // There should be a thunk, but it should be after method3.
+
+ // Check linked code.
+ uint32_t method3_offset = GetMethodOffset(3u);
+ uint32_t thunk_offset = CompiledCode::AlignCode(method3_offset + method3_code.size(), kThumb2);
+ uint32_t diff = thunk_offset - (method3_offset + bl_offset_in_method3 + 4u /* PC adjustment */);
+ ASSERT_EQ(diff & 1u, 0u);
+ ASSERT_LT(diff >> 1, 1u << 8); // Simple encoding, (diff >> 1) fits into 8 bits.
+ auto expected_code = GenNopsAndBl(3u, kBlPlus0 | ((diff >> 1) & 0xffu));
+ EXPECT_TRUE(CheckLinkedMethod(MethodRef(3u), ArrayRef<const uint8_t>(expected_code)));
+ EXPECT_TRUE(CheckThunk(thunk_offset));
+}
+
+TEST_F(Thumb2RelativePatcherTest, DexCacheReferenceImm8) {
+ TestDexCacheReference(0x00ff0000u, 0x00fcu);
+ ASSERT_LT(GetMethodOffset(1u), 0xfcu);
+}
+
+TEST_F(Thumb2RelativePatcherTest, DexCacheReferenceImm3) {
+ TestDexCacheReference(0x02ff0000u, 0x05fcu);
+ ASSERT_LT(GetMethodOffset(1u), 0xfcu);
+}
+
+TEST_F(Thumb2RelativePatcherTest, DexCacheReferenceImm) {
+ TestDexCacheReference(0x08ff0000u, 0x08fcu);
+ ASSERT_LT(GetMethodOffset(1u), 0xfcu);
+}
+
+TEST_F(Thumb2RelativePatcherTest, DexCacheReferenceImm4) {
+ TestDexCacheReference(0xd0ff0000u, 0x60fcu);
+ ASSERT_LT(GetMethodOffset(1u), 0xfcu);
+}
+
+} // namespace linker
+} // namespace art
diff --git a/compiler/linker/arm64/relative_patcher_arm64.cc b/compiler/linker/arm64/relative_patcher_arm64.cc
new file mode 100644
index 0000000000..72ddf07089
--- /dev/null
+++ b/compiler/linker/arm64/relative_patcher_arm64.cc
@@ -0,0 +1,322 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "linker/arm64/relative_patcher_arm64.h"
+
+#include "arch/arm64/instruction_set_features_arm64.h"
+#include "compiled_method.h"
+#include "driver/compiler_driver.h"
+#include "mirror/art_method.h"
+#include "utils/arm64/assembler_arm64.h"
+#include "oat.h"
+#include "output_stream.h"
+
+namespace art {
+namespace linker {
+
+Arm64RelativePatcher::Arm64RelativePatcher(RelativePatcherTargetProvider* provider,
+ const Arm64InstructionSetFeatures* features)
+ : ArmBaseRelativePatcher(provider, kArm64, CompileThunkCode(),
+ kMaxPositiveDisplacement, kMaxNegativeDisplacement),
+ fix_cortex_a53_843419_(features->NeedFixCortexA53_843419()),
+ reserved_adrp_thunks_(0u),
+ processed_adrp_thunks_(0u) {
+ if (fix_cortex_a53_843419_) {
+ adrp_thunk_locations_.reserve(16u);
+ current_method_thunks_.reserve(16u * kAdrpThunkSize);
+ }
+}
+
+uint32_t Arm64RelativePatcher::ReserveSpace(uint32_t offset,
+ const CompiledMethod* compiled_method,
+ MethodReference method_ref) {
+ if (!fix_cortex_a53_843419_) {
+ DCHECK(adrp_thunk_locations_.empty());
+ return ReserveSpaceInternal(offset, compiled_method, method_ref, 0u);
+ }
+
+ // Add thunks for previous method if any.
+ if (reserved_adrp_thunks_ != adrp_thunk_locations_.size()) {
+ size_t num_adrp_thunks = adrp_thunk_locations_.size() - reserved_adrp_thunks_;
+ offset = CompiledMethod::AlignCode(offset, kArm64) + kAdrpThunkSize * num_adrp_thunks;
+ reserved_adrp_thunks_ = adrp_thunk_locations_.size();
+ }
+
+ // Count the number of ADRP insns as the upper bound on the number of thunks needed
+ // and use it to reserve space for other linker patches.
+ size_t num_adrp = 0u;
+ DCHECK(compiled_method != nullptr);
+ for (const LinkerPatch& patch : compiled_method->GetPatches()) {
+ if (patch.Type() == kLinkerPatchDexCacheArray &&
+ patch.LiteralOffset() == patch.PcInsnOffset()) { // ADRP patch
+ ++num_adrp;
+ }
+ }
+ offset = ReserveSpaceInternal(offset, compiled_method, method_ref, kAdrpThunkSize * num_adrp);
+ if (num_adrp == 0u) {
+ return offset;
+ }
+
+ // Now that we have the actual offset where the code will be placed, locate the ADRP insns
+ // that actually require the thunk.
+ uint32_t quick_code_offset = compiled_method->AlignCode(offset) + sizeof(OatQuickMethodHeader);
+ ArrayRef<const uint8_t> code(*compiled_method->GetQuickCode());
+ uint32_t thunk_offset = compiled_method->AlignCode(quick_code_offset + code.size());
+ DCHECK(compiled_method != nullptr);
+ for (const LinkerPatch& patch : compiled_method->GetPatches()) {
+ if (patch.Type() == kLinkerPatchDexCacheArray &&
+ patch.LiteralOffset() == patch.PcInsnOffset()) { // ADRP patch
+ uint32_t patch_offset = quick_code_offset + patch.LiteralOffset();
+ if (NeedsErratum843419Thunk(code, patch.LiteralOffset(), patch_offset)) {
+ adrp_thunk_locations_.emplace_back(patch_offset, thunk_offset);
+ thunk_offset += kAdrpThunkSize;
+ }
+ }
+ }
+ return offset;
+}
+
+uint32_t Arm64RelativePatcher::ReserveSpaceEnd(uint32_t offset) {
+ if (!fix_cortex_a53_843419_) {
+ DCHECK(adrp_thunk_locations_.empty());
+ } else {
+ // Add thunks for the last method if any.
+ if (reserved_adrp_thunks_ != adrp_thunk_locations_.size()) {
+ size_t num_adrp_thunks = adrp_thunk_locations_.size() - reserved_adrp_thunks_;
+ offset = CompiledMethod::AlignCode(offset, kArm64) + kAdrpThunkSize * num_adrp_thunks;
+ reserved_adrp_thunks_ = adrp_thunk_locations_.size();
+ }
+ }
+ return ArmBaseRelativePatcher::ReserveSpaceEnd(offset);
+}
+
+uint32_t Arm64RelativePatcher::WriteThunks(OutputStream* out, uint32_t offset) {
+ if (fix_cortex_a53_843419_) {
+ if (!current_method_thunks_.empty()) {
+ uint32_t aligned_offset = CompiledMethod::AlignCode(offset, kArm64);
+ if (kIsDebugBuild) {
+ CHECK(IsAligned<kAdrpThunkSize>(current_method_thunks_.size()));
+ size_t num_thunks = current_method_thunks_.size() / kAdrpThunkSize;
+ CHECK_LE(num_thunks, processed_adrp_thunks_);
+ for (size_t i = 0u; i != num_thunks; ++i) {
+ const auto& entry = adrp_thunk_locations_[processed_adrp_thunks_ - num_thunks + i];
+ CHECK_EQ(entry.second, aligned_offset + i * kAdrpThunkSize);
+ }
+ }
+ uint32_t aligned_code_delta = aligned_offset - offset;
+ if (aligned_code_delta != 0u && !WriteCodeAlignment(out, aligned_code_delta)) {
+ return 0u;
+ }
+ if (!WriteMiscThunk(out, ArrayRef<const uint8_t>(current_method_thunks_))) {
+ return 0u;
+ }
+ offset = aligned_offset + current_method_thunks_.size();
+ current_method_thunks_.clear();
+ }
+ }
+ return ArmBaseRelativePatcher::WriteThunks(out, offset);
+}
+
+void Arm64RelativePatcher::PatchCall(std::vector<uint8_t>* code, uint32_t literal_offset,
+ uint32_t patch_offset, uint32_t target_offset) {
+ DCHECK_LE(literal_offset + 4u, code->size());
+ DCHECK_EQ(literal_offset & 3u, 0u);
+ DCHECK_EQ(patch_offset & 3u, 0u);
+ DCHECK_EQ(target_offset & 3u, 0u);
+ uint32_t displacement = CalculateDisplacement(patch_offset, target_offset & ~1u);
+ DCHECK_EQ(displacement & 3u, 0u);
+ DCHECK((displacement >> 27) == 0u || (displacement >> 27) == 31u); // 28-bit signed.
+ uint32_t insn = (displacement & 0x0fffffffu) >> 2;
+ insn |= 0x94000000; // BL
+
+ // Check that we're just overwriting an existing BL.
+ DCHECK_EQ(GetInsn(code, literal_offset) & 0xfc000000u, 0x94000000u);
+ // Write the new BL.
+ SetInsn(code, literal_offset, insn);
+}
+
+void Arm64RelativePatcher::PatchDexCacheReference(std::vector<uint8_t>* code,
+ const LinkerPatch& patch,
+ uint32_t patch_offset,
+ uint32_t target_offset) {
+ DCHECK_EQ(patch_offset & 3u, 0u);
+ DCHECK_EQ(target_offset & 3u, 0u);
+ uint32_t literal_offset = patch.LiteralOffset();
+ uint32_t insn = GetInsn(code, literal_offset);
+ uint32_t pc_insn_offset = patch.PcInsnOffset();
+ uint32_t disp = target_offset - ((patch_offset - literal_offset + pc_insn_offset) & ~0xfffu);
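+ // The ADRP computes the 4KiB page address of its own PC; the expression masks
+ // the ADRP's offset (patch_offset - literal_offset + pc_insn_offset) down to
+ // the page boundary, so disp is the target's displacement from that page.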
+ if (literal_offset == pc_insn_offset) {
+ // Check it's an ADRP with imm == 0 (unset).
+ DCHECK_EQ((insn & 0xffffffe0u), 0x90000000u)
+ << literal_offset << ", " << pc_insn_offset << ", 0x" << std::hex << insn;
+ if (fix_cortex_a53_843419_ && processed_adrp_thunks_ != adrp_thunk_locations_.size() &&
+ adrp_thunk_locations_[processed_adrp_thunks_].first == patch_offset) {
+ DCHECK(NeedsErratum843419Thunk(ArrayRef<const uint8_t>(*code),
+ literal_offset, patch_offset));
+ uint32_t thunk_offset = adrp_thunk_locations_[processed_adrp_thunks_].second;
+ uint32_t adrp_disp = target_offset - (thunk_offset & ~0xfffu);
+ uint32_t adrp = PatchAdrp(insn, adrp_disp);
+
+ uint32_t out_disp = thunk_offset - patch_offset;
+ DCHECK_EQ(out_disp & 3u, 0u);
+ DCHECK((out_disp >> 27) == 0u || (out_disp >> 27) == 31u); // 28-bit signed.
+ insn = (out_disp & 0x0fffffffu) >> 2;
+ insn |= 0x14000000; // B <thunk>
+
+ uint32_t back_disp = -out_disp;
+ DCHECK_EQ(back_disp & 3u, 0u);
+ DCHECK((back_disp >> 27) == 0u || (back_disp >> 27) == 31u); // 28-bit signed.
+ uint32_t b_back = (back_disp & 0x0fffffffu) >> 2;
+ b_back |= 0x14000000; // B <back>
+ size_t thunks_code_offset = current_method_thunks_.size();
+ current_method_thunks_.resize(thunks_code_offset + kAdrpThunkSize);
+ SetInsn(&current_method_thunks_, thunks_code_offset, adrp);
+ SetInsn(&current_method_thunks_, thunks_code_offset + 4u, b_back);
+ static_assert(kAdrpThunkSize == 2 * 4u, "thunk has 2 instructions");
+
+ processed_adrp_thunks_ += 1u;
+ } else {
+ insn = PatchAdrp(insn, disp);
+ }
+ // Write the new ADRP (or B to the erratum 843419 thunk).
+ SetInsn(code, literal_offset, insn);
+ } else {
+ DCHECK_EQ(insn & 0xfffffc00, 0xb9400000); // LDR 32-bit with imm12 == 0 (unset).
+ if (kIsDebugBuild) {
+ uint32_t adrp = GetInsn(code, pc_insn_offset);
+ if ((adrp & 0x9f000000u) != 0x90000000u) {
+ CHECK(fix_cortex_a53_843419_);
+ CHECK_EQ(adrp & 0xfc000000u, 0x14000000u); // B <thunk>
+ CHECK(IsAligned<kAdrpThunkSize>(current_method_thunks_.size()));
+ size_t num_thunks = current_method_thunks_.size() / kAdrpThunkSize;
+ CHECK_LE(num_thunks, processed_adrp_thunks_);
+ uint32_t b_offset = patch_offset - literal_offset + pc_insn_offset;
+ for (size_t i = processed_adrp_thunks_ - num_thunks; ; ++i) {
+ CHECK_NE(i, processed_adrp_thunks_);
+ if (adrp_thunk_locations_[i].first == b_offset) {
+ size_t idx = num_thunks - (processed_adrp_thunks_ - i);
+ adrp = GetInsn(&current_method_thunks_, idx * kAdrpThunkSize);
+ break;
+ }
+ }
+ }
+ CHECK_EQ(adrp & 0x9f00001fu, // Check that pc_insn_offset points
+ 0x90000000 | ((insn >> 5) & 0x1fu)); // to ADRP with matching register.
+ }
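+ // Patch the LDR's imm12 field with the low 12 bits of the displacement,
+ // scaled down by the 32-bit load's 4-byte access size.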
+ uint32_t imm12 = (disp & 0xfffu) >> 2;
+ insn = (insn & ~(0xfffu << 10)) | (imm12 << 10);
+ SetInsn(code, literal_offset, insn);
+ }
+}
+
+std::vector<uint8_t> Arm64RelativePatcher::CompileThunkCode() {
+ // The thunk just uses the entry point in the ArtMethod. This works even for calls
+ // to the generic JNI and interpreter trampolines.
+ arm64::Arm64Assembler assembler;
+ Offset offset(mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+ kArm64PointerSize).Int32Value());
+ assembler.JumpTo(ManagedRegister(arm64::X0), offset, ManagedRegister(arm64::IP0));
+ // Ensure we emit the literal pool.
+ assembler.EmitSlowPaths();
+ std::vector<uint8_t> thunk_code(assembler.CodeSize());
+ MemoryRegion code(thunk_code.data(), thunk_code.size());
+ assembler.FinalizeInstructions(code);
+ return thunk_code;
+}
+
+uint32_t Arm64RelativePatcher::PatchAdrp(uint32_t adrp, uint32_t disp) {
+ return (adrp & 0x9f00001fu) | // Clear offset bits, keep ADRP with destination reg.
+ // Bottom 12 bits are ignored, the next 2 lowest bits are encoded in bits 29-30.
+ ((disp & 0x00003000u) << (29 - 12)) |
+ // The next 16 bits are encoded in bits 5-22.
+ ((disp & 0xffffc000u) >> (12 + 2 - 5)) |
+ // Since the target_offset is based on the beginning of the oat file and the
+ // image space precedes the oat file, the target_offset into image space will
+ // be negative yet passed as uint32_t. Therefore we limit the displacement
+ // to +-2GiB (rather than the maximum +-4GiB) and determine the sign bit from
+ // the highest bit of the displacement. This is encoded in bit 23.
+ ((disp & 0x80000000u) >> (31 - 23));
+}
+
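+// Cortex-A53 erratum 843419: an ADRP in the last two instruction slots of a 4KiB
+// page (offsets ...ff8/...ffc) followed shortly by certain loads or stores can
+// produce a wrong result. The workaround used here replaces such an ADRP with a
+// branch to a thunk that redoes the ADRP at a safe offset. (Summary only; see the
+// ARM erratum notice for the exact conditions.)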
+bool Arm64RelativePatcher::NeedsErratum843419Thunk(ArrayRef<const uint8_t> code,
+ uint32_t literal_offset,
+ uint32_t patch_offset) {
+ DCHECK_EQ(patch_offset & 0x3u, 0u);
+ if ((patch_offset & 0xff8) == 0xff8) { // ...ff8 or ...ffc
+ uint32_t adrp = GetInsn(code, literal_offset);
+ DCHECK_EQ(adrp & 0xff000000, 0x90000000);
+ uint32_t next_offset = patch_offset + 4u;
+ uint32_t next_insn = GetInsn(code, literal_offset + 4u);
+
+ // Below we avoid patching sequences where the adrp is followed by a load which can easily
+ // be proved to be aligned.
+
+ // First check if the next insn is the LDR using the result of the ADRP.
+ // LDR <Wt>, [<Xn>, #pimm], where <Xn> == ADRP destination reg.
+ if ((next_insn & 0xffc00000) == 0xb9400000 &&
+ (((next_insn >> 5) ^ adrp) & 0x1f) == 0) {
+ return false;
+ }
+
+ // LDR <Wt>, <label> is always aligned and thus it doesn't cause boundary crossing.
+ if ((next_insn & 0xff000000) == 0x18000000) {
+ return false;
+ }
+
+ // LDR <Xt>, <label> is aligned iff the pc + displacement is a multiple of 8.
+ if ((next_insn & 0xff000000) == 0x58000000) {
+ bool is_aligned_load = (((next_offset >> 2) ^ (next_insn >> 5)) & 1) == 0;
+ return !is_aligned_load;
+ }
+
+ // LDR <Wt>, [SP, #<pimm>] and LDR <Xt>, [SP, #<pimm>] are always aligned loads, as SP is
+ // guaranteed to be 128-bit aligned and <pimm> is a multiple of the load size.
+ if ((next_insn & 0xbfc003e0) == 0xb94003e0) {
+ return false;
+ }
+ return true;
+ }
+ return false;
+}
+
+void Arm64RelativePatcher::SetInsn(std::vector<uint8_t>* code, uint32_t offset, uint32_t value) {
+ DCHECK_LE(offset + 4u, code->size());
+ DCHECK_EQ(offset & 3u, 0u);
+ uint8_t* addr = &(*code)[offset];
+ addr[0] = (value >> 0) & 0xff;
+ addr[1] = (value >> 8) & 0xff;
+ addr[2] = (value >> 16) & 0xff;
+ addr[3] = (value >> 24) & 0xff;
+}
+
+uint32_t Arm64RelativePatcher::GetInsn(ArrayRef<const uint8_t> code, uint32_t offset) {
+ DCHECK_LE(offset + 4u, code.size());
+ DCHECK_EQ(offset & 3u, 0u);
+ const uint8_t* addr = &code[offset];
+ return
+ (static_cast<uint32_t>(addr[0]) << 0) +
+ (static_cast<uint32_t>(addr[1]) << 8) +
+ (static_cast<uint32_t>(addr[2]) << 16) +
+ (static_cast<uint32_t>(addr[3]) << 24);
+}
+
+template <typename Alloc>
+uint32_t Arm64RelativePatcher::GetInsn(std::vector<uint8_t, Alloc>* code, uint32_t offset) {
+ return GetInsn(ArrayRef<const uint8_t>(*code), offset);
+}
+
+} // namespace linker
+} // namespace art
diff --git a/compiler/linker/arm64/relative_patcher_arm64.h b/compiler/linker/arm64/relative_patcher_arm64.h
new file mode 100644
index 0000000000..2d07e75c85
--- /dev/null
+++ b/compiler/linker/arm64/relative_patcher_arm64.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_LINKER_ARM64_RELATIVE_PATCHER_ARM64_H_
+#define ART_COMPILER_LINKER_ARM64_RELATIVE_PATCHER_ARM64_H_
+
+#include "linker/arm/relative_patcher_arm_base.h"
+#include "utils/array_ref.h"
+
+namespace art {
+namespace linker {
+
+class Arm64RelativePatcher FINAL : public ArmBaseRelativePatcher {
+ public:
+ Arm64RelativePatcher(RelativePatcherTargetProvider* provider,
+ const Arm64InstructionSetFeatures* features);
+
+ uint32_t ReserveSpace(uint32_t offset, const CompiledMethod* compiled_method,
+ MethodReference method_ref) OVERRIDE;
+ uint32_t ReserveSpaceEnd(uint32_t offset) OVERRIDE;
+ uint32_t WriteThunks(OutputStream* out, uint32_t offset) OVERRIDE;
+ void PatchCall(std::vector<uint8_t>* code, uint32_t literal_offset,
+ uint32_t patch_offset, uint32_t target_offset) OVERRIDE;
+ void PatchDexCacheReference(std::vector<uint8_t>* code, const LinkerPatch& patch,
+ uint32_t patch_offset, uint32_t target_offset) OVERRIDE;
+
+ private:
+ static std::vector<uint8_t> CompileThunkCode();
+ static uint32_t PatchAdrp(uint32_t adrp, uint32_t disp);
+
+ static bool NeedsErratum843419Thunk(ArrayRef<const uint8_t> code, uint32_t literal_offset,
+ uint32_t patch_offset);
+ void SetInsn(std::vector<uint8_t>* code, uint32_t offset, uint32_t value);
+ static uint32_t GetInsn(ArrayRef<const uint8_t> code, uint32_t offset);
+
+ template <typename Alloc>
+ static uint32_t GetInsn(std::vector<uint8_t, Alloc>* code, uint32_t offset);
+
+ // Maximum positive and negative displacement measured from the patch location.
+ // (Signed 28 bit displacement with the last bit 0 has range [-2^27, 2^27-4] measured from
+ // the ARM64 PC pointing to the BL.)
+ static constexpr uint32_t kMaxPositiveDisplacement = (1u << 27) - 4u;
+ static constexpr uint32_t kMaxNegativeDisplacement = (1u << 27);
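+ // In other words, a BL at patch_offset can branch to targets in
+ // [patch_offset - kMaxNegativeDisplacement, patch_offset + kMaxPositiveDisplacement].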
+
+ // The ADRP thunk for erratum 843419 is 2 instructions, i.e. 8 bytes.
+ static constexpr uint32_t kAdrpThunkSize = 8u;
+
+ const bool fix_cortex_a53_843419_;
+ // Map original patch_offset to thunk offset.
+ std::vector<std::pair<uint32_t, uint32_t>> adrp_thunk_locations_;
+ size_t reserved_adrp_thunks_;
+ size_t processed_adrp_thunks_;
+ std::vector<uint8_t> current_method_thunks_;
+
+ DISALLOW_COPY_AND_ASSIGN(Arm64RelativePatcher);
+};
+
+} // namespace linker
+} // namespace art
+
+#endif // ART_COMPILER_LINKER_ARM64_RELATIVE_PATCHER_ARM64_H_
diff --git a/compiler/linker/arm64/relative_patcher_arm64_test.cc b/compiler/linker/arm64/relative_patcher_arm64_test.cc
new file mode 100644
index 0000000000..21f93672ad
--- /dev/null
+++ b/compiler/linker/arm64/relative_patcher_arm64_test.cc
@@ -0,0 +1,582 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "linker/relative_patcher_test.h"
+#include "linker/arm64/relative_patcher_arm64.h"
+
+namespace art {
+namespace linker {
+
+class Arm64RelativePatcherTest : public RelativePatcherTest {
+ public:
+ explicit Arm64RelativePatcherTest(const std::string& variant)
+ : RelativePatcherTest(kArm64, variant) { }
+
+ protected:
+ static const uint8_t kCallRawCode[];
+ static const ArrayRef<const uint8_t> kCallCode;
+ static const uint8_t kNopRawCode[];
+ static const ArrayRef<const uint8_t> kNopCode;
+
+ // All branches can be created from kBlPlus0 or kBPlus0 by adding the low 26 bits.
+ static constexpr uint32_t kBlPlus0 = 0x94000000u;
+ static constexpr uint32_t kBPlus0 = 0x14000000u;
+
+ // Special BL values.
+ static constexpr uint32_t kBlPlusMax = 0x95ffffffu;
+ static constexpr uint32_t kBlMinusMax = 0x96000000u;
+
+ // LDUR xzr, [x2, #4], i.e. an unaligned load crossing a 64-bit boundary (assuming aligned x2).
+ static constexpr uint32_t kLdurInsn = 0xf840405fu;
+
+ // LDR w12, <label> and LDR x12, <label>. Bits 5-23 contain label displacement in 4-byte units.
+ static constexpr uint32_t kLdrWPcRelInsn = 0x1800000cu;
+ static constexpr uint32_t kLdrXPcRelInsn = 0x5800000cu;
+
+ // LDR w13, [SP, #<pimm>] and LDR x13, [SP, #<pimm>]. Bits 10-21 contain displacement from SP
+ // in units of 4-bytes (for 32-bit load) or 8-bytes (for 64-bit load).
+ static constexpr uint32_t kLdrWSpRelInsn = 0xb94003edu;
+ static constexpr uint32_t kLdrXSpRelInsn = 0xf94003edu;
+
+ uint32_t Create2MethodsWithGap(const ArrayRef<const uint8_t>& method1_code,
+ const ArrayRef<const LinkerPatch>& method1_patches,
+ const ArrayRef<const uint8_t>& last_method_code,
+ const ArrayRef<const LinkerPatch>& last_method_patches,
+ uint32_t distance_without_thunks) {
+ CHECK_EQ(distance_without_thunks % kArm64Alignment, 0u);
+ const uint32_t method1_offset =
+ CompiledCode::AlignCode(kTrampolineSize, kArm64) + sizeof(OatQuickMethodHeader);
+ AddCompiledMethod(MethodRef(1u), method1_code, method1_patches);
+ const uint32_t gap_start =
+ CompiledCode::AlignCode(method1_offset + method1_code.size(), kArm64);
+
+ // We want to put the last method at a very precise offset.
+ const uint32_t last_method_offset = method1_offset + distance_without_thunks;
+ const uint32_t gap_end = last_method_offset - sizeof(OatQuickMethodHeader);
+ CHECK(IsAligned<kArm64Alignment>(gap_end));
+
+ // Fill the gap with intermediate methods in chunks of 2MiB and the last in [2MiB, 4MiB).
+ // (The small chunks can be deduplicated, so this test does not need 256MiB of
+ // memory to cover the +-128MiB displacements.)
+ uint32_t method_idx = 2u;
+ constexpr uint32_t kSmallChunkSize = 2 * MB;
+ std::vector<uint8_t> gap_code;
+ size_t gap_size = gap_end - gap_start;
+ for (; gap_size >= 2u * kSmallChunkSize; gap_size -= kSmallChunkSize) {
+ uint32_t chunk_code_size = kSmallChunkSize - sizeof(OatQuickMethodHeader);
+ gap_code.resize(chunk_code_size, 0u);
+ AddCompiledMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(gap_code),
+ ArrayRef<const LinkerPatch>());
+ method_idx += 1u;
+ }
+ uint32_t chunk_code_size = gap_size - sizeof(OatQuickMethodHeader);
+ gap_code.resize(chunk_code_size, 0u);
+ AddCompiledMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(gap_code),
+ ArrayRef<const LinkerPatch>());
+ method_idx += 1u;
+
+ // Add the last method and link.
+ AddCompiledMethod(MethodRef(method_idx), last_method_code, last_method_patches);
+ Link();
+
+ // Check assumptions.
+ CHECK_EQ(GetMethodOffset(1), method1_offset);
+ auto last_result = method_offset_map_.FindMethodOffset(MethodRef(method_idx));
+ CHECK(last_result.first);
+ // There may be a thunk before the last method.
+ if (last_result.second != last_method_offset) {
+ // Thunk present. Check that there's only one.
+ uint32_t aligned_thunk_size = CompiledCode::AlignCode(ThunkSize(), kArm64);
+ CHECK_EQ(last_result.second, last_method_offset + aligned_thunk_size);
+ }
+ return method_idx;
+ }
+
+ uint32_t GetMethodOffset(uint32_t method_idx) {
+ auto result = method_offset_map_.FindMethodOffset(MethodRef(method_idx));
+ CHECK(result.first);
+ CHECK_EQ(result.second & 3u, 0u);
+ return result.second;
+ }
+
+ uint32_t ThunkSize() {
+ return static_cast<Arm64RelativePatcher*>(patcher_.get())->thunk_code_.size();
+ }
+
+ bool CheckThunk(uint32_t thunk_offset) {
+ Arm64RelativePatcher* patcher = static_cast<Arm64RelativePatcher*>(patcher_.get());
+ ArrayRef<const uint8_t> expected_code(patcher->thunk_code_);
+ if (output_.size() < thunk_offset + expected_code.size()) {
+ LOG(ERROR) << "output_.size() == " << output_.size() << " < "
+ << "thunk_offset + expected_code.size() == " << (thunk_offset + expected_code.size());
+ return false;
+ }
+ ArrayRef<const uint8_t> linked_code(&output_[thunk_offset], expected_code.size());
+ if (linked_code == expected_code) {
+ return true;
+ }
+ // Log failure info.
+ DumpDiff(expected_code, linked_code);
+ return false;
+ }
+
+ std::vector<uint8_t> GenNopsAndBl(size_t num_nops, uint32_t bl) {
+ std::vector<uint8_t> result;
+ result.reserve(num_nops * 4u + 4u);
+ for (size_t i = 0; i != num_nops; ++i) {
+ result.insert(result.end(), kNopCode.begin(), kNopCode.end());
+ }
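+ // An A64 instruction is a single 32-bit word stored little-endian.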
+ result.push_back(static_cast<uint8_t>(bl));
+ result.push_back(static_cast<uint8_t>(bl >> 8));
+ result.push_back(static_cast<uint8_t>(bl >> 16));
+ result.push_back(static_cast<uint8_t>(bl >> 24));
+ return result;
+ }
+
+ std::vector<uint8_t> GenNopsAndAdrpLdr(size_t num_nops,
+ uint32_t method_offset, uint32_t target_offset) {
+ std::vector<uint8_t> result;
+ result.reserve(num_nops * 4u + 8u);
+ for (size_t i = 0; i != num_nops; ++i) {
+ result.insert(result.end(), kNopCode.begin(), kNopCode.end());
+ }
+ DCHECK_EQ(method_offset & 3u, 0u);
+ DCHECK_EQ(target_offset & 3u, 0u);
+ uint32_t adrp_offset = method_offset + num_nops * 4u;
+ uint32_t disp = target_offset - (adrp_offset & ~0xfffu);
+ DCHECK_EQ(disp & 3u, 0u);
+ uint32_t ldr = 0xb9400001 | // LDR w1, [x0, #(imm12 << 2)]
+ ((disp & 0xfffu) << (10 - 2)); // imm12 = ((disp & 0xfffu) >> 2) is at bit 10.
+ uint32_t adrp = 0x90000000 | // ADRP x0, +SignExtend(immhi:immlo:Zeros(12), 64)
+ ((disp & 0x3000u) << (29 - 12)) | // immlo = ((disp & 0x3000u) >> 12) is at bit 29,
+ ((disp & 0xffffc000) >> (14 - 5)) | // immhi = (disp >> 14) is at bit 5,
+ // We take the sign bit from the disp, limiting disp to +- 2GiB.
+ ((disp & 0x80000000) >> (31 - 23)); // sign bit in immhi is at bit 23.
+ result.push_back(static_cast<uint8_t>(adrp));
+ result.push_back(static_cast<uint8_t>(adrp >> 8));
+ result.push_back(static_cast<uint8_t>(adrp >> 16));
+ result.push_back(static_cast<uint8_t>(adrp >> 24));
+ result.push_back(static_cast<uint8_t>(ldr));
+ result.push_back(static_cast<uint8_t>(ldr >> 8));
+ result.push_back(static_cast<uint8_t>(ldr >> 16));
+ result.push_back(static_cast<uint8_t>(ldr >> 24));
+ return result;
+ }
+
+ void TestNopsAdrpLdr(size_t num_nops, uint32_t dex_cache_arrays_begin, uint32_t element_offset) {
+ dex_cache_arrays_begin_ = dex_cache_arrays_begin;
+ auto code = GenNopsAndAdrpLdr(num_nops, 0u, 0u); // Unpatched.
+ LinkerPatch patches[] = {
+ LinkerPatch::DexCacheArrayPatch(num_nops * 4u , nullptr, num_nops * 4u, element_offset),
+ LinkerPatch::DexCacheArrayPatch(num_nops * 4u + 4u, nullptr, num_nops * 4u, element_offset),
+ };
+ AddCompiledMethod(MethodRef(1u), ArrayRef<const uint8_t>(code),
+ ArrayRef<const LinkerPatch>(patches));
+ Link();
+
+ uint32_t method1_offset = GetMethodOffset(1u);
+ uint32_t target_offset = dex_cache_arrays_begin_ + element_offset;
+ auto expected_code = GenNopsAndAdrpLdr(num_nops, method1_offset, target_offset);
+ EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
+ }
+
+ void InsertInsn(std::vector<uint8_t>* code, size_t pos, uint32_t insn) {
+ CHECK_LE(pos, code->size());
+ const uint8_t insn_code[] = {
+ static_cast<uint8_t>(insn), static_cast<uint8_t>(insn >> 8),
+ static_cast<uint8_t>(insn >> 16), static_cast<uint8_t>(insn >> 24),
+ };
+ static_assert(sizeof(insn_code) == 4u, "Invalid sizeof(insn_code).");
+ code->insert(code->begin() + pos, insn_code, insn_code + sizeof(insn_code));
+ }
+
+ void PrepareNopsAdrpInsn2Ldr(size_t num_nops, uint32_t insn2,
+ uint32_t dex_cache_arrays_begin, uint32_t element_offset) {
+ dex_cache_arrays_begin_ = dex_cache_arrays_begin;
+ auto code = GenNopsAndAdrpLdr(num_nops, 0u, 0u); // Unpatched.
+ InsertInsn(&code, num_nops * 4u + 4u, insn2);
+ LinkerPatch patches[] = {
+ LinkerPatch::DexCacheArrayPatch(num_nops * 4u , nullptr, num_nops * 4u, element_offset),
+ LinkerPatch::DexCacheArrayPatch(num_nops * 4u + 8u, nullptr, num_nops * 4u, element_offset),
+ };
+ AddCompiledMethod(MethodRef(1u), ArrayRef<const uint8_t>(code),
+ ArrayRef<const LinkerPatch>(patches));
+ Link();
+ }
+
+ void TestNopsAdrpInsn2Ldr(size_t num_nops, uint32_t insn2,
+ uint32_t dex_cache_arrays_begin, uint32_t element_offset) {
+ PrepareNopsAdrpInsn2Ldr(num_nops, insn2, dex_cache_arrays_begin, element_offset);
+
+ uint32_t method1_offset = GetMethodOffset(1u);
+ uint32_t target_offset = dex_cache_arrays_begin_ + element_offset;
+ auto expected_code = GenNopsAndAdrpLdr(num_nops, method1_offset, target_offset);
+ InsertInsn(&expected_code, num_nops * 4u + 4u, insn2);
+ EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
+ }
+
+ void TestNopsAdrpInsn2LdrHasThunk(size_t num_nops, uint32_t insn2,
+ uint32_t dex_cache_arrays_begin, uint32_t element_offset) {
+ PrepareNopsAdrpInsn2Ldr(num_nops, insn2, dex_cache_arrays_begin, element_offset);
+
+ uint32_t method1_offset = GetMethodOffset(1u);
+ CHECK(!compiled_method_refs_.empty());
+ CHECK_EQ(compiled_method_refs_[0].dex_method_index, 1u);
+ CHECK_EQ(compiled_method_refs_.size(), compiled_methods_.size());
+ uint32_t method1_size = compiled_methods_[0]->GetQuickCode()->size();
+ uint32_t thunk_offset = CompiledCode::AlignCode(method1_offset + method1_size, kArm64);
+ uint32_t b_diff = thunk_offset - (method1_offset + num_nops * 4u);
+ ASSERT_EQ(b_diff & 3u, 0u);
+ ASSERT_LT(b_diff, 128 * MB);
+ uint32_t b_out = kBPlus0 + ((b_diff >> 2) & 0x03ffffffu);
+ uint32_t b_in = kBPlus0 + ((-b_diff >> 2) & 0x03ffffffu);
+
+ uint32_t target_offset = dex_cache_arrays_begin_ + element_offset;
+ auto expected_code = GenNopsAndAdrpLdr(num_nops, method1_offset, target_offset);
+ InsertInsn(&expected_code, num_nops * 4u + 4u, insn2);
+ // Replace the ADRP with a B to the thunk.
+ expected_code.erase(expected_code.begin() + num_nops * 4u,
+ expected_code.begin() + num_nops * 4u + 4u);
+ InsertInsn(&expected_code, num_nops * 4u, b_out);
+ EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
+
+ auto expected_thunk_code = GenNopsAndAdrpLdr(0u, thunk_offset, target_offset);
+ ASSERT_EQ(expected_thunk_code.size(), 8u);
+ expected_thunk_code.erase(expected_thunk_code.begin() + 4u, expected_thunk_code.begin() + 8u);
+ InsertInsn(&expected_thunk_code, 4u, b_in);
+ ASSERT_EQ(expected_thunk_code.size(), 8u);
+
+ uint32_t thunk_size = ThunkSize();
+ ASSERT_EQ(thunk_offset + thunk_size, output_.size());
+ ASSERT_EQ(thunk_size, expected_thunk_code.size());
+ ArrayRef<const uint8_t> thunk_code(&output_[thunk_offset], thunk_size);
+ if (ArrayRef<const uint8_t>(expected_thunk_code) != thunk_code) {
+ DumpDiff(ArrayRef<const uint8_t>(expected_thunk_code), thunk_code);
+ FAIL();
+ }
+ }
+
+ void TestAdrpInsn2Ldr(uint32_t insn2, uint32_t adrp_offset, bool has_thunk,
+ uint32_t dex_cache_arrays_begin, uint32_t element_offset) {
+ uint32_t method1_offset =
+ CompiledCode::AlignCode(kTrampolineSize, kArm64) + sizeof(OatQuickMethodHeader);
+ ASSERT_LT(method1_offset, adrp_offset);
+ ASSERT_EQ(adrp_offset & 3u, 0u);
+ uint32_t num_nops = (adrp_offset - method1_offset) / 4u;
+ if (has_thunk) {
+ TestNopsAdrpInsn2LdrHasThunk(num_nops, insn2, dex_cache_arrays_begin, element_offset);
+ } else {
+ TestNopsAdrpInsn2Ldr(num_nops, insn2, dex_cache_arrays_begin, element_offset);
+ }
+ ASSERT_EQ(method1_offset, GetMethodOffset(1u)); // If this fails, num_nops is wrong.
+ }
+
+ void TestAdrpLdurLdr(uint32_t adrp_offset, bool has_thunk,
+ uint32_t dex_cache_arrays_begin, uint32_t element_offset) {
+ TestAdrpInsn2Ldr(kLdurInsn, adrp_offset, has_thunk, dex_cache_arrays_begin, element_offset);
+ }
+
+ void TestAdrpLdrPcRelLdr(uint32_t pcrel_ldr_insn, int32_t pcrel_disp,
+ uint32_t adrp_offset, bool has_thunk,
+ uint32_t dex_cache_arrays_begin, uint32_t element_offset) {
+ ASSERT_LT(pcrel_disp, 0x100000);
+ ASSERT_GE(pcrel_disp, -0x100000);
+ ASSERT_EQ(pcrel_disp & 0x3, 0);
+ uint32_t insn2 = pcrel_ldr_insn | (((static_cast<uint32_t>(pcrel_disp) >> 2) & 0x7ffffu) << 5);
+ TestAdrpInsn2Ldr(insn2, adrp_offset, has_thunk, dex_cache_arrays_begin, element_offset);
+ }
+
+ void TestAdrpLdrSpRelLdr(uint32_t sprel_ldr_insn, uint32_t sprel_disp_in_load_units,
+ uint32_t adrp_offset, bool has_thunk,
+ uint32_t dex_cache_arrays_begin, uint32_t element_offset) {
+ ASSERT_LT(sprel_disp_in_load_units, 0x1000u);
+ uint32_t insn2 = sprel_ldr_insn | ((sprel_disp_in_load_units & 0xfffu) << 10);
+ TestAdrpInsn2Ldr(insn2, adrp_offset, has_thunk, dex_cache_arrays_begin, element_offset);
+ }
+};
+
+const uint8_t Arm64RelativePatcherTest::kCallRawCode[] = {
+ 0x00, 0x00, 0x00, 0x94
+};
+
+const ArrayRef<const uint8_t> Arm64RelativePatcherTest::kCallCode(kCallRawCode);
+
+const uint8_t Arm64RelativePatcherTest::kNopRawCode[] = {
+ 0x1f, 0x20, 0x03, 0xd5
+};
+
+const ArrayRef<const uint8_t> Arm64RelativePatcherTest::kNopCode(kNopRawCode);
+
+class Arm64RelativePatcherTestDefault : public Arm64RelativePatcherTest {
+ public:
+ Arm64RelativePatcherTestDefault() : Arm64RelativePatcherTest("default") { }
+};
+
+class Arm64RelativePatcherTestDenver64 : public Arm64RelativePatcherTest {
+ public:
+ Arm64RelativePatcherTestDenver64() : Arm64RelativePatcherTest("denver64") { }
+};
+
+TEST_F(Arm64RelativePatcherTestDefault, CallSelf) {
+ LinkerPatch patches[] = {
+ LinkerPatch::RelativeCodePatch(0u, nullptr, 1u),
+ };
+ AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches));
+ Link();
+
+ static const uint8_t expected_code[] = {
+ 0x00, 0x00, 0x00, 0x94
+ };
+ EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
+}
+
+TEST_F(Arm64RelativePatcherTestDefault, CallOther) {
+ LinkerPatch method1_patches[] = {
+ LinkerPatch::RelativeCodePatch(0u, nullptr, 2u),
+ };
+ AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(method1_patches));
+ LinkerPatch method2_patches[] = {
+ LinkerPatch::RelativeCodePatch(0u, nullptr, 1u),
+ };
+ AddCompiledMethod(MethodRef(2u), kCallCode, ArrayRef<const LinkerPatch>(method2_patches));
+ Link();
+
+ uint32_t method1_offset = GetMethodOffset(1u);
+ uint32_t method2_offset = GetMethodOffset(2u);
+ uint32_t diff_after = method2_offset - method1_offset;
+ ASSERT_EQ(diff_after & 3u, 0u);
+ ASSERT_LT(diff_after >> 2, 1u << 8); // Simple encoding, (diff_after >> 2) fits into 8 bits.
+ static const uint8_t method1_expected_code[] = {
+ static_cast<uint8_t>(diff_after >> 2), 0x00, 0x00, 0x94
+ };
+ EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(method1_expected_code)));
+ uint32_t diff_before = method1_offset - method2_offset;
+ ASSERT_EQ(diff_before & 3u, 0u);
+ ASSERT_GE(diff_before, -1u << 27);
+ auto method2_expected_code = GenNopsAndBl(0u, kBlPlus0 | ((diff_before >> 2) & 0x03ffffffu));
+ EXPECT_TRUE(CheckLinkedMethod(MethodRef(2u), ArrayRef<const uint8_t>(method2_expected_code)));
+}
+
+TEST_F(Arm64RelativePatcherTestDefault, CallTrampoline) {
+ LinkerPatch patches[] = {
+ LinkerPatch::RelativeCodePatch(0u, nullptr, 2u),
+ };
+ AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches));
+ Link();
+
+ uint32_t method1_offset = GetMethodOffset(1u);
+ uint32_t diff = kTrampolineOffset - method1_offset;
+ ASSERT_EQ(diff & 3u, 0u);
+ ASSERT_GE(diff, -1u << 27); // Backward branch within range (checked as unsigned).
+ auto expected_code = GenNopsAndBl(0u, kBlPlus0 | ((diff >> 2) & 0x03ffffffu));
+ EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
+}
+
+TEST_F(Arm64RelativePatcherTestDefault, CallOtherAlmostTooFarAfter) {
+ auto method1_raw_code = GenNopsAndBl(1u, kBlPlus0);
+ constexpr uint32_t bl_offset_in_method1 = 1u * 4u; // After NOPs.
+ ArrayRef<const uint8_t> method1_code(method1_raw_code);
+ ASSERT_EQ(bl_offset_in_method1 + 4u, method1_code.size());
+ uint32_t expected_last_method_idx = 65; // Based on 2MiB chunks in Create2MethodsWithGap().
+ LinkerPatch method1_patches[] = {
+ LinkerPatch::RelativeCodePatch(bl_offset_in_method1, nullptr, expected_last_method_idx),
+ };
+
+ constexpr uint32_t max_positive_disp = 128 * MB - 4u;
+ uint32_t last_method_idx = Create2MethodsWithGap(method1_code, method1_patches,
+ kNopCode, ArrayRef<const LinkerPatch>(),
+ bl_offset_in_method1 + max_positive_disp);
+ ASSERT_EQ(expected_last_method_idx, last_method_idx);
+
+ uint32_t method1_offset = GetMethodOffset(1u);
+ uint32_t last_method_offset = GetMethodOffset(last_method_idx);
+ ASSERT_EQ(method1_offset + bl_offset_in_method1 + max_positive_disp, last_method_offset);
+
+ // Check linked code.
+ auto expected_code = GenNopsAndBl(1u, kBlPlusMax);
+ EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
+}
+
+TEST_F(Arm64RelativePatcherTestDefault, CallOtherAlmostTooFarBefore) {
+ auto last_method_raw_code = GenNopsAndBl(0u, kBlPlus0);
+ constexpr uint32_t bl_offset_in_last_method = 0u * 4u; // After NOPs.
+ ArrayRef<const uint8_t> last_method_code(last_method_raw_code);
+ ASSERT_EQ(bl_offset_in_last_method + 4u, last_method_code.size());
+ LinkerPatch last_method_patches[] = {
+ LinkerPatch::RelativeCodePatch(bl_offset_in_last_method, nullptr, 1u),
+ };
+
+ constexpr uint32_t max_negative_disp = 128 * MB;
+ uint32_t last_method_idx = Create2MethodsWithGap(kNopCode, ArrayRef<const LinkerPatch>(),
+ last_method_code, last_method_patches,
+ max_negative_disp - bl_offset_in_last_method);
+ uint32_t method1_offset = GetMethodOffset(1u);
+ uint32_t last_method_offset = GetMethodOffset(last_method_idx);
+ ASSERT_EQ(method1_offset, last_method_offset + bl_offset_in_last_method - max_negative_disp);
+
+ // Check linked code.
+ auto expected_code = GenNopsAndBl(0u, kBlMinusMax);
+ EXPECT_TRUE(CheckLinkedMethod(MethodRef(last_method_idx),
+ ArrayRef<const uint8_t>(expected_code)));
+}
+
+TEST_F(Arm64RelativePatcherTestDefault, CallOtherJustTooFarAfter) {
+ auto method1_raw_code = GenNopsAndBl(0u, kBlPlus0);
+ constexpr uint32_t bl_offset_in_method1 = 0u * 4u; // After NOPs.
+ ArrayRef<const uint8_t> method1_code(method1_raw_code);
+ ASSERT_EQ(bl_offset_in_method1 + 4u, method1_code.size());
+ uint32_t expected_last_method_idx = 65; // Based on 2MiB chunks in Create2MethodsWithGap().
+ LinkerPatch method1_patches[] = {
+ LinkerPatch::RelativeCodePatch(bl_offset_in_method1, nullptr, expected_last_method_idx),
+ };
+
+ constexpr uint32_t just_over_max_positive_disp = 128 * MB;
+ uint32_t last_method_idx = Create2MethodsWithGap(
+ method1_code, method1_patches, kNopCode, ArrayRef<const LinkerPatch>(),
+ bl_offset_in_method1 + just_over_max_positive_disp);
+ ASSERT_EQ(expected_last_method_idx, last_method_idx);
+
+ uint32_t method1_offset = GetMethodOffset(1u);
+ uint32_t last_method_offset = GetMethodOffset(last_method_idx);
+ uint32_t last_method_header_offset = last_method_offset - sizeof(OatQuickMethodHeader);
+ ASSERT_TRUE(IsAligned<kArm64Alignment>(last_method_header_offset));
+ uint32_t thunk_offset = last_method_header_offset - CompiledCode::AlignCode(ThunkSize(), kArm64);
+ ASSERT_TRUE(IsAligned<kArm64Alignment>(thunk_offset));
+ uint32_t diff = thunk_offset - (method1_offset + bl_offset_in_method1);
+ ASSERT_EQ(diff & 3u, 0u);
+ ASSERT_LT(diff, 128 * MB);
+ auto expected_code = GenNopsAndBl(0u, kBlPlus0 | (diff >> 2));
+ EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
+ CheckThunk(thunk_offset);
+}
+
+TEST_F(Arm64RelativePatcherTestDefault, CallOtherJustTooFarBefore) {
+ auto last_method_raw_code = GenNopsAndBl(1u, kBlPlus0);
+ constexpr uint32_t bl_offset_in_last_method = 1u * 4u; // After NOPs.
+ ArrayRef<const uint8_t> last_method_code(last_method_raw_code);
+ ASSERT_EQ(bl_offset_in_last_method + 4u, last_method_code.size());
+ LinkerPatch last_method_patches[] = {
+ LinkerPatch::RelativeCodePatch(bl_offset_in_last_method, nullptr, 1u),
+ };
+
+ constexpr uint32_t just_over_max_negative_disp = 128 * MB + 4;
+ uint32_t last_method_idx = Create2MethodsWithGap(
+ kNopCode, ArrayRef<const LinkerPatch>(), last_method_code, last_method_patches,
+ just_over_max_negative_disp - bl_offset_in_last_method);
+ uint32_t method1_offset = GetMethodOffset(1u);
+ uint32_t last_method_offset = GetMethodOffset(last_method_idx);
+ ASSERT_EQ(method1_offset,
+ last_method_offset + bl_offset_in_last_method - just_over_max_negative_disp);
+
+ // Check linked code.
+ uint32_t thunk_offset =
+ CompiledCode::AlignCode(last_method_offset + last_method_code.size(), kArm64);
+ uint32_t diff = thunk_offset - (last_method_offset + bl_offset_in_last_method);
+ ASSERT_EQ(diff & 3u, 0u);
+ ASSERT_LT(diff, 128 * MB);
+ auto expected_code = GenNopsAndBl(1u, kBlPlus0 | (diff >> 2));
+ EXPECT_TRUE(CheckLinkedMethod(MethodRef(last_method_idx),
+ ArrayRef<const uint8_t>(expected_code)));
+ EXPECT_TRUE(CheckThunk(thunk_offset));
+}
+
+TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference1) {
+ TestNopsAdrpLdr(0u, 0x12345678u, 0x1234u);
+}
+
+TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference2) {
+ TestNopsAdrpLdr(0u, -0x12345678u, 0x4444u);
+}
+
+TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference3) {
+ TestNopsAdrpLdr(0u, 0x12345000u, 0x3ffcu);
+}
+
+TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference4) {
+ TestNopsAdrpLdr(0u, 0x12345000u, 0x4000u);
+}
+
+TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference0xff4) {
+ TestAdrpLdurLdr(0xff4u, false, 0x12345678u, 0x1234u);
+}
+
+TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference0xff8) {
+ TestAdrpLdurLdr(0xff8u, true, 0x12345678u, 0x1234u);
+}
+
+TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference0xffc) {
+ TestAdrpLdurLdr(0xffcu, true, 0x12345678u, 0x1234u);
+}
+
+TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference0x1000) {
+ TestAdrpLdurLdr(0x1000u, false, 0x12345678u, 0x1234u);
+}
+
+TEST_F(Arm64RelativePatcherTestDenver64, DexCacheReference0xff4) {
+ TestAdrpLdurLdr(0xff4u, false, 0x12345678u, 0x1234u);
+}
+
+TEST_F(Arm64RelativePatcherTestDenver64, DexCacheReference0xff8) {
+ TestAdrpLdurLdr(0xff8u, false, 0x12345678u, 0x1234u);
+}
+
+TEST_F(Arm64RelativePatcherTestDenver64, DexCacheReference0xffc) {
+ TestAdrpLdurLdr(0xffcu, false, 0x12345678u, 0x1234u);
+}
+
+TEST_F(Arm64RelativePatcherTestDenver64, DexCacheReference0x1000) {
+ TestAdrpLdurLdr(0x1000u, false, 0x12345678u, 0x1234u);
+}
+
+#define TEST_FOR_OFFSETS(test, disp1, disp2) \
+ test(0xff4u, disp1) test(0xff8u, disp1) test(0xffcu, disp1) test(0x1000u, disp1) \
+ test(0xff4u, disp2) test(0xff8u, disp2) test(0xffcu, disp2) test(0x1000u, disp2)
+
+// LDR <Wt>, <label> is always aligned. We should never have to use a fixup.
+#define LDRW_PCREL_TEST(adrp_offset, disp) \
+ TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference ## adrp_offset ## WPcRel ## disp) { \
+ TestAdrpLdrPcRelLdr(kLdrWPcRelInsn, disp, adrp_offset, false, 0x12345678u, 0x1234u); \
+ }
+
+TEST_FOR_OFFSETS(LDRW_PCREL_TEST, 0x1234, 0x1238)
+
+// LDR <Xt>, <label> is aligned when offset + displacement is a multiple of 8.
+#define LDRX_PCREL_TEST(adrp_offset, disp) \
+ TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference ## adrp_offset ## XPcRel ## disp) { \
+ bool unaligned = ((adrp_offset + 4u + static_cast<uint32_t>(disp)) & 7u) != 0; \
+ bool has_thunk = (adrp_offset == 0xff8u || adrp_offset == 0xffcu) && unaligned; \
+ TestAdrpLdrPcRelLdr(kLdrXPcRelInsn, disp, adrp_offset, has_thunk, 0x12345678u, 0x1234u); \
+ }
+
+TEST_FOR_OFFSETS(LDRX_PCREL_TEST, 0x1234, 0x1238)
+
+// LDR <Wt>, [SP, #<pimm>] and LDR <Xt>, [SP, #<pimm>] are always aligned. No fixup needed.
+#define LDRW_SPREL_TEST(adrp_offset, disp) \
+ TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference ## adrp_offset ## WSpRel ## disp) { \
+ TestAdrpLdrSpRelLdr(kLdrWSpRelInsn, disp >> 2, adrp_offset, false, 0x12345678u, 0x1234u); \
+ }
+
+TEST_FOR_OFFSETS(LDRW_SPREL_TEST, 0, 4)
+
+#define LDRX_SPREL_TEST(adrp_offset, disp) \
+ TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference ## adrp_offset ## XSpRel ## disp) { \
+ TestAdrpLdrSpRelLdr(kLdrXSpRelInsn, disp >> 3, adrp_offset, false, 0x12345678u, 0x1234u); \
+ }
+
+TEST_FOR_OFFSETS(LDRX_SPREL_TEST, 0, 8)
+
+} // namespace linker
+} // namespace art
diff --git a/compiler/linker/relative_patcher.cc b/compiler/linker/relative_patcher.cc
new file mode 100644
index 0000000000..89aed956aa
--- /dev/null
+++ b/compiler/linker/relative_patcher.cc
@@ -0,0 +1,113 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "linker/relative_patcher.h"
+
+#include "linker/arm/relative_patcher_thumb2.h"
+#include "linker/arm64/relative_patcher_arm64.h"
+#include "linker/x86/relative_patcher_x86.h"
+#include "linker/x86_64/relative_patcher_x86_64.h"
+#include "output_stream.h"
+
+namespace art {
+namespace linker {
+
+std::unique_ptr<RelativePatcher> RelativePatcher::Create(
+ InstructionSet instruction_set, const InstructionSetFeatures* features,
+ RelativePatcherTargetProvider* provider) {
+ class RelativePatcherNone FINAL : public RelativePatcher {
+ public:
+ RelativePatcherNone() { }
+
+ uint32_t ReserveSpace(uint32_t offset,
+ const CompiledMethod* compiled_method ATTRIBUTE_UNUSED,
+ MethodReference method_ref ATTRIBUTE_UNUSED) OVERRIDE {
+ return offset; // No space reserved; no patches expected.
+ }
+
+ uint32_t ReserveSpaceEnd(uint32_t offset) OVERRIDE {
+ return offset; // No space reserved; no patches expected.
+ }
+
+ uint32_t WriteThunks(OutputStream* out ATTRIBUTE_UNUSED, uint32_t offset) OVERRIDE {
+ return offset; // No thunks added; no patches expected.
+ }
+
+ void PatchCall(std::vector<uint8_t>* code ATTRIBUTE_UNUSED,
+ uint32_t literal_offset ATTRIBUTE_UNUSED,
+ uint32_t patch_offset ATTRIBUTE_UNUSED,
+ uint32_t target_offset ATTRIBUTE_UNUSED) OVERRIDE {
+ LOG(FATAL) << "Unexpected relative call patch.";
+ }
+
+ void PatchDexCacheReference(std::vector<uint8_t>* code ATTRIBUTE_UNUSED,
+ const LinkerPatch& patch ATTRIBUTE_UNUSED,
+ uint32_t patch_offset ATTRIBUTE_UNUSED,
+ uint32_t target_offset ATTRIBUTE_UNUSED) OVERRIDE {
+ LOG(FATAL) << "Unexpected relative dex cache array patch.";
+ }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(RelativePatcherNone);
+ };
+
+ switch (instruction_set) {
+ case kX86:
+ return std::unique_ptr<RelativePatcher>(new X86RelativePatcher());
+ case kX86_64:
+ return std::unique_ptr<RelativePatcher>(new X86_64RelativePatcher());
+ case kArm:
+ // Fall through: we generate Thumb2 code for "arm".
+ case kThumb2:
+ return std::unique_ptr<RelativePatcher>(new Thumb2RelativePatcher(provider));
+ case kArm64:
+ return std::unique_ptr<RelativePatcher>(
+ new Arm64RelativePatcher(provider, features->AsArm64InstructionSetFeatures()));
+ default:
+ return std::unique_ptr<RelativePatcher>(new RelativePatcherNone);
+ }
+}
+
+bool RelativePatcher::WriteCodeAlignment(OutputStream* out, uint32_t aligned_code_delta) {
+ static const uint8_t kPadding[] = {
+ 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u
+ };
+ DCHECK_LE(aligned_code_delta, sizeof(kPadding));
+ if (UNLIKELY(!out->WriteFully(kPadding, aligned_code_delta))) {
+ return false;
+ }
+ size_code_alignment_ += aligned_code_delta;
+ return true;
+}
+
+bool RelativePatcher::WriteRelCallThunk(OutputStream* out, const ArrayRef<const uint8_t>& thunk) {
+ if (UNLIKELY(!out->WriteFully(thunk.data(), thunk.size()))) {
+ return false;
+ }
+ size_relative_call_thunks_ += thunk.size();
+ return true;
+}
+
+bool RelativePatcher::WriteMiscThunk(OutputStream* out, const ArrayRef<const uint8_t>& thunk) {
+ if (UNLIKELY(!out->WriteFully(thunk.data(), thunk.size()))) {
+ return false;
+ }
+ size_misc_thunks_ += thunk.size();
+ return true;
+}
+
+} // namespace linker
+} // namespace art
diff --git a/compiler/linker/relative_patcher.h b/compiler/linker/relative_patcher.h
new file mode 100644
index 0000000000..8a9f3f8364
--- /dev/null
+++ b/compiler/linker/relative_patcher.h
@@ -0,0 +1,126 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_LINKER_RELATIVE_PATCHER_H_
+#define ART_COMPILER_LINKER_RELATIVE_PATCHER_H_
+
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "arch/instruction_set.h"
+#include "arch/instruction_set_features.h"
+#include "base/macros.h"
+#include "method_reference.h"
+#include "utils/array_ref.h"
+
+namespace art {
+
+class CompiledMethod;
+class LinkerPatch;
+class OutputStream;
+
+namespace linker {
+
+/**
+ * @class RelativePatcherTargetProvider
+ * @brief Interface for providing method offsets for relative call targets.
+ */
+class RelativePatcherTargetProvider {
+ public:
+ /**
+ * Find the offset of the target method of a relative call if known.
+ *
+ * The process of assigning target method offsets includes calls to the relative patcher's
+ * ReserveSpace(), which in turn can use FindMethodOffset() to determine whether a method
+ * already has an offset assigned and, if so, what that offset is. If the offset has not
+ * yet been assigned, or if it is too far away for the particular architecture's relative
+ * call, ReserveSpace() may need to allocate space for a special dispatch thunk.
+ *
+ * @param ref the target method of the relative call.
+ * @return true in the first element of the pair if the method was found, false otherwise;
+ * if found, the second element specifies the offset.
+ */
+ virtual std::pair<bool, uint32_t> FindMethodOffset(MethodReference ref) = 0;
+
+ protected:
+ virtual ~RelativePatcherTargetProvider() { }
+};
+
+/**
+ * @class RelativePatcher
+ * @brief Interface for architecture-specific link-time patching of PC-relative references.
+ */
+class RelativePatcher {
+ public:
+ static std::unique_ptr<RelativePatcher> Create(
+ InstructionSet instruction_set, const InstructionSetFeatures* features,
+ RelativePatcherTargetProvider* provider);
+
+ virtual ~RelativePatcher() { }
+
+ uint32_t CodeAlignmentSize() const {
+ return size_code_alignment_;
+ }
+
+ uint32_t RelativeCallThunksSize() const {
+ return size_relative_call_thunks_;
+ }
+
+ uint32_t MiscThunksSize() const {
+ return size_misc_thunks_;
+ }
+
+ // Reserve space for thunks if needed before a method, return adjusted offset.
+ virtual uint32_t ReserveSpace(uint32_t offset, const CompiledMethod* compiled_method,
+ MethodReference method_ref) = 0;
+
+ // Reserve space for thunks if needed after the last method, return adjusted offset.
+ virtual uint32_t ReserveSpaceEnd(uint32_t offset) = 0;
+
+ // Write relative call thunks if needed, return adjusted offset.
+ virtual uint32_t WriteThunks(OutputStream* out, uint32_t offset) = 0;
+
+ // Patch method code. The input displacement is relative to the patched location;
+ // the patcher may need to adjust it if the correct base is different.
+ virtual void PatchCall(std::vector<uint8_t>* code, uint32_t literal_offset,
+ uint32_t patch_offset, uint32_t target_offset) = 0;
+
+ // Patch a reference to a dex cache location.
+ virtual void PatchDexCacheReference(std::vector<uint8_t>* code, const LinkerPatch& patch,
+ uint32_t patch_offset, uint32_t target_offset) = 0;
+
+ protected:
+ RelativePatcher()
+ : size_code_alignment_(0u),
+ size_relative_call_thunks_(0u),
+ size_misc_thunks_(0u) {
+ }
+
+ bool WriteCodeAlignment(OutputStream* out, uint32_t aligned_code_delta);
+ bool WriteRelCallThunk(OutputStream* out, const ArrayRef<const uint8_t>& thunk);
+ bool WriteMiscThunk(OutputStream* out, const ArrayRef<const uint8_t>& thunk);
+
+ private:
+ uint32_t size_code_alignment_;
+ uint32_t size_relative_call_thunks_;
+ uint32_t size_misc_thunks_;
+
+ DISALLOW_COPY_AND_ASSIGN(RelativePatcher);
+};
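+
+// Expected call sequence for a driver (see RelativePatcherTest::Link() and the OatWriter):
+// ReserveSpace() for each method and ReserveSpaceEnd() once during layout, then
+// WriteThunks() interleaved with writing method code, applying PatchCall() and
+// PatchDexCacheReference() to a copy of the code as patches are encountered.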
+
+} // namespace linker
+} // namespace art
+
+#endif // ART_COMPILER_LINKER_RELATIVE_PATCHER_H_
diff --git a/compiler/linker/relative_patcher_test.h b/compiler/linker/relative_patcher_test.h
new file mode 100644
index 0000000000..70630f366f
--- /dev/null
+++ b/compiler/linker/relative_patcher_test.h
@@ -0,0 +1,255 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_LINKER_RELATIVE_PATCHER_TEST_H_
+#define ART_COMPILER_LINKER_RELATIVE_PATCHER_TEST_H_
+
+#include "arch/instruction_set.h"
+#include "arch/instruction_set_features.h"
+#include "base/macros.h"
+#include "compiled_method.h"
+#include "dex/quick/dex_file_to_method_inliner_map.h"
+#include "dex/verification_results.h"
+#include "driver/compiler_driver.h"
+#include "driver/compiler_options.h"
+#include "globals.h"
+#include "gtest/gtest.h"
+#include "linker/relative_patcher.h"
+#include "method_reference.h"
+#include "oat.h"
+#include "utils/array_ref.h"
+#include "vector_output_stream.h"
+
+namespace art {
+namespace linker {
+
+// Base class providing infrastructure for architecture-specific tests.
+class RelativePatcherTest : public testing::Test {
+ protected:
+ RelativePatcherTest(InstructionSet instruction_set, const std::string& variant)
+ : compiler_options_(),
+ verification_results_(&compiler_options_),
+ inliner_map_(),
+ driver_(&compiler_options_, &verification_results_, &inliner_map_,
+ Compiler::kQuick, instruction_set, nullptr,
+ false, nullptr, nullptr, 1u,
+ false, false, "", nullptr, -1, ""),
+ error_msg_(),
+ instruction_set_(instruction_set),
+ features_(InstructionSetFeatures::FromVariant(instruction_set, variant, &error_msg_)),
+ method_offset_map_(),
+ patcher_(RelativePatcher::Create(instruction_set, features_.get(), &method_offset_map_)),
+ dex_cache_arrays_begin_(0u),
+ compiled_method_refs_(),
+ compiled_methods_(),
+ patched_code_(),
+ output_(),
+ out_("test output stream", &output_) {
+ CHECK(error_msg_.empty()) << instruction_set << "/" << variant;
+ patched_code_.reserve(16 * KB);
+ }
+
+ MethodReference MethodRef(uint32_t method_idx) {
+ CHECK_NE(method_idx, 0u);
+ return MethodReference(nullptr, method_idx);
+ }
+
+ void AddCompiledMethod(MethodReference method_ref,
+ const ArrayRef<const uint8_t>& code,
+ const ArrayRef<const LinkerPatch>& patches) {
+ compiled_method_refs_.push_back(method_ref);
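+ // Only the code and the patches matter for these link tests; the remaining
+ // CompiledMethod arguments are deliberately left zero/empty.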
+ compiled_methods_.emplace_back(new CompiledMethod(
+ &driver_, instruction_set_, code,
+ 0u, 0u, 0u, nullptr, ArrayRef<const uint8_t>(), ArrayRef<const uint8_t>(),
+ ArrayRef<const uint8_t>(), ArrayRef<const uint8_t>(),
+ patches));
+ }
+
+ void Link() {
+ // Reserve space.
+ static_assert(kTrampolineOffset == 0u, "Unexpected trampoline offset.");
+ uint32_t offset = kTrampolineSize;
+ size_t idx = 0u;
+ for (auto& compiled_method : compiled_methods_) {
+ offset = patcher_->ReserveSpace(offset, compiled_method.get(), compiled_method_refs_[idx]);
+
+ uint32_t aligned_offset = compiled_method->AlignCode(offset);
+ uint32_t aligned_code_delta = aligned_offset - offset;
+ offset += aligned_code_delta;
+
+ offset += sizeof(OatQuickMethodHeader);
+ uint32_t quick_code_offset = offset + compiled_method->CodeDelta();
+ const auto& code = *compiled_method->GetQuickCode();
+ offset += code.size();
+
+ method_offset_map_.map.Put(compiled_method_refs_[idx], quick_code_offset);
+ ++idx;
+ }
+ offset = patcher_->ReserveSpaceEnd(offset);
+ uint32_t output_size = offset;
+ output_.reserve(output_size);
+
+ // Write data.
+ DCHECK(output_.empty());
+ uint8_t dummy_trampoline[kTrampolineSize];
+ memset(dummy_trampoline, 0, sizeof(dummy_trampoline));
+ out_.WriteFully(dummy_trampoline, kTrampolineSize);
+ offset = kTrampolineSize;
+ static const uint8_t kPadding[] = {
+ 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u
+ };
+ uint8_t dummy_header[sizeof(OatQuickMethodHeader)];
+ memset(dummy_header, 0, sizeof(dummy_header));
+ for (auto& compiled_method : compiled_methods_) {
+ offset = patcher_->WriteThunks(&out_, offset);
+
+ uint32_t aligned_offset = compiled_method->AlignCode(offset);
+ uint32_t aligned_code_delta = aligned_offset - offset;
+ CHECK_LE(aligned_code_delta, sizeof(kPadding));
+ out_.WriteFully(kPadding, aligned_code_delta);
+ offset += aligned_code_delta;
+
+ out_.WriteFully(dummy_header, sizeof(OatQuickMethodHeader));
+ offset += sizeof(OatQuickMethodHeader);
+ ArrayRef<const uint8_t> code(*compiled_method->GetQuickCode());
+ if (!compiled_method->GetPatches().empty()) {
+ patched_code_.assign(code.begin(), code.end());
+ code = ArrayRef<const uint8_t>(patched_code_);
+ for (const LinkerPatch& patch : compiled_method->GetPatches()) {
+ if (patch.Type() == kLinkerPatchCallRelative) {
+ auto result = method_offset_map_.FindMethodOffset(patch.TargetMethod());
+ uint32_t target_offset =
+ result.first ? result.second : kTrampolineOffset + compiled_method->CodeDelta();
+ patcher_->PatchCall(&patched_code_, patch.LiteralOffset(),
+ offset + patch.LiteralOffset(), target_offset);
+ } else if (patch.Type() == kLinkerPatchDexCacheArray) {
+ uint32_t target_offset = dex_cache_arrays_begin_ + patch.TargetDexCacheElementOffset();
+ patcher_->PatchDexCacheReference(&patched_code_, patch,
+ offset + patch.LiteralOffset(), target_offset);
+ } else {
+ LOG(FATAL) << "Bad patch type.";
+ }
+ }
+ }
+ out_.WriteFully(&code[0], code.size());
+ offset += code.size();
+ }
+ offset = patcher_->WriteThunks(&out_, offset);
+ CHECK_EQ(offset, output_size);
+ CHECK_EQ(output_.size(), output_size);
+ }
+
+ bool CheckLinkedMethod(MethodReference method_ref, const ArrayRef<const uint8_t>& expected_code) {
+ // Sanity check: original code size must match expected_code.size().
+ size_t idx = 0u;
+ for (auto ref : compiled_method_refs_) {
+ if (ref.dex_file == method_ref.dex_file &&
+ ref.dex_method_index == method_ref.dex_method_index) {
+ break;
+ }
+ ++idx;
+ }
+ CHECK_NE(idx, compiled_method_refs_.size());
+ CHECK_EQ(compiled_methods_[idx]->GetQuickCode()->size(), expected_code.size());
+
+ auto result = method_offset_map_.FindMethodOffset(method_ref);
+ CHECK(result.first); // Must have been linked.
+ size_t offset = result.second - compiled_methods_[idx]->CodeDelta();
+ CHECK_LT(offset, output_.size());
+ CHECK_LE(offset + expected_code.size(), output_.size());
+ ArrayRef<const uint8_t> linked_code(&output_[offset], expected_code.size());
+ if (linked_code == expected_code) {
+ return true;
+ }
+ // Log failure info.
+ DumpDiff(expected_code, linked_code);
+ return false;
+ }
+
+ void DumpDiff(const ArrayRef<const uint8_t>& expected_code,
+ const ArrayRef<const uint8_t>& linked_code) {
+ std::ostringstream expected_hex;
+ std::ostringstream linked_hex;
+ std::ostringstream diff_indicator;
+ static const char digits[] = "0123456789abcdef";
+ bool found_diff = false;
+ for (size_t i = 0; i != expected_code.size(); ++i) {
+ expected_hex << " " << digits[expected_code[i] >> 4] << digits[expected_code[i] & 0xf];
+ linked_hex << " " << digits[linked_code[i] >> 4] << digits[linked_code[i] & 0xf];
+ if (!found_diff) {
+ found_diff = (expected_code[i] != linked_code[i]);
+ diff_indicator << (found_diff ? " ^^" : " ");
+ }
+ }
+ CHECK(found_diff);
+ std::string expected_hex_str = expected_hex.str();
+ std::string linked_hex_str = linked_hex.str();
+ std::string diff_indicator_str = diff_indicator.str();
+ if (diff_indicator_str.length() > 60) {
+ CHECK_EQ(diff_indicator_str.length() % 3u, 0u);
+ size_t remove = diff_indicator_str.length() / 3 - 5;
+ std::ostringstream oss;
+ oss << "[stripped " << remove << "]";
+ std::string replacement = oss.str();
+ expected_hex_str.replace(0u, remove * 3u, replacement);
+ linked_hex_str.replace(0u, remove * 3u, replacement);
+ diff_indicator_str.replace(0u, remove * 3u, replacement);
+ }
+ LOG(ERROR) << "diff expected_code linked_code";
+ LOG(ERROR) << "<" << expected_hex_str;
+ LOG(ERROR) << ">" << linked_hex_str;
+ LOG(ERROR) << " " << diff_indicator_str;
+ }
+
+ // Maps a method reference to its assigned offset.
+ // Wrap the map in a class implementing linker::RelativePatcherTargetProvider.
+ class MethodOffsetMap FINAL : public linker::RelativePatcherTargetProvider {
+ public:
+ std::pair<bool, uint32_t> FindMethodOffset(MethodReference ref) OVERRIDE {
+ auto it = map.find(ref);
+ if (it == map.end()) {
+ return std::pair<bool, uint32_t>(false, 0u);
+ } else {
+ return std::pair<bool, uint32_t>(true, it->second);
+ }
+ }
+ SafeMap<MethodReference, uint32_t, MethodReferenceComparator> map;
+ };
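+
+ // An unresolved reference makes FindMethodOffset() return (false, 0u); Link() then
+ // patches such relative calls against the trampoline at kTrampolineOffset.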
+
+ static const uint32_t kTrampolineSize = 4u;
+ static const uint32_t kTrampolineOffset = 0u;
+
+ CompilerOptions compiler_options_;
+ VerificationResults verification_results_;
+ DexFileToMethodInlinerMap inliner_map_;
+ CompilerDriver driver_; // Needed for constructing CompiledMethod.
+ std::string error_msg_;
+ InstructionSet instruction_set_;
+ std::unique_ptr<const InstructionSetFeatures> features_;
+ MethodOffsetMap method_offset_map_;
+ std::unique_ptr<RelativePatcher> patcher_;
+ uint32_t dex_cache_arrays_begin_;
+ std::vector<MethodReference> compiled_method_refs_;
+ std::vector<std::unique_ptr<CompiledMethod>> compiled_methods_;
+ std::vector<uint8_t> patched_code_;
+ std::vector<uint8_t> output_;
+ VectorOutputStream out_;
+};
+
+} // namespace linker
+} // namespace art
+
+#endif // ART_COMPILER_LINKER_RELATIVE_PATCHER_TEST_H_
diff --git a/compiler/linker/x86/relative_patcher_x86.cc b/compiler/linker/x86/relative_patcher_x86.cc
new file mode 100644
index 0000000000..315585d9e7
--- /dev/null
+++ b/compiler/linker/x86/relative_patcher_x86.cc
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "linker/x86/relative_patcher_x86.h"
+
+#include "compiled_method.h"
+
+namespace art {
+namespace linker {
+
+void X86RelativePatcher::PatchDexCacheReference(std::vector<uint8_t>* code,
+ const LinkerPatch& patch,
+ uint32_t patch_offset,
+ uint32_t target_offset) {
+ uint32_t anchor_literal_offset = patch.PcInsnOffset();
+ uint32_t literal_offset = patch.LiteralOffset();
+
+ // Check that the anchor points to the pop in a "call +0; pop <reg>" sequence.
+ DCHECK_GE(anchor_literal_offset, 5u);
+ DCHECK_LT(anchor_literal_offset, code->size());
+ DCHECK_EQ((*code)[anchor_literal_offset - 5u], 0xe8u);
+ DCHECK_EQ((*code)[anchor_literal_offset - 4u], 0x00u);
+ DCHECK_EQ((*code)[anchor_literal_offset - 3u], 0x00u);
+ DCHECK_EQ((*code)[anchor_literal_offset - 2u], 0x00u);
+ DCHECK_EQ((*code)[anchor_literal_offset - 1u], 0x00u);
+ DCHECK_EQ((*code)[anchor_literal_offset] & 0xf8u, 0x58u);
+
+ // Check that the patched data contains kDummy32BitOffset.
+ constexpr int kDummy32BitOffset = 256; // Must match X86Mir2Lir::kDummy32BitOffset.
+ DCHECK_LE(literal_offset + 4u, code->size());
+ DCHECK_EQ((*code)[literal_offset + 0u], static_cast<uint8_t>(kDummy32BitOffset >> 0));
+ DCHECK_EQ((*code)[literal_offset + 1u], static_cast<uint8_t>(kDummy32BitOffset >> 8));
+ DCHECK_EQ((*code)[literal_offset + 2u], static_cast<uint8_t>(kDummy32BitOffset >> 16));
+ DCHECK_EQ((*code)[literal_offset + 3u], static_cast<uint8_t>(kDummy32BitOffset >> 24));
+
+ // Apply patch.
+ uint32_t anchor_offset = patch_offset - literal_offset + anchor_literal_offset;
+ uint32_t diff = target_offset - anchor_offset;
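+ // Illustrative numbers only: with patch_offset == 0x108, literal_offset == 8 and
+ // anchor_literal_offset == 5, the anchor is at offset 0x105; target_offset ==
+ // 0x12346000 then gives diff == 0x12345efb.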
+ (*code)[literal_offset + 0u] = static_cast<uint8_t>(diff >> 0);
+ (*code)[literal_offset + 1u] = static_cast<uint8_t>(diff >> 8);
+ (*code)[literal_offset + 2u] = static_cast<uint8_t>(diff >> 16);
+ (*code)[literal_offset + 3u] = static_cast<uint8_t>(diff >> 24);
+}
+
+} // namespace linker
+} // namespace art
diff --git a/compiler/linker/x86/relative_patcher_x86.h b/compiler/linker/x86/relative_patcher_x86.h
new file mode 100644
index 0000000000..0c881f00ba
--- /dev/null
+++ b/compiler/linker/x86/relative_patcher_x86.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_LINKER_X86_RELATIVE_PATCHER_X86_H_
+#define ART_COMPILER_LINKER_X86_RELATIVE_PATCHER_X86_H_
+
+#include "linker/x86/relative_patcher_x86_base.h"
+
+namespace art {
+namespace linker {
+
+class X86RelativePatcher FINAL : public X86BaseRelativePatcher {
+ public:
+ X86RelativePatcher() { }
+
+ void PatchDexCacheReference(std::vector<uint8_t>* code, const LinkerPatch& patch,
+ uint32_t patch_offset, uint32_t target_offset) OVERRIDE;
+};
+
+} // namespace linker
+} // namespace art
+
+#endif // ART_COMPILER_LINKER_X86_RELATIVE_PATCHER_X86_H_
diff --git a/compiler/linker/x86/relative_patcher_x86_base.cc b/compiler/linker/x86/relative_patcher_x86_base.cc
new file mode 100644
index 0000000000..bc285a7849
--- /dev/null
+++ b/compiler/linker/x86/relative_patcher_x86_base.cc
@@ -0,0 +1,49 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "linker/x86/relative_patcher_x86_base.h"
+
+namespace art {
+namespace linker {
+
+uint32_t X86BaseRelativePatcher::ReserveSpace(
+ uint32_t offset,
+ const CompiledMethod* compiled_method ATTRIBUTE_UNUSED,
+ MethodReference method_ref ATTRIBUTE_UNUSED) {
+ return offset; // No space reserved; no limit on relative call distance.
+}
+
+uint32_t X86BaseRelativePatcher::ReserveSpaceEnd(uint32_t offset) {
+ return offset; // No space reserved; no limit on relative call distance.
+}
+
+uint32_t X86BaseRelativePatcher::WriteThunks(OutputStream* out ATTRIBUTE_UNUSED, uint32_t offset) {
+ return offset; // No thunks added; no limit on relative call distance.
+}
+
+void X86BaseRelativePatcher::PatchCall(std::vector<uint8_t>* code, uint32_t literal_offset,
+ uint32_t patch_offset, uint32_t target_offset) {
+ DCHECK_LE(literal_offset + 4u, code->size());
+ // Unsigned arithmetic with its well-defined overflow behavior is just fine here.
+ uint32_t displacement = target_offset - patch_offset;
+ displacement -= kPcDisplacement; // The base PC is at the end of the 4-byte patch.
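+ // For example (illustrative values): a call patched at offset 0x1000 targeting code
+ // at offset 0x2000 stores 0x2000 - 0x1000 - 4 == 0xffc as the little-endian
+ // displacement.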
+
+ typedef __attribute__((__aligned__(1))) int32_t unaligned_int32_t;
+ reinterpret_cast<unaligned_int32_t*>(&(*code)[literal_offset])[0] = displacement;
+}
+
+} // namespace linker
+} // namespace art
diff --git a/compiler/linker/x86/relative_patcher_x86_base.h b/compiler/linker/x86/relative_patcher_x86_base.h
new file mode 100644
index 0000000000..9200709398
--- /dev/null
+++ b/compiler/linker/x86/relative_patcher_x86_base.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_LINKER_X86_RELATIVE_PATCHER_X86_BASE_H_
+#define ART_COMPILER_LINKER_X86_RELATIVE_PATCHER_X86_BASE_H_
+
+#include "linker/relative_patcher.h"
+
+namespace art {
+namespace linker {
+
+class X86BaseRelativePatcher : public RelativePatcher {
+ public:
+ uint32_t ReserveSpace(uint32_t offset,
+ const CompiledMethod* compiled_method,
+ MethodReference method_ref) OVERRIDE;
+ uint32_t ReserveSpaceEnd(uint32_t offset) OVERRIDE;
+ uint32_t WriteThunks(OutputStream* out, uint32_t offset) OVERRIDE;
+ void PatchCall(std::vector<uint8_t>* code, uint32_t literal_offset,
+ uint32_t patch_offset, uint32_t target_offset) OVERRIDE;
+
+ protected:
+ X86BaseRelativePatcher() { }
+
+ // PC displacement from the patch location; the base address for x86/x86-64 relative
+ // calls and x86-64 RIP-relative addressing is the address of the next instruction,
+ // and the patch location is 4 bytes earlier.
+ static constexpr int32_t kPcDisplacement = 4;
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(X86BaseRelativePatcher);
+};
+
+} // namespace linker
+} // namespace art
+
+#endif // ART_COMPILER_LINKER_X86_RELATIVE_PATCHER_X86_BASE_H_
diff --git a/compiler/linker/x86/relative_patcher_x86_test.cc b/compiler/linker/x86/relative_patcher_x86_test.cc
new file mode 100644
index 0000000000..7acc33004a
--- /dev/null
+++ b/compiler/linker/x86/relative_patcher_x86_test.cc
@@ -0,0 +1,135 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "linker/relative_patcher_test.h"
+#include "linker/x86/relative_patcher_x86.h"
+
+namespace art {
+namespace linker {
+
+class X86RelativePatcherTest : public RelativePatcherTest {
+ public:
+ X86RelativePatcherTest() : RelativePatcherTest(kX86, "default") { }
+
+ protected:
+ static const uint8_t kCallRawCode[];
+ static const ArrayRef<const uint8_t> kCallCode;
+
+ uint32_t GetMethodOffset(uint32_t method_idx) {
+ auto result = method_offset_map_.FindMethodOffset(MethodRef(method_idx));
+ CHECK(result.first);
+ return result.second;
+ }
+};
+
+const uint8_t X86RelativePatcherTest::kCallRawCode[] = {
+ 0xe8, 0x00, 0x01, 0x00, 0x00
+};
+
+const ArrayRef<const uint8_t> X86RelativePatcherTest::kCallCode(kCallRawCode);
+
+TEST_F(X86RelativePatcherTest, CallSelf) {
+ LinkerPatch patches[] = {
+ LinkerPatch::RelativeCodePatch(kCallCode.size() - 4u, nullptr, 1u),
+ };
+ AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches));
+ Link();
+
+ static const uint8_t expected_code[] = {
+ 0xe8, 0xfb, 0xff, 0xff, 0xff
+ };
+ EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
+}
+
+TEST_F(X86RelativePatcherTest, CallOther) {
+ LinkerPatch method1_patches[] = {
+ LinkerPatch::RelativeCodePatch(kCallCode.size() - 4u, nullptr, 2u),
+ };
+ AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(method1_patches));
+ LinkerPatch method2_patches[] = {
+ LinkerPatch::RelativeCodePatch(kCallCode.size() - 4u, nullptr, 1u),
+ };
+ AddCompiledMethod(MethodRef(2u), kCallCode, ArrayRef<const LinkerPatch>(method2_patches));
+ Link();
+
+ uint32_t method1_offset = GetMethodOffset(1u);
+ uint32_t method2_offset = GetMethodOffset(2u);
+ uint32_t diff_after = method2_offset - (method1_offset + kCallCode.size() /* PC adjustment */);
+ static const uint8_t method1_expected_code[] = {
+ 0xe8,
+ static_cast<uint8_t>(diff_after), static_cast<uint8_t>(diff_after >> 8),
+ static_cast<uint8_t>(diff_after >> 16), static_cast<uint8_t>(diff_after >> 24)
+ };
+ EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(method1_expected_code)));
+ uint32_t diff_before = method1_offset - (method2_offset + kCallCode.size() /* PC adjustment */);
+ static const uint8_t method2_expected_code[] = {
+ 0xe8,
+ static_cast<uint8_t>(diff_before), static_cast<uint8_t>(diff_before >> 8),
+ static_cast<uint8_t>(diff_before >> 16), static_cast<uint8_t>(diff_before >> 24)
+ };
+ EXPECT_TRUE(CheckLinkedMethod(MethodRef(2u), ArrayRef<const uint8_t>(method2_expected_code)));
+}
+
+TEST_F(X86RelativePatcherTest, CallTrampoline) {
+ LinkerPatch patches[] = {
+ LinkerPatch::RelativeCodePatch(kCallCode.size() - 4u, nullptr, 2u),
+ };
+ AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches));
+ Link();
+
+ auto result = method_offset_map_.FindMethodOffset(MethodRef(1u));
+ ASSERT_TRUE(result.first);
+ uint32_t diff = kTrampolineOffset - (result.second + kCallCode.size());
+ static const uint8_t expected_code[] = {
+ 0xe8,
+ static_cast<uint8_t>(diff), static_cast<uint8_t>(diff >> 8),
+ static_cast<uint8_t>(diff >> 16), static_cast<uint8_t>(diff >> 24)
+ };
+ EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
+}
+
+TEST_F(X86RelativePatcherTest, DexCacheReference) {
+ dex_cache_arrays_begin_ = 0x12345678;
+ constexpr size_t kElementOffset = 0x1234;
+ static const uint8_t raw_code[] = {
+ 0xe8, 0x00, 0x00, 0x00, 0x00, // call +0
+ 0x5b, // pop ebx
+ 0x8b, 0x83, 0x00, 0x01, 0x00, 0x00, // mov eax, [ebx + 256 (kDummy32BitOffset)]
+ };
+ constexpr uint32_t anchor_offset = 5u; // After call +0.
+ ArrayRef<const uint8_t> code(raw_code);
+ LinkerPatch patches[] = {
+ LinkerPatch::DexCacheArrayPatch(code.size() - 4u, nullptr, anchor_offset, kElementOffset),
+ };
+ AddCompiledMethod(MethodRef(1u), code, ArrayRef<const LinkerPatch>(patches));
+ Link();
+
+ auto result = method_offset_map_.FindMethodOffset(MethodRef(1u));
+ ASSERT_TRUE(result.first);
+ uint32_t diff =
+ dex_cache_arrays_begin_ + kElementOffset - (result.second + anchor_offset);
+ static const uint8_t expected_code[] = {
+ 0xe8, 0x00, 0x00, 0x00, 0x00, // call +0
+ 0x5b, // pop ebx
+ 0x8b, 0x83, // mov eax, [ebx + diff]
+ static_cast<uint8_t>(diff), static_cast<uint8_t>(diff >> 8),
+ static_cast<uint8_t>(diff >> 16), static_cast<uint8_t>(diff >> 24)
+ };
+ EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
+}
+
+} // namespace linker
+} // namespace art
diff --git a/compiler/linker/x86_64/relative_patcher_x86_64.cc b/compiler/linker/x86_64/relative_patcher_x86_64.cc
new file mode 100644
index 0000000000..598f3ac4a8
--- /dev/null
+++ b/compiler/linker/x86_64/relative_patcher_x86_64.cc
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "linker/x86_64/relative_patcher_x86_64.h"
+
+#include "compiled_method.h"
+
+namespace art {
+namespace linker {
+
+void X86_64RelativePatcher::PatchDexCacheReference(std::vector<uint8_t>* code,
+ const LinkerPatch& patch,
+ uint32_t patch_offset, uint32_t target_offset) {
+ DCHECK_LE(patch.LiteralOffset() + 4u, code->size());
+ // Unsigned arithmetic with its well-defined overflow behavior is just fine here.
+ uint32_t displacement = target_offset - patch_offset;
+ displacement -= kPcDisplacement; // The base PC is at the end of the 4-byte patch.
+
+ typedef __attribute__((__aligned__(1))) int32_t unaligned_int32_t;
+ reinterpret_cast<unaligned_int32_t*>(&(*code)[patch.LiteralOffset()])[0] = displacement;
+}
+
+} // namespace linker
+} // namespace art
diff --git a/compiler/linker/x86_64/relative_patcher_x86_64.h b/compiler/linker/x86_64/relative_patcher_x86_64.h
new file mode 100644
index 0000000000..af687b4a2f
--- /dev/null
+++ b/compiler/linker/x86_64/relative_patcher_x86_64.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_LINKER_X86_64_RELATIVE_PATCHER_X86_64_H_
+#define ART_COMPILER_LINKER_X86_64_RELATIVE_PATCHER_X86_64_H_
+
+#include "linker/x86/relative_patcher_x86_base.h"
+
+namespace art {
+namespace linker {
+
+class X86_64RelativePatcher FINAL : public X86BaseRelativePatcher {
+ public:
+ X86_64RelativePatcher() { }
+
+ void PatchDexCacheReference(std::vector<uint8_t>* code, const LinkerPatch& patch,
+ uint32_t patch_offset, uint32_t target_offset) OVERRIDE;
+};
+
+} // namespace linker
+} // namespace art
+
+#endif // ART_COMPILER_LINKER_X86_64_RELATIVE_PATCHER_X86_64_H_
diff --git a/compiler/linker/x86_64/relative_patcher_x86_64_test.cc b/compiler/linker/x86_64/relative_patcher_x86_64_test.cc
new file mode 100644
index 0000000000..36e0f01a50
--- /dev/null
+++ b/compiler/linker/x86_64/relative_patcher_x86_64_test.cc
@@ -0,0 +1,136 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "linker/relative_patcher_test.h"
+#include "linker/x86_64/relative_patcher_x86_64.h"
+
+namespace art {
+namespace linker {
+
+class X86_64RelativePatcherTest : public RelativePatcherTest {
+ public:
+ X86_64RelativePatcherTest() : RelativePatcherTest(kX86_64, "default") { }
+
+ protected:
+ static const uint8_t kCallRawCode[];
+ static const ArrayRef<const uint8_t> kCallCode;
+ static const uint8_t kDexCacheLoadRawCode[];
+ static const ArrayRef<const uint8_t> kDexCacheLoadCode;
+
+ uint32_t GetMethodOffset(uint32_t method_idx) {
+ auto result = method_offset_map_.FindMethodOffset(MethodRef(method_idx));
+ CHECK(result.first);
+ return result.second;
+ }
+};
+
+const uint8_t X86_64RelativePatcherTest::kCallRawCode[] = {
+ 0xe8, 0x00, 0x01, 0x00, 0x00
+};
+
+const ArrayRef<const uint8_t> X86_64RelativePatcherTest::kCallCode(kCallRawCode);
+
+const uint8_t X86_64RelativePatcherTest::kDexCacheLoadRawCode[] = {
+ 0x8b, 0x05, // mov eax, [rip + <offset>]
+ 0x00, 0x01, 0x00, 0x00
+};
+
+const ArrayRef<const uint8_t> X86_64RelativePatcherTest::kDexCacheLoadCode(
+ kDexCacheLoadRawCode);
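+
+// Unlike x86, which needs a "call +0; pop <reg>" anchor, x86-64 reaches the dex cache
+// arrays with RIP-relative addressing, so the load can be patched directly.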
+
+TEST_F(X86_64RelativePatcherTest, CallSelf) {
+ LinkerPatch patches[] = {
+ LinkerPatch::RelativeCodePatch(kCallCode.size() - 4u, nullptr, 1u),
+ };
+ AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches));
+ Link();
+
+ static const uint8_t expected_code[] = {
+ 0xe8, 0xfb, 0xff, 0xff, 0xff
+ };
+ EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
+}
+
+TEST_F(X86_64RelativePatcherTest, CallOther) {
+ LinkerPatch method1_patches[] = {
+ LinkerPatch::RelativeCodePatch(kCallCode.size() - 4u, nullptr, 2u),
+ };
+ AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(method1_patches));
+ LinkerPatch method2_patches[] = {
+ LinkerPatch::RelativeCodePatch(kCallCode.size() - 4u, nullptr, 1u),
+ };
+ AddCompiledMethod(MethodRef(2u), kCallCode, ArrayRef<const LinkerPatch>(method2_patches));
+ Link();
+
+ uint32_t method1_offset = GetMethodOffset(1u);
+ uint32_t method2_offset = GetMethodOffset(2u);
+ uint32_t diff_after = method2_offset - (method1_offset + kCallCode.size() /* PC adjustment */);
+ static const uint8_t method1_expected_code[] = {
+ 0xe8,
+ static_cast<uint8_t>(diff_after), static_cast<uint8_t>(diff_after >> 8),
+ static_cast<uint8_t>(diff_after >> 16), static_cast<uint8_t>(diff_after >> 24)
+ };
+ EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(method1_expected_code)));
+ uint32_t diff_before = method1_offset - (method2_offset + kCallCode.size() /* PC adjustment */);
+ static const uint8_t method2_expected_code[] = {
+ 0xe8,
+ static_cast<uint8_t>(diff_before), static_cast<uint8_t>(diff_before >> 8),
+ static_cast<uint8_t>(diff_before >> 16), static_cast<uint8_t>(diff_before >> 24)
+ };
+ EXPECT_TRUE(CheckLinkedMethod(MethodRef(2u), ArrayRef<const uint8_t>(method2_expected_code)));
+}
+
+TEST_F(X86_64RelativePatcherTest, CallTrampoline) {
+ LinkerPatch patches[] = {
+ LinkerPatch::RelativeCodePatch(kCallCode.size() - 4u, nullptr, 2u),
+ };
+ AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches));
+ Link();
+
+ auto result = method_offset_map_.FindMethodOffset(MethodRef(1u));
+ ASSERT_TRUE(result.first);
+ uint32_t diff = kTrampolineOffset - (result.second + kCallCode.size());
+ static const uint8_t expected_code[] = {
+ 0xe8,
+ static_cast<uint8_t>(diff), static_cast<uint8_t>(diff >> 8),
+ static_cast<uint8_t>(diff >> 16), static_cast<uint8_t>(diff >> 24)
+ };
+ EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
+}
+
+TEST_F(X86_64RelativePatcherTest, DexCacheReference) {
+ dex_cache_arrays_begin_ = 0x12345678;
+ constexpr size_t kElementOffset = 0x1234;
+ LinkerPatch patches[] = {
+ LinkerPatch::DexCacheArrayPatch(kDexCacheLoadCode.size() - 4u, nullptr, 0u, kElementOffset),
+ };
+ AddCompiledMethod(MethodRef(1u), kDexCacheLoadCode, ArrayRef<const LinkerPatch>(patches));
+ Link();
+
+ auto result = method_offset_map_.FindMethodOffset(MethodRef(1u));
+ ASSERT_TRUE(result.first);
+ uint32_t diff =
+ dex_cache_arrays_begin_ + kElementOffset - (result.second + kDexCacheLoadCode.size());
+ static const uint8_t expected_code[] = {
+ 0x8b, 0x05,
+ static_cast<uint8_t>(diff), static_cast<uint8_t>(diff >> 8),
+ static_cast<uint8_t>(diff >> 16), static_cast<uint8_t>(diff >> 24)
+ };
+ EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
+}
+
+} // namespace linker
+} // namespace art
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index afd39e8874..989b04fa36 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -17,11 +17,14 @@
#include "arch/instruction_set_features.h"
#include "class_linker.h"
#include "common_compiler_test.h"
+#include "compiled_method.h"
#include "compiler.h"
#include "dex/pass_manager.h"
#include "dex/quick/dex_file_to_method_inliner_map.h"
#include "dex/quick_compiler_callbacks.h"
#include "dex/verification_results.h"
+#include "driver/compiler_driver.h"
+#include "driver/compiler_options.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "mirror/art_method-inl.h"
#include "mirror/class-inl.h"
@@ -173,7 +176,7 @@ TEST_F(OatTest, OatHeaderSizeCheck) {
EXPECT_EQ(72U, sizeof(OatHeader));
EXPECT_EQ(4U, sizeof(OatMethodOffsets));
EXPECT_EQ(28U, sizeof(OatQuickMethodHeader));
- EXPECT_EQ(91 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints));
+ EXPECT_EQ(92 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints));
}
TEST_F(OatTest, OatHeaderIsValid) {
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index b3bb438bac..5b4cc54858 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -18,16 +18,21 @@
#include <zlib.h>
+#include "arch/arm64/instruction_set_features_arm64.h"
#include "base/allocator.h"
#include "base/bit_vector.h"
#include "base/stl_util.h"
#include "base/unix_file/fd_file.h"
#include "class_linker.h"
#include "compiled_class.h"
+#include "compiled_method.h"
#include "dex_file-inl.h"
#include "dex/verification_results.h"
+#include "driver/compiler_driver.h"
+#include "driver/compiler_options.h"
#include "gc/space/space.h"
#include "image_writer.h"
+#include "linker/relative_patcher.h"
#include "mirror/art_method-inl.h"
#include "mirror/array.h"
#include "mirror/class_loader.h"
@@ -37,352 +42,10 @@
#include "safe_map.h"
#include "scoped_thread_state_change.h"
#include "handle_scope-inl.h"
-#include "utils/arm/assembler_thumb2.h"
-#include "utils/arm64/assembler_arm64.h"
#include "verifier/method_verifier.h"
namespace art {
-class OatWriter::RelativeCallPatcher {
- public:
- virtual ~RelativeCallPatcher() { }
-
- // Reserve space for relative call thunks if needed, return adjusted offset.
- // After all methods have been processed it's call one last time with compiled_method == nullptr.
- virtual uint32_t ReserveSpace(uint32_t offset, const CompiledMethod* compiled_method) = 0;
-
- // Write relative call thunks if needed, return adjusted offset.
- virtual uint32_t WriteThunks(OutputStream* out, uint32_t offset) = 0;
-
- // Patch method code. The input displacement is relative to the patched location,
- // the patcher may need to adjust it if the correct base is different.
- virtual void Patch(std::vector<uint8_t>* code, uint32_t literal_offset, uint32_t patch_offset,
- uint32_t target_offset) = 0;
-
- protected:
- RelativeCallPatcher() { }
-
- private:
- DISALLOW_COPY_AND_ASSIGN(RelativeCallPatcher);
-};
-
-class OatWriter::NoRelativeCallPatcher FINAL : public RelativeCallPatcher {
- public:
- NoRelativeCallPatcher() { }
-
- uint32_t ReserveSpace(uint32_t offset,
- const CompiledMethod* compiled_method ATTRIBUTE_UNUSED) OVERRIDE {
- return offset; // No space reserved; no patches expected.
- }
-
- uint32_t WriteThunks(OutputStream* out ATTRIBUTE_UNUSED, uint32_t offset) OVERRIDE {
- return offset; // No thunks added; no patches expected.
- }
-
- void Patch(std::vector<uint8_t>* code ATTRIBUTE_UNUSED, uint32_t literal_offset ATTRIBUTE_UNUSED,
- uint32_t patch_offset ATTRIBUTE_UNUSED,
- uint32_t target_offset ATTRIBUTE_UNUSED) OVERRIDE {
- LOG(FATAL) << "Unexpected relative patch.";
- }
-
- private:
- DISALLOW_COPY_AND_ASSIGN(NoRelativeCallPatcher);
-};
-
-class OatWriter::X86RelativeCallPatcher FINAL : public RelativeCallPatcher {
- public:
- X86RelativeCallPatcher() { }
-
- uint32_t ReserveSpace(uint32_t offset,
- const CompiledMethod* compiled_method ATTRIBUTE_UNUSED) OVERRIDE {
- return offset; // No space reserved; no limit on relative call distance.
- }
-
- uint32_t WriteThunks(OutputStream* out ATTRIBUTE_UNUSED, uint32_t offset) OVERRIDE {
- return offset; // No thunks added; no limit on relative call distance.
- }
-
- void Patch(std::vector<uint8_t>* code, uint32_t literal_offset, uint32_t patch_offset,
- uint32_t target_offset) OVERRIDE {
- DCHECK_LE(literal_offset + 4u, code->size());
- // Unsigned arithmetic with its well-defined overflow behavior is just fine here.
- uint32_t displacement = target_offset - patch_offset;
- displacement -= kPcDisplacement; // The base PC is at the end of the 4-byte patch.
-
- typedef __attribute__((__aligned__(1))) int32_t unaligned_int32_t;
- reinterpret_cast<unaligned_int32_t*>(&(*code)[literal_offset])[0] = displacement;
- }
-
- private:
- // PC displacement from patch location; x86 PC for relative calls points to the next
- // instruction and the patch location is 4 bytes earlier.
- static constexpr int32_t kPcDisplacement = 4;
-
- DISALLOW_COPY_AND_ASSIGN(X86RelativeCallPatcher);
-};
-
-class OatWriter::ArmBaseRelativeCallPatcher : public RelativeCallPatcher {
- public:
- ArmBaseRelativeCallPatcher(OatWriter* writer,
- InstructionSet instruction_set, std::vector<uint8_t> thunk_code,
- uint32_t max_positive_displacement, uint32_t max_negative_displacement)
- : writer_(writer), instruction_set_(instruction_set), thunk_code_(thunk_code),
- max_positive_displacement_(max_positive_displacement),
- max_negative_displacement_(max_negative_displacement),
- thunk_locations_(), current_thunk_to_write_(0u), unprocessed_patches_() {
- }
-
- uint32_t ReserveSpace(uint32_t offset, const CompiledMethod* compiled_method) OVERRIDE {
- // NOTE: The final thunk can be reserved from InitCodeMethodVisitor::EndClass() while it
- // may be written early by WriteCodeMethodVisitor::VisitMethod() for a deduplicated chunk
- // of code. To avoid any alignment discrepancies for the final chunk, we always align the
- // offset after reserving of writing any chunk.
- if (UNLIKELY(compiled_method == nullptr)) {
- uint32_t aligned_offset = CompiledMethod::AlignCode(offset, instruction_set_);
- bool needs_thunk = ReserveSpaceProcessPatches(aligned_offset);
- if (needs_thunk) {
- thunk_locations_.push_back(aligned_offset);
- offset = CompiledMethod::AlignCode(aligned_offset + thunk_code_.size(), instruction_set_);
- }
- return offset;
- }
- DCHECK(compiled_method->GetQuickCode() != nullptr);
- uint32_t quick_code_size = compiled_method->GetQuickCode()->size();
- uint32_t quick_code_offset = compiled_method->AlignCode(offset) + sizeof(OatQuickMethodHeader);
- uint32_t next_aligned_offset = compiled_method->AlignCode(quick_code_offset + quick_code_size);
- if (!unprocessed_patches_.empty() &&
- next_aligned_offset - unprocessed_patches_.front().second > max_positive_displacement_) {
- bool needs_thunk = ReserveSpaceProcessPatches(next_aligned_offset);
- if (needs_thunk) {
- // A single thunk will cover all pending patches.
- unprocessed_patches_.clear();
- uint32_t thunk_location = compiled_method->AlignCode(offset);
- thunk_locations_.push_back(thunk_location);
- offset = CompiledMethod::AlignCode(thunk_location + thunk_code_.size(), instruction_set_);
- }
- }
- for (const LinkerPatch& patch : compiled_method->GetPatches()) {
- if (patch.Type() == kLinkerPatchCallRelative) {
- unprocessed_patches_.emplace_back(patch.TargetMethod(),
- quick_code_offset + patch.LiteralOffset());
- }
- }
- return offset;
- }
-
- uint32_t WriteThunks(OutputStream* out, uint32_t offset) OVERRIDE {
- if (current_thunk_to_write_ == thunk_locations_.size()) {
- return offset;
- }
- uint32_t aligned_offset = CompiledMethod::AlignCode(offset, instruction_set_);
- if (UNLIKELY(aligned_offset == thunk_locations_[current_thunk_to_write_])) {
- ++current_thunk_to_write_;
- uint32_t aligned_code_delta = aligned_offset - offset;
- if (aligned_code_delta != 0u && !writer_->WriteCodeAlignment(out, aligned_code_delta)) {
- return 0u;
- }
- if (!out->WriteFully(thunk_code_.data(), thunk_code_.size())) {
- return 0u;
- }
- writer_->size_relative_call_thunks_ += thunk_code_.size();
- uint32_t thunk_end_offset = aligned_offset + thunk_code_.size();
- // Align after writing chunk, see the ReserveSpace() above.
- offset = CompiledMethod::AlignCode(thunk_end_offset, instruction_set_);
- aligned_code_delta = offset - thunk_end_offset;
- if (aligned_code_delta != 0u && !writer_->WriteCodeAlignment(out, aligned_code_delta)) {
- return 0u;
- }
- }
- return offset;
- }
-
- protected:
- uint32_t CalculateDisplacement(uint32_t patch_offset, uint32_t target_offset) {
- // Unsigned arithmetic with its well-defined overflow behavior is just fine here.
- uint32_t displacement = target_offset - patch_offset;
- // NOTE: With unsigned arithmetic we do mean to use && rather than || below.
- if (displacement > max_positive_displacement_ && displacement < -max_negative_displacement_) {
- // Unwritten thunks have higher offsets, check if it's within range.
- DCHECK(current_thunk_to_write_ == thunk_locations_.size() ||
- thunk_locations_[current_thunk_to_write_] > patch_offset);
- if (current_thunk_to_write_ != thunk_locations_.size() &&
- thunk_locations_[current_thunk_to_write_] - patch_offset < max_positive_displacement_) {
- displacement = thunk_locations_[current_thunk_to_write_] - patch_offset;
- } else {
- // We must have a previous thunk then.
- DCHECK_NE(current_thunk_to_write_, 0u);
- DCHECK_LT(thunk_locations_[current_thunk_to_write_ - 1], patch_offset);
- displacement = thunk_locations_[current_thunk_to_write_ - 1] - patch_offset;
- DCHECK(displacement >= -max_negative_displacement_);
- }
- }
- return displacement;
- }
-
- private:
- bool ReserveSpaceProcessPatches(uint32_t next_aligned_offset) {
- // Process as many patches as possible, stop only on unresolved targets or calls too far back.
- while (!unprocessed_patches_.empty()) {
- uint32_t patch_offset = unprocessed_patches_.front().second;
- auto it = writer_->method_offset_map_.find(unprocessed_patches_.front().first);
- if (it == writer_->method_offset_map_.end()) {
- // If still unresolved, check if we have a thunk within range.
- DCHECK(thunk_locations_.empty() || thunk_locations_.back() <= patch_offset);
- if (thunk_locations_.empty() ||
- patch_offset - thunk_locations_.back() > max_negative_displacement_) {
- return next_aligned_offset - patch_offset > max_positive_displacement_;
- }
- } else if (it->second >= patch_offset) {
- DCHECK_LE(it->second - patch_offset, max_positive_displacement_);
- } else {
- // When calling back, check if we have a thunk that's closer than the actual target.
- uint32_t target_offset = (thunk_locations_.empty() || it->second > thunk_locations_.back())
- ? it->second
- : thunk_locations_.back();
- DCHECK_GT(patch_offset, target_offset);
- if (patch_offset - target_offset > max_negative_displacement_) {
- return true;
- }
- }
- unprocessed_patches_.pop_front();
- }
- return false;
- }
-
- OatWriter* const writer_;
- const InstructionSet instruction_set_;
- const std::vector<uint8_t> thunk_code_;
- const uint32_t max_positive_displacement_;
- const uint32_t max_negative_displacement_;
- std::vector<uint32_t> thunk_locations_;
- size_t current_thunk_to_write_;
-
- // ReserveSpace() tracks unprocessed patches.
- typedef std::pair<MethodReference, uint32_t> UnprocessedPatch;
- std::deque<UnprocessedPatch> unprocessed_patches_;
-
- DISALLOW_COPY_AND_ASSIGN(ArmBaseRelativeCallPatcher);
-};
-
-class OatWriter::Thumb2RelativeCallPatcher FINAL : public ArmBaseRelativeCallPatcher {
- public:
- explicit Thumb2RelativeCallPatcher(OatWriter* writer)
- : ArmBaseRelativeCallPatcher(writer, kThumb2, CompileThunkCode(),
- kMaxPositiveDisplacement, kMaxNegativeDisplacement) {
- }
-
- void Patch(std::vector<uint8_t>* code, uint32_t literal_offset, uint32_t patch_offset,
- uint32_t target_offset) OVERRIDE {
- DCHECK_LE(literal_offset + 4u, code->size());
- DCHECK_EQ(literal_offset & 1u, 0u);
- DCHECK_EQ(patch_offset & 1u, 0u);
- DCHECK_EQ(target_offset & 1u, 1u); // Thumb2 mode bit.
- uint32_t displacement = CalculateDisplacement(patch_offset, target_offset & ~1u);
- displacement -= kPcDisplacement; // The base PC is at the end of the 4-byte patch.
- DCHECK_EQ(displacement & 1u, 0u);
- DCHECK((displacement >> 24) == 0u || (displacement >> 24) == 255u); // 25-bit signed.
- uint32_t signbit = (displacement >> 31) & 0x1;
- uint32_t i1 = (displacement >> 23) & 0x1;
- uint32_t i2 = (displacement >> 22) & 0x1;
- uint32_t imm10 = (displacement >> 12) & 0x03ff;
- uint32_t imm11 = (displacement >> 1) & 0x07ff;
- uint32_t j1 = i1 ^ (signbit ^ 1);
- uint32_t j2 = i2 ^ (signbit ^ 1);
- uint32_t value = (signbit << 26) | (j1 << 13) | (j2 << 11) | (imm10 << 16) | imm11;
- value |= 0xf000d000; // BL
-
- uint8_t* addr = &(*code)[literal_offset];
- // Check that we're just overwriting an existing BL.
- DCHECK_EQ(addr[1] & 0xf8, 0xf0);
- DCHECK_EQ(addr[3] & 0xd0, 0xd0);
- // Write the new BL.
- addr[0] = (value >> 16) & 0xff;
- addr[1] = (value >> 24) & 0xff;
- addr[2] = (value >> 0) & 0xff;
- addr[3] = (value >> 8) & 0xff;
- }
-
- private:
- static std::vector<uint8_t> CompileThunkCode() {
- // The thunk just uses the entry point in the ArtMethod. This works even for calls
- // to the generic JNI and interpreter trampolines.
- arm::Thumb2Assembler assembler;
- assembler.LoadFromOffset(
- arm::kLoadWord, arm::PC, arm::R0,
- mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value());
- assembler.bkpt(0);
- std::vector<uint8_t> thunk_code(assembler.CodeSize());
- MemoryRegion code(thunk_code.data(), thunk_code.size());
- assembler.FinalizeInstructions(code);
- return thunk_code;
- }
-
- // PC displacement from patch location; Thumb2 PC is always at instruction address + 4.
- static constexpr int32_t kPcDisplacement = 4;
-
- // Maximum positive and negative displacement measured from the patch location.
- // (Signed 25 bit displacement with the last bit 0 has range [-2^24, 2^24-2] measured from
- // the Thumb2 PC pointing right after the BL, i.e. 4 bytes later than the patch location.)
- static constexpr uint32_t kMaxPositiveDisplacement = (1u << 24) - 2 + kPcDisplacement;
- static constexpr uint32_t kMaxNegativeDisplacement = (1u << 24) - kPcDisplacement;
-
- DISALLOW_COPY_AND_ASSIGN(Thumb2RelativeCallPatcher);
-};
-
-class OatWriter::Arm64RelativeCallPatcher FINAL : public ArmBaseRelativeCallPatcher {
- public:
- explicit Arm64RelativeCallPatcher(OatWriter* writer)
- : ArmBaseRelativeCallPatcher(writer, kArm64, CompileThunkCode(),
- kMaxPositiveDisplacement, kMaxNegativeDisplacement) {
- }
-
- void Patch(std::vector<uint8_t>* code, uint32_t literal_offset, uint32_t patch_offset,
- uint32_t target_offset) OVERRIDE {
- DCHECK_LE(literal_offset + 4u, code->size());
- DCHECK_EQ(literal_offset & 3u, 0u);
- DCHECK_EQ(patch_offset & 3u, 0u);
- DCHECK_EQ(target_offset & 3u, 0u);
- uint32_t displacement = CalculateDisplacement(patch_offset, target_offset & ~1u);
- DCHECK_EQ(displacement & 3u, 0u);
- DCHECK((displacement >> 27) == 0u || (displacement >> 27) == 31u); // 28-bit signed.
- uint32_t value = (displacement & 0x0fffffffu) >> 2;
- value |= 0x94000000; // BL
-
- uint8_t* addr = &(*code)[literal_offset];
- // Check that we're just overwriting an existing BL.
- DCHECK_EQ(addr[3] & 0xfc, 0x94);
- // Write the new BL.
- addr[0] = (value >> 0) & 0xff;
- addr[1] = (value >> 8) & 0xff;
- addr[2] = (value >> 16) & 0xff;
- addr[3] = (value >> 24) & 0xff;
- }
-
- private:
- static std::vector<uint8_t> CompileThunkCode() {
- // The thunk just uses the entry point in the ArtMethod. This works even for calls
- // to the generic JNI and interpreter trampolines.
- arm64::Arm64Assembler assembler;
- Offset offset(mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
- kArm64PointerSize).Int32Value());
- assembler.JumpTo(ManagedRegister(arm64::X0), offset, ManagedRegister(arm64::IP0));
- // Ensure we emit the literal pool.
- assembler.EmitSlowPaths();
- std::vector<uint8_t> thunk_code(assembler.CodeSize());
- MemoryRegion code(thunk_code.data(), thunk_code.size());
- assembler.FinalizeInstructions(code);
- return thunk_code;
- }
-
- // Maximum positive and negative displacement measured from the patch location.
- // (Signed 28 bit displacement with the last bit 0 has range [-2^27, 2^27-4] measured from
- // the ARM64 PC pointing to the BL.)
- static constexpr uint32_t kMaxPositiveDisplacement = (1u << 27) - 4u;
- static constexpr uint32_t kMaxNegativeDisplacement = (1u << 27);
-
- DISALLOW_COPY_AND_ASSIGN(Arm64RelativeCallPatcher);
-};
-
#define DCHECK_OFFSET() \
DCHECK_EQ(static_cast<off_t>(file_offset + relative_offset), out->Seek(0, kSeekCurrent)) \
<< "file_offset=" << file_offset << " relative_offset=" << relative_offset
@@ -427,6 +90,7 @@ OatWriter::OatWriter(const std::vector<const DexFile*>& dex_files,
size_code_(0),
size_code_alignment_(0),
size_relative_call_thunks_(0),
+ size_misc_thunks_(0),
size_mapping_table_(0),
size_vmap_table_(0),
size_gc_map_(0),
@@ -442,23 +106,10 @@ OatWriter::OatWriter(const std::vector<const DexFile*>& dex_files,
method_offset_map_() {
CHECK(key_value_store != nullptr);
- switch (compiler_driver_->GetInstructionSet()) {
- case kX86:
- case kX86_64:
- relative_call_patcher_.reset(new X86RelativeCallPatcher);
- break;
- case kArm:
- // Fall through: we generate Thumb2 code for "arm".
- case kThumb2:
- relative_call_patcher_.reset(new Thumb2RelativeCallPatcher(this));
- break;
- case kArm64:
- relative_call_patcher_.reset(new Arm64RelativeCallPatcher(this));
- break;
- default:
- relative_call_patcher_.reset(new NoRelativeCallPatcher);
- break;
- }
+ InstructionSet instruction_set = compiler_driver_->GetInstructionSet();
+ const InstructionSetFeatures* features = compiler_driver_->GetInstructionSetFeatures();
+ relative_patcher_ = linker::RelativePatcher::Create(instruction_set, features,
+ &method_offset_map_);
size_t offset;
{
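The per-ISA switch above moves behind linker::RelativePatcher::Create, which is defined elsewhere. A self-contained sketch of the dispatch shape it presumably has; the types here are illustrative stubs, not ART's classes:

    #include <memory>

    enum InstructionSet { kNone, kArm, kThumb2, kArm64, kX86, kX86_64, kMips };

    struct RelativePatcher { virtual ~RelativePatcher() {} };
    struct NoPatcher : RelativePatcher {};
    struct X86Patcher : RelativePatcher {};
    struct Thumb2Patcher : RelativePatcher {};
    struct Arm64Patcher : RelativePatcher {};

    // Hypothetical factory mirroring the removed switch; the real
    // linker::RelativePatcher::Create also takes ISA features and a
    // target provider.
    std::unique_ptr<RelativePatcher> CreatePatcher(InstructionSet isa) {
      switch (isa) {
        case kX86:
        case kX86_64: return std::unique_ptr<RelativePatcher>(new X86Patcher());
        case kArm:      // Thumb2 code is generated for "arm".
        case kThumb2: return std::unique_ptr<RelativePatcher>(new Thumb2Patcher());
        case kArm64:  return std::unique_ptr<RelativePatcher>(new Arm64Patcher());
        default:      return std::unique_ptr<RelativePatcher>(new NoPatcher());
      }
    }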
@@ -706,7 +357,7 @@ class OatWriter::InitCodeMethodVisitor : public OatDexMethodVisitor {
bool EndClass() {
OatDexMethodVisitor::EndClass();
if (oat_class_index_ == writer_->oat_classes_.size()) {
- offset_ = writer_->relative_call_patcher_->ReserveSpace(offset_, nullptr);
+ offset_ = writer_->relative_patcher_->ReserveSpaceEnd(offset_);
}
return true;
}
@@ -722,36 +373,36 @@ class OatWriter::InitCodeMethodVisitor : public OatDexMethodVisitor {
const SwapVector<uint8_t>* quick_code = compiled_method->GetQuickCode();
CHECK(quick_code != nullptr);
- offset_ = writer_->relative_call_patcher_->ReserveSpace(offset_, compiled_method);
- offset_ = compiled_method->AlignCode(offset_);
- DCHECK_ALIGNED_PARAM(offset_,
- GetInstructionSetAlignment(compiled_method->GetInstructionSet()));
uint32_t code_size = quick_code->size() * sizeof(uint8_t);
CHECK_NE(code_size, 0U);
uint32_t thumb_offset = compiled_method->CodeDelta();
- quick_code_offset = offset_ + sizeof(OatQuickMethodHeader) + thumb_offset;
-
- bool deduped = false;
// Deduplicate code arrays.
+ bool deduped = false;
auto lb = dedupe_map_.lower_bound(compiled_method);
if (lb != dedupe_map_.end() && !dedupe_map_.key_comp()(compiled_method, lb->first)) {
quick_code_offset = lb->second;
deduped = true;
} else {
+ offset_ = writer_->relative_patcher_->ReserveSpace(
+ offset_, compiled_method, MethodReference(dex_file_, it.GetMemberIndex()));
+ offset_ = compiled_method->AlignCode(offset_);
+ DCHECK_ALIGNED_PARAM(offset_,
+ GetInstructionSetAlignment(compiled_method->GetInstructionSet()));
+ quick_code_offset = offset_ + sizeof(OatQuickMethodHeader) + thumb_offset;
dedupe_map_.PutBefore(lb, compiled_method, quick_code_offset);
}
MethodReference method_ref(dex_file_, it.GetMemberIndex());
- auto method_lb = writer_->method_offset_map_.lower_bound(method_ref);
- if (method_lb != writer_->method_offset_map_.end() &&
- !writer_->method_offset_map_.key_comp()(method_ref, method_lb->first)) {
+ auto method_lb = writer_->method_offset_map_.map.lower_bound(method_ref);
+ if (method_lb != writer_->method_offset_map_.map.end() &&
+ !writer_->method_offset_map_.map.key_comp()(method_ref, method_lb->first)) {
// TODO: Should this be a hard failure?
LOG(WARNING) << "Multiple definitions of "
<< PrettyMethod(method_ref.dex_method_index, *method_ref.dex_file)
<< ((method_lb->second != quick_code_offset) ? "; OFFSET MISMATCH" : "");
} else {
- writer_->method_offset_map_.PutBefore(method_lb, method_ref, quick_code_offset);
+ writer_->method_offset_map_.map.PutBefore(method_lb, method_ref, quick_code_offset);
}
// Update quick method header.
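The dedupe lookup above uses the single-search idiom: lower_bound once, test for an exact hit, otherwise insert with the iterator as a hint. The same pattern with std::map in place of ART's SafeMap:

    #include <cstdint>
    #include <map>
    #include <utility>

    // One-lookup find-or-insert, as in the dedupe_map_ code above.
    uint32_t FindOrAssign(std::map<const void*, uint32_t>& m, const void* key,
                          uint32_t next_offset) {
      auto lb = m.lower_bound(key);
      if (lb != m.end() && !m.key_comp()(key, lb->first)) {
        return lb->second;  // Already present: reuse the recorded offset.
      }
      m.insert(lb, std::make_pair(key, next_offset));  // Hinted O(1) insert.
      return next_offset;
    }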
@@ -790,7 +441,7 @@ class OatWriter::InitCodeMethodVisitor : public OatDexMethodVisitor {
if (!compiled_method->GetPatches().empty()) {
uintptr_t base_loc = offset_ - code_size - writer_->oat_header_->GetExecutableOffset();
for (const LinkerPatch& patch : compiled_method->GetPatches()) {
- if (patch.Type() != kLinkerPatchCallRelative) {
+ if (!patch.IsPcRelative()) {
writer_->absolute_patch_locations_.push_back(base_loc + patch.LiteralOffset());
}
}
@@ -799,22 +450,18 @@ class OatWriter::InitCodeMethodVisitor : public OatDexMethodVisitor {
if (writer_->compiler_driver_->GetCompilerOptions().GetIncludeDebugSymbols()) {
      // Record debug information for this function if requested.
-
- std::string name = PrettyMethod(it.GetMemberIndex(), *dex_file_, true);
- if (deduped) {
- // TODO We should place the DEDUPED tag on the first instance of a deduplicated symbol
- // so that it will show up in a debuggerd crash report.
- name += " [ DEDUPED ]";
- }
-
const uint32_t quick_code_start = quick_code_offset -
- writer_->oat_header_->GetExecutableOffset();
- const DexFile::CodeItem *code_item = it.GetMethodCodeItem();
- writer_->method_info_.push_back(DebugInfo(name,
- dex_file_->GetSourceFile(dex_file_->GetClassDef(class_def_index_)),
- quick_code_start, quick_code_start + code_size,
- code_item == nullptr ? nullptr : dex_file_->GetDebugInfoStream(code_item),
- compiled_method));
+ writer_->oat_header_->GetExecutableOffset() - thumb_offset;
+ writer_->method_info_.push_back(DebugInfo {
+ dex_file_,
+ class_def_index_,
+ it.GetMemberIndex(),
+ it.GetMethodAccessFlags(),
+ it.GetMethodCodeItem(),
+ deduped,
+ quick_code_start,
+ quick_code_start + code_size,
+ compiled_method});
}
if (kIsDebugBuild) {
@@ -851,6 +498,37 @@ class OatWriter::InitCodeMethodVisitor : public OatDexMethodVisitor {
}
private:
+ struct CodeOffsetsKeyComparator {
+ bool operator()(const CompiledMethod* lhs, const CompiledMethod* rhs) const {
+ if (lhs->GetQuickCode() != rhs->GetQuickCode()) {
+ return lhs->GetQuickCode() < rhs->GetQuickCode();
+ }
+ // If the code is the same, all other fields are likely to be the same as well.
+ if (UNLIKELY(lhs->GetMappingTable() != rhs->GetMappingTable())) {
+ return lhs->GetMappingTable() < rhs->GetMappingTable();
+ }
+ if (UNLIKELY(lhs->GetVmapTable() != rhs->GetVmapTable())) {
+ return lhs->GetVmapTable() < rhs->GetVmapTable();
+ }
+ if (UNLIKELY(lhs->GetGcMap() != rhs->GetGcMap())) {
+ return lhs->GetGcMap() < rhs->GetGcMap();
+ }
+ const auto& lhs_patches = lhs->GetPatches();
+ const auto& rhs_patches = rhs->GetPatches();
+ if (UNLIKELY(lhs_patches.size() != rhs_patches.size())) {
+ return lhs_patches.size() < rhs_patches.size();
+ }
+ auto rit = rhs_patches.begin();
+ for (const LinkerPatch& lpatch : lhs_patches) {
+ if (UNLIKELY(!(lpatch == *rit))) {
+ return lpatch < *rit;
+ }
+ ++rit;
+ }
+ return false;
+ }
+ };
+
// Deduplication is already done on a pointer basis by the compiler driver,
// so we can simply compare the pointers to find out if things are duplicated.
SafeMap<const CompiledMethod*, uint32_t, CodeOffsetsKeyComparator> dedupe_map_;
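CodeOffsetsKeyComparator yields a strict weak ordering by falling through to the next field only when the earlier ones tie, which is what SafeMap (like std::map) requires of its comparator. A compact self-contained equivalent for a plain struct, using std::tie:

    #include <tuple>

    struct Blob { const void* code; const void* map; const void* vmap; };

    // Lexicographic "compare the next field only on a tie", as above.
    struct BlobLess {
      bool operator()(const Blob& l, const Blob& r) const {
        return std::tie(l.code, l.map, l.vmap) < std::tie(r.code, r.map, r.vmap);
      }
    };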
@@ -978,7 +656,7 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor {
bool result = OatDexMethodVisitor::EndClass();
if (oat_class_index_ == writer_->oat_classes_.size()) {
DCHECK(result); // OatDexMethodVisitor::EndClass() never fails.
- offset_ = writer_->relative_call_patcher_->WriteThunks(out_, offset_);
+ offset_ = writer_->relative_patcher_->WriteThunks(out_, offset_);
if (UNLIKELY(offset_ == 0u)) {
PLOG(ERROR) << "Failed to write final relative call thunks";
result = false;
@@ -1000,33 +678,32 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor {
if (quick_code != nullptr) {
// Need a wrapper if we create a copy for patching.
ArrayRef<const uint8_t> wrapped(*quick_code);
-
- offset_ = writer_->relative_call_patcher_->WriteThunks(out, offset_);
- if (offset_ == 0u) {
- ReportWriteFailure("relative call thunk", it);
- return false;
- }
- uint32_t aligned_offset = compiled_method->AlignCode(offset_);
- uint32_t aligned_code_delta = aligned_offset - offset_;
- if (aligned_code_delta != 0) {
- if (!writer_->WriteCodeAlignment(out, aligned_code_delta)) {
- ReportWriteFailure("code alignment padding", it);
- return false;
- }
- offset_ += aligned_code_delta;
- DCHECK_OFFSET_();
- }
- DCHECK_ALIGNED_PARAM(offset_,
- GetInstructionSetAlignment(compiled_method->GetInstructionSet()));
uint32_t code_size = quick_code->size() * sizeof(uint8_t);
CHECK_NE(code_size, 0U);
// Deduplicate code arrays.
const OatMethodOffsets& method_offsets = oat_class->method_offsets_[method_offsets_index_];
- DCHECK(method_offsets.code_offset_ < offset_ || method_offsets.code_offset_ ==
- offset_ + sizeof(OatQuickMethodHeader) + compiled_method->CodeDelta())
- << PrettyMethod(it.GetMemberIndex(), *dex_file_);
if (method_offsets.code_offset_ >= offset_) {
+ offset_ = writer_->relative_patcher_->WriteThunks(out, offset_);
+ if (offset_ == 0u) {
+ ReportWriteFailure("relative call thunk", it);
+ return false;
+ }
+ uint32_t aligned_offset = compiled_method->AlignCode(offset_);
+ uint32_t aligned_code_delta = aligned_offset - offset_;
+ if (aligned_code_delta != 0) {
+ if (!writer_->WriteCodeAlignment(out, aligned_code_delta)) {
+ ReportWriteFailure("code alignment padding", it);
+ return false;
+ }
+ offset_ += aligned_code_delta;
+ DCHECK_OFFSET_();
+ }
+ DCHECK_ALIGNED_PARAM(offset_,
+ GetInstructionSetAlignment(compiled_method->GetInstructionSet()));
+ DCHECK_EQ(method_offsets.code_offset_,
+ offset_ + sizeof(OatQuickMethodHeader) + compiled_method->CodeDelta())
+ << PrettyMethod(it.GetMemberIndex(), *dex_file_);
const OatQuickMethodHeader& method_header =
oat_class->method_headers_[method_offsets_index_];
writer_->oat_header_->UpdateChecksum(&method_header, sizeof(method_header));
@@ -1039,15 +716,21 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor {
DCHECK_OFFSET_();
if (!compiled_method->GetPatches().empty()) {
- patched_code_ = std::vector<uint8_t>(quick_code->begin(), quick_code->end());
+ patched_code_.assign(quick_code->begin(), quick_code->end());
wrapped = ArrayRef<const uint8_t>(patched_code_);
for (const LinkerPatch& patch : compiled_method->GetPatches()) {
if (patch.Type() == kLinkerPatchCallRelative) {
// NOTE: Relative calls across oat files are not supported.
uint32_t target_offset = GetTargetOffset(patch);
uint32_t literal_offset = patch.LiteralOffset();
- writer_->relative_call_patcher_->Patch(&patched_code_, literal_offset,
+ writer_->relative_patcher_->PatchCall(&patched_code_, literal_offset,
offset_ + literal_offset, target_offset);
+ } else if (patch.Type() == kLinkerPatchDexCacheArray) {
+ uint32_t target_offset = GetDexCacheOffset(patch);
+ uint32_t literal_offset = patch.LiteralOffset();
+ writer_->relative_patcher_->PatchDexCacheReference(&patched_code_, patch,
+ offset_ + literal_offset,
+ target_offset);
} else if (patch.Type() == kLinkerPatchCall) {
uint32_t target_offset = GetTargetOffset(patch);
PatchCodeAddress(&patched_code_, patch.LiteralOffset(), target_offset);
@@ -1102,9 +785,9 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor {
}
uint32_t GetTargetOffset(const LinkerPatch& patch) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
- auto target_it = writer_->method_offset_map_.find(patch.TargetMethod());
+ auto target_it = writer_->method_offset_map_.map.find(patch.TargetMethod());
uint32_t target_offset =
- (target_it != writer_->method_offset_map_.end()) ? target_it->second : 0u;
+ (target_it != writer_->method_offset_map_.map.end()) ? target_it->second : 0u;
// If there's no compiled code, point to the correct trampoline.
if (UNLIKELY(target_offset == 0)) {
mirror::ArtMethod* target = GetTargetMethod(patch);
@@ -1134,6 +817,18 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor {
return type;
}
+ uint32_t GetDexCacheOffset(const LinkerPatch& patch) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+ if (writer_->image_writer_ != nullptr) {
+ auto* element = writer_->image_writer_->GetDexCacheArrayElementImageAddress(
+ patch.TargetDexCacheDexFile(), patch.TargetDexCacheElementOffset());
+ const uint8_t* oat_data = writer_->image_writer_->GetOatFileBegin() + file_offset_;
+ return reinterpret_cast<const uint8_t*>(element) - oat_data;
+ } else {
+ LOG(FATAL) << "Unimplemented.";
+ UNREACHABLE();
+ }
+ }
+
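+  // GetDexCacheOffset above reduces to pointer subtraction: the element's
+  // absolute image address minus where the oat data begins in the image.
+  // The arithmetic in isolation, with made-up addresses:
+  //
+  //   uint32_t ImageRelativeOffset(uintptr_t element_address,
+  //                                uintptr_t oat_file_begin,
+  //                                uintptr_t file_offset) {
+  //     return static_cast<uint32_t>(element_address -
+  //                                  (oat_file_begin + file_offset));
+  //   }
+  //   // Example: ImageRelativeOffset(0x70001080, 0x70000000, 0x1000) == 0x80.
+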
void PatchObjectAddress(std::vector<uint8_t>* code, uint32_t offset, mirror::Object* object)
SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
// NOTE: Direct method pointers across oat files don't use linker patches. However, direct
@@ -1472,6 +1167,7 @@ bool OatWriter::Write(OutputStream* out) {
DO_STAT(size_code_);
DO_STAT(size_code_alignment_);
DO_STAT(size_relative_call_thunks_);
+ DO_STAT(size_misc_thunks_);
DO_STAT(size_mapping_table_);
DO_STAT(size_vmap_table_);
DO_STAT(size_gc_map_);
@@ -1630,6 +1326,10 @@ size_t OatWriter::WriteCodeDexFiles(OutputStream* out,
#undef VISIT
+ size_code_alignment_ += relative_patcher_->CodeAlignmentSize();
+ size_relative_call_thunks_ += relative_patcher_->RelativeCallThunksSize();
+ size_misc_thunks_ += relative_patcher_->MiscThunksSize();
+
return relative_offset;
}
@@ -1645,6 +1345,15 @@ bool OatWriter::WriteCodeAlignment(OutputStream* out, uint32_t aligned_code_delt
return true;
}
+std::pair<bool, uint32_t> OatWriter::MethodOffsetMap::FindMethodOffset(MethodReference ref) {
+ auto it = map.find(ref);
+ if (it == map.end()) {
+ return std::pair<bool, uint32_t>(false, 0u);
+ } else {
+ return std::pair<bool, uint32_t>(true, it->second);
+ }
+}
+
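FindMethodOffset returns presence and value together in a std::pair instead of reserving 0 as a sentinel offset. A self-contained stand-in showing the same shape over std::map:

    #include <cstdint>
    #include <map>
    #include <utility>

    // Minimal analogue of the provider above: presence + value in one pair.
    std::pair<bool, uint32_t> FindOffset(const std::map<int, uint32_t>& m, int ref) {
      auto it = m.find(ref);
      if (it == m.end()) {
        return std::make_pair(false, 0u);
      }
      return std::make_pair(true, it->second);
    }
    // Callers branch on .first and only then trust .second.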
OatWriter::OatDexFile::OatDexFile(size_t offset, const DexFile& dex_file) {
offset_ = offset;
const std::string& location(dex_file.GetLocation());
diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h
index fd2ccae4a5..51bc9b4483 100644
--- a/compiler/oat_writer.h
+++ b/compiler/oat_writer.h
@@ -21,7 +21,7 @@
#include <cstddef>
#include <memory>
-#include "driver/compiler_driver.h"
+#include "linker/relative_patcher.h" // For linker::RelativePatcherTargetProvider.
#include "mem_map.h"
#include "method_reference.h"
#include "oat.h"
@@ -32,8 +32,10 @@ namespace art {
class BitVector;
class CompiledMethod;
+class CompilerDriver;
class ImageWriter;
class OutputStream;
+class TimingLogger;
// OatHeader variable length with count of D OatDexFiles
//
@@ -113,25 +115,25 @@ class OatWriter {
~OatWriter();
struct DebugInfo {
- DebugInfo(const std::string& method_name, const char* src_file_name,
- uint32_t low_pc, uint32_t high_pc, const uint8_t* dbgstream,
- CompiledMethod* compiled_method)
- : method_name_(method_name), src_file_name_(src_file_name),
- low_pc_(low_pc), high_pc_(high_pc), dbgstream_(dbgstream),
- compiled_method_(compiled_method) {
- }
- std::string method_name_; // Note: this name is a pretty-printed name.
- const char* src_file_name_;
- uint32_t low_pc_;
- uint32_t high_pc_;
- const uint8_t* dbgstream_;
+ const DexFile* dex_file_;
+ size_t class_def_index_;
+ uint32_t dex_method_index_;
+ uint32_t access_flags_;
+  const DexFile::CodeItem* code_item_;
+ bool deduped_;
+ uint32_t low_pc_;
+ uint32_t high_pc_;
CompiledMethod* compiled_method_;
};
- const std::vector<DebugInfo>& GetCFIMethodInfo() const {
+ const std::vector<DebugInfo>& GetMethodDebugInfo() const {
return method_info_;
}
+ const CompilerDriver* GetCompilerDriver() {
+ return compiler_driver_;
+ }
+
private:
// The DataAccess classes are helper classes that provide access to members related to
// a given map, i.e. GC map, mapping table or vmap table. By abstracting these away
@@ -312,6 +314,7 @@ class OatWriter {
uint32_t size_code_;
uint32_t size_code_alignment_;
uint32_t size_relative_call_thunks_;
+ uint32_t size_misc_thunks_;
uint32_t size_mapping_table_;
uint32_t size_vmap_table_;
uint32_t size_gc_map_;
@@ -325,50 +328,19 @@ class OatWriter {
uint32_t size_oat_class_method_bitmaps_;
uint32_t size_oat_class_method_offsets_;
- class RelativeCallPatcher;
- class NoRelativeCallPatcher;
- class X86RelativeCallPatcher;
- class ArmBaseRelativeCallPatcher;
- class Thumb2RelativeCallPatcher;
- class Arm64RelativeCallPatcher;
-
- std::unique_ptr<RelativeCallPatcher> relative_call_patcher_;
+ std::unique_ptr<linker::RelativePatcher> relative_patcher_;
// The locations of absolute patches relative to the start of the executable section.
std::vector<uintptr_t> absolute_patch_locations_;
- SafeMap<MethodReference, uint32_t, MethodReferenceComparator> method_offset_map_;
-
- struct CodeOffsetsKeyComparator {
- bool operator()(const CompiledMethod* lhs, const CompiledMethod* rhs) const {
- if (lhs->GetQuickCode() != rhs->GetQuickCode()) {
- return lhs->GetQuickCode() < rhs->GetQuickCode();
- }
- // If the code is the same, all other fields are likely to be the same as well.
- if (UNLIKELY(lhs->GetMappingTable() != rhs->GetMappingTable())) {
- return lhs->GetMappingTable() < rhs->GetMappingTable();
- }
- if (UNLIKELY(lhs->GetVmapTable() != rhs->GetVmapTable())) {
- return lhs->GetVmapTable() < rhs->GetVmapTable();
- }
- if (UNLIKELY(lhs->GetGcMap() != rhs->GetGcMap())) {
- return lhs->GetGcMap() < rhs->GetGcMap();
- }
- const auto& lhs_patches = lhs->GetPatches();
- const auto& rhs_patches = rhs->GetPatches();
- if (UNLIKELY(lhs_patches.size() != rhs_patches.size())) {
- return lhs_patches.size() < rhs_patches.size();
- }
- auto rit = rhs_patches.begin();
- for (const LinkerPatch& lpatch : lhs_patches) {
- if (UNLIKELY(!(lpatch == *rit))) {
- return lpatch < *rit;
- }
- ++rit;
- }
- return false;
- }
+ // Map method reference to assigned offset.
+ // Wrap the map in a class implementing linker::RelativePatcherTargetProvider.
+ class MethodOffsetMap FINAL : public linker::RelativePatcherTargetProvider {
+ public:
+ std::pair<bool, uint32_t> FindMethodOffset(MethodReference ref) OVERRIDE;
+ SafeMap<MethodReference, uint32_t, MethodReferenceComparator> map;
};
+ MethodOffsetMap method_offset_map_;
DISALLOW_COPY_AND_ASSIGN(OatWriter);
};
diff --git a/compiler/optimizing/boolean_simplifier.cc b/compiler/optimizing/boolean_simplifier.cc
index ab77505b6f..be432c5a20 100644
--- a/compiler/optimizing/boolean_simplifier.cc
+++ b/compiler/optimizing/boolean_simplifier.cc
@@ -59,7 +59,8 @@ static HInstruction* GetOppositeCondition(HInstruction* cond) {
return new (allocator) HGreaterThan(lhs, rhs);
} else if (cond->IsGreaterThan()) {
return new (allocator) HLessThanOrEqual(lhs, rhs);
- } else if (cond->IsGreaterThanOrEqual()) {
+ } else {
+ DCHECK(cond->IsGreaterThanOrEqual());
return new (allocator) HLessThan(lhs, rhs);
}
} else if (cond->IsIntConstant()) {
@@ -70,10 +71,11 @@ static HInstruction* GetOppositeCondition(HInstruction* cond) {
DCHECK(int_const->IsOne());
return graph->GetIntConstant(0);
}
+ } else {
+    // General case: 'cond' is some other boolean-valued instruction,
+    // so negate it by emitting 'cond == 0'.
+ return new (allocator) HEqual(cond, graph->GetIntConstant(0));
}
-
- // TODO: b/19992954
- return nullptr;
}
void HBooleanSimplifier::Run() {
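The new else branch handles any boolean-producing instruction: since the value is known to be 0 or 1, its negation is exactly 'value == 0'. The identity in plain C++:

    #include <cassert>

    // For b in {0, 1}: !b == (b == 0). This is the rewrite the simplifier emits.
    int Negate(int b) { return b == 0 ? 1 : 0; }
    // assert(Negate(0) == 1 && Negate(1) == 0);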
@@ -105,10 +107,6 @@ void HBooleanSimplifier::Run() {
HInstruction* replacement;
if (NegatesCondition(true_value, false_value)) {
replacement = GetOppositeCondition(if_condition);
- if (replacement == nullptr) {
- // Something we could not handle.
- continue;
- }
if (replacement->GetBlock() == nullptr) {
block->InsertInstructionBefore(replacement, if_instruction);
}
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index 1d167949f4..6511120794 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -239,7 +239,6 @@ class ValueBound : public ValueObject {
*underflow = true;
return Min();
}
- return ValueBound(instruction_, new_constant);
}
private:
@@ -443,9 +442,31 @@ class MonotonicValueRange : public ValueRange {
class BCEVisitor : public HGraphVisitor {
public:
+  // The minimum number of bounds checks that must be eliminated to make
+  // triggering the deoptimization technique worthwhile.
+ static constexpr size_t kThresholdForAddingDeoptimize = 2;
+
+  // A very large constant index is considered an anomaly. This is a threshold
+  // beyond which we don't bother to apply the deoptimization technique, since
+  // an AIOOBE is likely to be thrown anyway.
+ static constexpr int32_t kMaxConstantForAddingDeoptimize = INT_MAX - 1024 * 1024;
+
explicit BCEVisitor(HGraph* graph)
: HGraphVisitor(graph),
- maps_(graph->GetBlocks().Size()) {}
+ maps_(graph->GetBlocks().Size()),
+ need_to_revisit_block_(false) {}
+
+ void VisitBasicBlock(HBasicBlock* block) OVERRIDE {
+ first_constant_index_bounds_check_map_.clear();
+ HGraphVisitor::VisitBasicBlock(block);
+ if (need_to_revisit_block_) {
+ AddComparesWithDeoptimization(block);
+ need_to_revisit_block_ = false;
+ first_constant_index_bounds_check_map_.clear();
+ GetValueRangeMap(block)->clear();
+ HGraphVisitor::VisitBasicBlock(block);
+ }
+ }
private:
// Return the map of proven value ranges at the beginning of a basic block.
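At the source level, the deoptimization technique introduced here trades several per-access bounds checks on constant indices for one guard against the largest constant index. A hedged analogue of the transformed code (ordinary C++, not compiler IR):

    #include <stdexcept>
    #include <vector>

    // Before: each store carries its own bounds check.
    // After: one guard (the "deoptimize" fallback) dominates all three stores.
    void GuardedStores(std::vector<int>& a) {
      if (a.size() <= 6) {
        throw std::out_of_range("deopt");  // Single guard on the max index.
      }
      a[6] = 1;  // The checks below are now provably in bounds.
      a[5] = 1;
      a[4] = 1;
    }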
@@ -701,9 +722,26 @@ class BCEVisitor : public HGraphVisitor {
}
}
+ if (first_constant_index_bounds_check_map_.find(array_length->GetId()) ==
+ first_constant_index_bounds_check_map_.end()) {
+ // Remember the first bounds check against array_length of a constant index.
+ // That bounds check instruction has an associated HEnvironment where we
+ // may add an HDeoptimize to eliminate bounds checks of constant indices
+ // against array_length.
+ first_constant_index_bounds_check_map_.Put(array_length->GetId(), bounds_check);
+ } else {
+ // We've seen it at least twice. It's beneficial to introduce a compare with
+ // deoptimization fallback to eliminate the bounds checks.
+ need_to_revisit_block_ = true;
+ }
+
    // Once we have an array access like 'array[5] = 1', we record array.length >= 6.
    // We currently don't do it for a non-constant index, since a valid array[i]
    // can't yet prove a valid array[i-1] on the lower-bound side.
+ if (constant == INT_MAX) {
+ // INT_MAX as an index will definitely throw AIOOBE.
+ return;
+ }
ValueBound lower = ValueBound(nullptr, constant + 1);
ValueBound upper = ValueBound::Max();
ValueRange* range = new (GetGraph()->GetArena())
@@ -938,8 +976,90 @@ class BCEVisitor : public HGraphVisitor {
}
}
+ void VisitDeoptimize(HDeoptimize* deoptimize) {
+ // Right now it's only HLessThanOrEqual.
+ DCHECK(deoptimize->InputAt(0)->IsLessThanOrEqual());
+ HLessThanOrEqual* less_than_or_equal = deoptimize->InputAt(0)->AsLessThanOrEqual();
+ HInstruction* instruction = less_than_or_equal->InputAt(0);
+ if (instruction->IsArrayLength()) {
+ HInstruction* constant = less_than_or_equal->InputAt(1);
+ DCHECK(constant->IsIntConstant());
+ DCHECK(constant->AsIntConstant()->GetValue() <= kMaxConstantForAddingDeoptimize);
+ ValueBound lower = ValueBound(nullptr, constant->AsIntConstant()->GetValue() + 1);
+ ValueRange* range = new (GetGraph()->GetArena())
+ ValueRange(GetGraph()->GetArena(), lower, ValueBound::Max());
+ GetValueRangeMap(deoptimize->GetBlock())->Overwrite(instruction->GetId(), range);
+ }
+ }
+
+ void AddCompareWithDeoptimization(HInstruction* array_length,
+ HIntConstant* const_instr,
+ HBasicBlock* block) {
+ DCHECK(array_length->IsArrayLength());
+ ValueRange* range = LookupValueRange(array_length, block);
+ ValueBound lower_bound = range->GetLower();
+ DCHECK(lower_bound.IsConstant());
+ DCHECK(const_instr->GetValue() <= kMaxConstantForAddingDeoptimize);
+ DCHECK_EQ(lower_bound.GetConstant(), const_instr->GetValue() + 1);
+
+ // If array_length is less than lower_const, deoptimize.
+ HBoundsCheck* bounds_check = first_constant_index_bounds_check_map_.Get(
+ array_length->GetId())->AsBoundsCheck();
+ HCondition* cond = new (GetGraph()->GetArena()) HLessThanOrEqual(array_length, const_instr);
+ HDeoptimize* deoptimize = new (GetGraph()->GetArena())
+ HDeoptimize(cond, bounds_check->GetDexPc());
+ block->InsertInstructionBefore(cond, bounds_check);
+ block->InsertInstructionBefore(deoptimize, bounds_check);
+ deoptimize->CopyEnvironmentFrom(bounds_check->GetEnvironment());
+ }
+
+ void AddComparesWithDeoptimization(HBasicBlock* block) {
+ for (ArenaSafeMap<int, HBoundsCheck*>::iterator it =
+ first_constant_index_bounds_check_map_.begin();
+ it != first_constant_index_bounds_check_map_.end();
+ ++it) {
+ HBoundsCheck* bounds_check = it->second;
+ HArrayLength* array_length = bounds_check->InputAt(1)->AsArrayLength();
+ HIntConstant* lower_bound_const_instr = nullptr;
+ int32_t lower_bound_const = INT_MIN;
+ size_t counter = 0;
+      // Count the constant-index bounds checks that haven't been
+      // removed yet.
+ for (HUseIterator<HInstruction*> it2(array_length->GetUses());
+ !it2.Done();
+ it2.Advance()) {
+ HInstruction* user = it2.Current()->GetUser();
+ if (user->GetBlock() == block &&
+ user->IsBoundsCheck() &&
+ user->AsBoundsCheck()->InputAt(0)->IsIntConstant()) {
+ DCHECK_EQ(array_length, user->AsBoundsCheck()->InputAt(1));
+ HIntConstant* const_instr = user->AsBoundsCheck()->InputAt(0)->AsIntConstant();
+ if (const_instr->GetValue() > lower_bound_const) {
+ lower_bound_const = const_instr->GetValue();
+ lower_bound_const_instr = const_instr;
+ }
+ counter++;
+ }
+ }
+ if (counter >= kThresholdForAddingDeoptimize &&
+ lower_bound_const_instr->GetValue() <= kMaxConstantForAddingDeoptimize) {
+ AddCompareWithDeoptimization(array_length, lower_bound_const_instr, block);
+ }
+ }
+ }
+
std::vector<std::unique_ptr<ArenaSafeMap<int, ValueRange*>>> maps_;
+ // Map an HArrayLength instruction's id to the first HBoundsCheck instruction in
+ // a block that checks a constant index against that HArrayLength.
+ SafeMap<int, HBoundsCheck*> first_constant_index_bounds_check_map_;
+
+  // Set when the current block has at least one HArrayLength instruction with
+  // more than one constant-index bounds check, in which case it's beneficial
+  // to add a compare instruction with a deoptimization fallback and eliminate
+  // those bounds checks.
+ bool need_to_revisit_block_;
+
DISALLOW_COPY_AND_ASSIGN(BCEVisitor);
};
diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc
index b3653fe903..75cf1cf063 100644
--- a/compiler/optimizing/bounds_check_elimination_test.cc
+++ b/compiler/optimizing/bounds_check_elimination_test.cc
@@ -284,9 +284,9 @@ TEST(BoundsCheckEliminationTest, UnderflowArrayBoundsElimination) {
ASSERT_FALSE(IsRemoved(bounds_check));
}
-// array[5] = 1; // Can't eliminate.
-// array[4] = 1; // Can eliminate.
// array[6] = 1; // Can't eliminate.
+// array[5] = 1; // Can eliminate.
+// array[4] = 1; // Can eliminate.
TEST(BoundsCheckEliminationTest, ConstantArrayBoundsElimination) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
@@ -311,35 +311,35 @@ TEST(BoundsCheckEliminationTest, ConstantArrayBoundsElimination) {
HNullCheck* null_check = new (&allocator) HNullCheck(parameter, 0);
HArrayLength* array_length = new (&allocator) HArrayLength(null_check);
- HBoundsCheck* bounds_check5 = new (&allocator)
- HBoundsCheck(constant_5, array_length, 0);
+ HBoundsCheck* bounds_check6 = new (&allocator)
+ HBoundsCheck(constant_6, array_length, 0);
HInstruction* array_set = new (&allocator) HArraySet(
- null_check, bounds_check5, constant_1, Primitive::kPrimInt, 0);
+ null_check, bounds_check6, constant_1, Primitive::kPrimInt, 0);
block->AddInstruction(null_check);
block->AddInstruction(array_length);
- block->AddInstruction(bounds_check5);
+ block->AddInstruction(bounds_check6);
block->AddInstruction(array_set);
null_check = new (&allocator) HNullCheck(parameter, 0);
array_length = new (&allocator) HArrayLength(null_check);
- HBoundsCheck* bounds_check4 = new (&allocator)
- HBoundsCheck(constant_4, array_length, 0);
+ HBoundsCheck* bounds_check5 = new (&allocator)
+ HBoundsCheck(constant_5, array_length, 0);
array_set = new (&allocator) HArraySet(
- null_check, bounds_check4, constant_1, Primitive::kPrimInt, 0);
+ null_check, bounds_check5, constant_1, Primitive::kPrimInt, 0);
block->AddInstruction(null_check);
block->AddInstruction(array_length);
- block->AddInstruction(bounds_check4);
+ block->AddInstruction(bounds_check5);
block->AddInstruction(array_set);
null_check = new (&allocator) HNullCheck(parameter, 0);
array_length = new (&allocator) HArrayLength(null_check);
- HBoundsCheck* bounds_check6 = new (&allocator)
- HBoundsCheck(constant_6, array_length, 0);
+ HBoundsCheck* bounds_check4 = new (&allocator)
+ HBoundsCheck(constant_4, array_length, 0);
array_set = new (&allocator) HArraySet(
- null_check, bounds_check6, constant_1, Primitive::kPrimInt, 0);
+ null_check, bounds_check4, constant_1, Primitive::kPrimInt, 0);
block->AddInstruction(null_check);
block->AddInstruction(array_length);
- block->AddInstruction(bounds_check6);
+ block->AddInstruction(bounds_check4);
block->AddInstruction(array_set);
block->AddInstruction(new (&allocator) HGoto());
@@ -353,9 +353,9 @@ TEST(BoundsCheckEliminationTest, ConstantArrayBoundsElimination) {
RunSimplifierAndGvn(graph);
BoundsCheckElimination bounds_check_elimination(graph);
bounds_check_elimination.Run();
- ASSERT_FALSE(IsRemoved(bounds_check5));
- ASSERT_TRUE(IsRemoved(bounds_check4));
ASSERT_FALSE(IsRemoved(bounds_check6));
+ ASSERT_TRUE(IsRemoved(bounds_check5));
+ ASSERT_TRUE(IsRemoved(bounds_check4));
}
// for (int i=initial; i<array.length; i+=increment) { array[i] = 10; }
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index 2cdd5af9f3..a912d4ccc4 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -23,6 +23,7 @@
#include "dex_instruction.h"
#include "dex_instruction-inl.h"
#include "driver/compiler_driver-inl.h"
+#include "driver/compiler_options.h"
#include "mirror/art_field.h"
#include "mirror/art_field-inl.h"
#include "mirror/class_loader.h"
@@ -230,8 +231,7 @@ void HGraphBuilder::MaybeRecordStat(MethodCompilationStat compilation_stat) {
}
}
-bool HGraphBuilder::SkipCompilation(size_t number_of_dex_instructions,
- size_t number_of_blocks ATTRIBUTE_UNUSED,
+bool HGraphBuilder::SkipCompilation(const DexFile::CodeItem& code_item,
size_t number_of_branches) {
const CompilerOptions& compiler_options = compiler_driver_->GetCompilerOptions();
CompilerOptions::CompilerFilter compiler_filter = compiler_options.GetCompilerFilter();
@@ -239,19 +239,20 @@ bool HGraphBuilder::SkipCompilation(size_t number_of_dex_instructions,
return false;
}
- if (compiler_options.IsHugeMethod(number_of_dex_instructions)) {
+ if (compiler_options.IsHugeMethod(code_item.insns_size_in_code_units_)) {
VLOG(compiler) << "Skip compilation of huge method "
<< PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_)
- << ": " << number_of_dex_instructions << " dex instructions";
+ << ": " << code_item.insns_size_in_code_units_ << " code units";
MaybeRecordStat(MethodCompilationStat::kNotCompiledHugeMethod);
return true;
}
  // If it's large and contains no branches, it's likely to be machine-generated initialization.
- if (compiler_options.IsLargeMethod(number_of_dex_instructions) && (number_of_branches == 0)) {
+ if (compiler_options.IsLargeMethod(code_item.insns_size_in_code_units_)
+ && (number_of_branches == 0)) {
VLOG(compiler) << "Skip compilation of large method with no branch "
<< PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_)
- << ": " << number_of_dex_instructions << " dex instructions";
+ << ": " << code_item.insns_size_in_code_units_ << " code units";
MaybeRecordStat(MethodCompilationStat::kNotCompiledLargeMethodNoBranches);
return true;
}
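The size checks now read insns_size_in_code_units_ straight from the CodeItem. A self-contained sketch of the two-threshold filter; the limit values here are placeholders, not ART's configured defaults:

    #include <cstddef>

    // Skip compilation of huge methods, and of large straight-line methods
    // (no branches) that are likely machine-generated initializers.
    bool ShouldSkip(size_t insns_size_in_code_units, size_t number_of_branches,
                    size_t huge_limit = 10000, size_t large_limit = 600) {
      if (insns_size_in_code_units > huge_limit) {
        return true;
      }
      return insns_size_in_code_units > large_limit && number_of_branches == 0;
    }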
@@ -278,18 +279,14 @@ bool HGraphBuilder::BuildGraph(const DexFile::CodeItem& code_item) {
// Compute the number of dex instructions, blocks, and branches. We will
// check these values against limits given to the compiler.
- size_t number_of_dex_instructions = 0;
- size_t number_of_blocks = 0;
size_t number_of_branches = 0;
// To avoid splitting blocks, we compute ahead of time the instructions that
// start a new block, and create these blocks.
- ComputeBranchTargets(
- code_ptr, code_end, &number_of_dex_instructions, &number_of_blocks, &number_of_branches);
+ ComputeBranchTargets(code_ptr, code_end, &number_of_branches);
// Note that the compiler driver is null when unit testing.
- if ((compiler_driver_ != nullptr)
- && SkipCompilation(number_of_dex_instructions, number_of_blocks, number_of_branches)) {
+ if ((compiler_driver_ != nullptr) && SkipCompilation(code_item, number_of_branches)) {
return false;
}
@@ -355,8 +352,6 @@ void HGraphBuilder::MaybeUpdateCurrentBlock(size_t index) {
void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr,
const uint16_t* code_end,
- size_t* number_of_dex_instructions,
- size_t* number_of_blocks,
size_t* number_of_branches) {
branch_targets_.SetSize(code_end - code_ptr);
@@ -369,7 +364,6 @@ void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr,
// the locations these instructions branch to.
uint32_t dex_pc = 0;
while (code_ptr < code_end) {
- (*number_of_dex_instructions)++;
const Instruction& instruction = *Instruction::At(code_ptr);
if (instruction.IsBranch()) {
(*number_of_branches)++;
@@ -378,14 +372,12 @@ void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr,
if (FindBlockStartingAt(target) == nullptr) {
block = new (arena_) HBasicBlock(graph_, target);
branch_targets_.Put(target, block);
- (*number_of_blocks)++;
}
dex_pc += instruction.SizeInCodeUnits();
code_ptr += instruction.SizeInCodeUnits();
if ((code_ptr < code_end) && (FindBlockStartingAt(dex_pc) == nullptr)) {
block = new (arena_) HBasicBlock(graph_, dex_pc);
branch_targets_.Put(dex_pc, block);
- (*number_of_blocks)++;
}
} else if (instruction.IsSwitch()) {
SwitchTable table(instruction, dex_pc, instruction.Opcode() == Instruction::SPARSE_SWITCH);
@@ -403,14 +395,12 @@ void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr,
if (FindBlockStartingAt(target) == nullptr) {
block = new (arena_) HBasicBlock(graph_, target);
branch_targets_.Put(target, block);
- (*number_of_blocks)++;
}
// The next case gets its own block.
if (i < num_entries) {
block = new (arena_) HBasicBlock(graph_, target);
branch_targets_.Put(table.GetDexPcForIndex(i), block);
- (*number_of_blocks)++;
}
}
@@ -420,7 +410,6 @@ void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr,
if ((code_ptr < code_end) && (FindBlockStartingAt(dex_pc) == nullptr)) {
block = new (arena_) HBasicBlock(graph_, dex_pc);
branch_targets_.Put(dex_pc, block);
- (*number_of_blocks)++;
}
} else {
code_ptr += instruction.SizeInCodeUnits();
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index 6a0738a7b9..dc6d97eb0c 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -90,8 +90,6 @@ class HGraphBuilder : public ValueObject {
// branches.
void ComputeBranchTargets(const uint16_t* start,
const uint16_t* end,
- size_t* number_of_dex_instructions,
- size_t* number_of_block,
size_t* number_of_branches);
void MaybeUpdateCurrentBlock(size_t index);
HBasicBlock* FindBlockStartingAt(int32_t index) const;
@@ -217,9 +215,7 @@ class HGraphBuilder : public ValueObject {
HInstruction* value, int32_t case_value_int,
int32_t target_offset, uint32_t dex_pc);
- bool SkipCompilation(size_t number_of_dex_instructions,
- size_t number_of_blocks,
- size_t number_of_branches);
+ bool SkipCompilation(const DexFile::CodeItem& code_item, size_t number_of_branches);
void MaybeRecordStat(MethodCompilationStat compilation_stat);
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index bd6e943bf0..8736374306 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -82,6 +82,7 @@ void CodeGenerator::CompileInternal(CodeAllocator* allocator, bool is_baseline)
HGraphVisitor* instruction_visitor = GetInstructionVisitor();
DCHECK_EQ(current_block_index_, 0u);
GenerateFrameEntry();
+ DCHECK_EQ(GetAssembler()->cfi().GetCurrentCFAOffset(), static_cast<int>(frame_size_));
for (size_t e = block_order_->Size(); current_block_index_ < e; ++current_block_index_) {
HBasicBlock* block = block_order_->Get(current_block_index_);
// Don't generate code for an empty block. Its predecessors will branch to its successor
@@ -132,7 +133,6 @@ size_t CodeGenerator::FindFreeEntry(bool* array, size_t length) {
}
LOG(FATAL) << "Could not find a register in baseline register allocator";
UNREACHABLE();
- return -1;
}
size_t CodeGenerator::FindTwoFreeConsecutiveAlignedEntries(bool* array, size_t length) {
@@ -145,7 +145,6 @@ size_t CodeGenerator::FindTwoFreeConsecutiveAlignedEntries(bool* array, size_t l
}
LOG(FATAL) << "Could not find a register in baseline register allocator";
UNREACHABLE();
- return -1;
}
void CodeGenerator::InitializeCodeGeneration(size_t number_of_spill_slots,
@@ -378,10 +377,14 @@ CodeGenerator* CodeGenerator::Create(HGraph* graph,
case kMips:
return nullptr;
case kX86: {
- return new x86::CodeGeneratorX86(graph, compiler_options);
+ return new x86::CodeGeneratorX86(graph,
+ *isa_features.AsX86InstructionSetFeatures(),
+ compiler_options);
}
case kX86_64: {
- return new x86_64::CodeGeneratorX86_64(graph, compiler_options);
+ return new x86_64::CodeGeneratorX86_64(graph,
+ *isa_features.AsX86_64InstructionSetFeatures(),
+ compiler_options);
}
default:
return nullptr;
@@ -413,7 +416,16 @@ void CodeGenerator::BuildNativeGCMap(
}
}
-void CodeGenerator::BuildMappingTable(std::vector<uint8_t>* data, DefaultSrcMap* src_map) const {
+void CodeGenerator::BuildSourceMap(DefaultSrcMap* src_map) const {
+ for (size_t i = 0; i < pc_infos_.Size(); i++) {
+ struct PcInfo pc_info = pc_infos_.Get(i);
+ uint32_t pc2dex_offset = pc_info.native_pc;
+ int32_t pc2dex_dalvik_offset = pc_info.dex_pc;
+ src_map->push_back(SrcMapElem({pc2dex_offset, pc2dex_dalvik_offset}));
+ }
+}
+
+void CodeGenerator::BuildMappingTable(std::vector<uint8_t>* data) const {
uint32_t pc2dex_data_size = 0u;
uint32_t pc2dex_entries = pc_infos_.Size();
uint32_t pc2dex_offset = 0u;
@@ -423,19 +435,12 @@ void CodeGenerator::BuildMappingTable(std::vector<uint8_t>* data, DefaultSrcMap*
uint32_t dex2pc_offset = 0u;
int32_t dex2pc_dalvik_offset = 0;
- if (src_map != nullptr) {
- src_map->reserve(pc2dex_entries);
- }
-
for (size_t i = 0; i < pc2dex_entries; i++) {
struct PcInfo pc_info = pc_infos_.Get(i);
pc2dex_data_size += UnsignedLeb128Size(pc_info.native_pc - pc2dex_offset);
pc2dex_data_size += SignedLeb128Size(pc_info.dex_pc - pc2dex_dalvik_offset);
pc2dex_offset = pc_info.native_pc;
pc2dex_dalvik_offset = pc_info.dex_pc;
- if (src_map != nullptr) {
- src_map->push_back(SrcMapElem({pc2dex_offset, pc2dex_dalvik_offset}));
- }
}
// Walk over the blocks and find which ones correspond to catch block entries.
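The pc2dex table stores each (native_pc, dex_pc) pair as a delta from the previous entry, encoded as unsigned/signed LEB128, which is why the loop above carries pc2dex_offset and pc2dex_dalvik_offset along. The standard ULEB128 encoding, for reference (not ART's helper):

    #include <cstdint>
    #include <vector>

    // Standard ULEB128: 7 payload bits per byte, high bit marks continuation.
    void EncodeUnsignedLeb128(std::vector<uint8_t>* out, uint32_t value) {
      do {
        uint8_t byte = value & 0x7f;
        value >>= 7;
        if (value != 0) {
          byte |= 0x80;  // More bytes follow.
        }
        out->push_back(byte);
      } while (value != 0);
    }
    // Example: 300 encodes as {0xac, 0x02}.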
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 07ca6b1ccf..b888aca264 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -205,7 +205,8 @@ class CodeGenerator {
slow_paths_.Add(slow_path);
}
- void BuildMappingTable(std::vector<uint8_t>* vector, DefaultSrcMap* src_map) const;
+ void BuildSourceMap(DefaultSrcMap* src_map) const;
+ void BuildMappingTable(std::vector<uint8_t>* vector) const;
void BuildVMapTable(std::vector<uint8_t>* vector) const;
void BuildNativeGCMap(
std::vector<uint8_t>* vector, const DexCompilationUnit& dex_compilation_unit) const;
@@ -425,6 +426,8 @@ class CodeGenerator {
StackMapStream stack_map_stream_;
+ friend class OptimizingCFITest;
+
DISALLOW_COPY_AND_ASSIGN(CodeGenerator);
};
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 1f95041a92..a799a519c0 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -287,6 +287,26 @@ class TypeCheckSlowPathARM : public SlowPathCodeARM {
DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathARM);
};
+class DeoptimizationSlowPathARM : public SlowPathCodeARM {
+ public:
+ explicit DeoptimizationSlowPathARM(HInstruction* instruction)
+ : instruction_(instruction) {}
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ __ Bind(GetEntryLabel());
+ SaveLiveRegisters(codegen, instruction_->GetLocations());
+ DCHECK(instruction_->IsDeoptimize());
+ HDeoptimize* deoptimize = instruction_->AsDeoptimize();
+ uint32_t dex_pc = deoptimize->GetDexPc();
+ CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
+ arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this);
+ }
+
+ private:
+ HInstruction* const instruction_;
+ DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM);
+};
+
#undef __
#undef __
@@ -493,6 +513,14 @@ void CodeGeneratorARM::ComputeSpillMask() {
}
}
+static dwarf::Reg DWARFReg(Register reg) {
+ return dwarf::Reg::ArmCore(static_cast<int>(reg));
+}
+
+static dwarf::Reg DWARFReg(SRegister reg) {
+ return dwarf::Reg::ArmFp(static_cast<int>(reg));
+}
+
void CodeGeneratorARM::GenerateFrameEntry() {
bool skip_overflow_check =
IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm);
@@ -511,12 +539,19 @@ void CodeGeneratorARM::GenerateFrameEntry() {
// PC is in the list of callee-save to mimic Quick, but we need to push
// LR at entry instead.
- __ PushList((core_spill_mask_ & (~(1 << PC))) | 1 << LR);
+ uint32_t push_mask = (core_spill_mask_ & (~(1 << PC))) | 1 << LR;
+ __ PushList(push_mask);
+ __ cfi().AdjustCFAOffset(kArmWordSize * POPCOUNT(push_mask));
+ __ cfi().RelOffsetForMany(DWARFReg(Register(0)), 0, push_mask, kArmWordSize);
if (fpu_spill_mask_ != 0) {
SRegister start_register = SRegister(LeastSignificantBit(fpu_spill_mask_));
__ vpushs(start_register, POPCOUNT(fpu_spill_mask_));
+ __ cfi().AdjustCFAOffset(kArmWordSize * POPCOUNT(fpu_spill_mask_));
+ __ cfi().RelOffsetForMany(DWARFReg(SRegister(0)), 0, fpu_spill_mask_, kArmWordSize);
}
- __ AddConstant(SP, -(GetFrameSize() - FrameEntrySpillSize()));
+ int adjust = GetFrameSize() - FrameEntrySpillSize();
+ __ AddConstant(SP, -adjust);
+ __ cfi().AdjustCFAOffset(adjust);
__ StoreToOffset(kStoreWord, R0, SP, 0);
}
@@ -525,10 +560,14 @@ void CodeGeneratorARM::GenerateFrameExit() {
__ bx(LR);
return;
}
- __ AddConstant(SP, GetFrameSize() - FrameEntrySpillSize());
+ int adjust = GetFrameSize() - FrameEntrySpillSize();
+ __ AddConstant(SP, adjust);
+ __ cfi().AdjustCFAOffset(-adjust);
if (fpu_spill_mask_ != 0) {
SRegister start_register = SRegister(LeastSignificantBit(fpu_spill_mask_));
__ vpops(start_register, POPCOUNT(fpu_spill_mask_));
+ __ cfi().AdjustCFAOffset(-kArmPointerSize * POPCOUNT(fpu_spill_mask_));
+ __ cfi().RestoreMany(DWARFReg(SRegister(0)), fpu_spill_mask_);
}
__ PopList(core_spill_mask_);
}
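Every SP adjustment in the prologue and epilogue is now mirrored by a cfi() call so the unwinder's CFA tracks the real stack pointer; the RememberState()/RestoreState() pair seen in the return visitors below snapshots and restores that CFI state around mid-function epilogues. The PushList adjustment is just the word size times the number of set mask bits:

    #include <cstdint>

    // CFA delta for a PushList: 4 bytes (ARM word) per pushed register,
    // matching cfi().AdjustCFAOffset(kArmWordSize * POPCOUNT(push_mask))
    // above. Uses the GCC/Clang popcount builtin.
    int CfaAdjustForPush(uint32_t push_mask) {
      return 4 * __builtin_popcount(push_mask);
    }
    // Example: pushing {r4-r11, lr} (mask 0x4ff0) grows the CFA offset by 36.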
@@ -542,7 +581,6 @@ Location CodeGeneratorARM::GetStackLocation(HLoadLocal* load) const {
case Primitive::kPrimLong:
case Primitive::kPrimDouble:
return Location::DoubleStackSlot(GetStackSlot(load->GetLocal()));
- break;
case Primitive::kPrimInt:
case Primitive::kPrimNot:
@@ -555,10 +593,11 @@ Location CodeGeneratorARM::GetStackLocation(HLoadLocal* load) const {
case Primitive::kPrimShort:
case Primitive::kPrimVoid:
LOG(FATAL) << "Unexpected type " << load->GetType();
+ UNREACHABLE();
}
LOG(FATAL) << "Unreachable";
- return Location();
+ UNREACHABLE();
}
Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type) {
@@ -663,7 +702,6 @@ Location InvokeDexCallingConventionVisitor::GetReturnLocation(Primitive::Type ty
return Location();
}
UNREACHABLE();
- return Location();
}
void CodeGeneratorARM::Move32(Location destination, Location source) {
@@ -887,24 +925,17 @@ void InstructionCodeGeneratorARM::VisitExit(HExit* exit) {
UNUSED(exit);
}
-void LocationsBuilderARM::VisitIf(HIf* if_instr) {
- LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(if_instr, LocationSummary::kNoCall);
- HInstruction* cond = if_instr->InputAt(0);
- if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
- locations->SetInAt(0, Location::RequiresRegister());
- }
-}
-
-void InstructionCodeGeneratorARM::VisitIf(HIf* if_instr) {
- HInstruction* cond = if_instr->InputAt(0);
+void InstructionCodeGeneratorARM::GenerateTestAndBranch(HInstruction* instruction,
+ Label* true_target,
+ Label* false_target,
+ Label* always_true_target) {
+ HInstruction* cond = instruction->InputAt(0);
if (cond->IsIntConstant()) {
// Constant condition, statically compared against 1.
int32_t cond_value = cond->AsIntConstant()->GetValue();
if (cond_value == 1) {
- if (!codegen_->GoesToNextBlock(if_instr->GetBlock(),
- if_instr->IfTrueSuccessor())) {
- __ b(codegen_->GetLabelOf(if_instr->IfTrueSuccessor()));
+ if (always_true_target != nullptr) {
+ __ b(always_true_target);
}
return;
} else {
@@ -913,10 +944,10 @@ void InstructionCodeGeneratorARM::VisitIf(HIf* if_instr) {
} else {
if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
// Condition has been materialized, compare the output to 0
- DCHECK(if_instr->GetLocations()->InAt(0).IsRegister());
- __ cmp(if_instr->GetLocations()->InAt(0).AsRegister<Register>(),
+ DCHECK(instruction->GetLocations()->InAt(0).IsRegister());
+ __ cmp(instruction->GetLocations()->InAt(0).AsRegister<Register>(),
ShifterOperand(0));
- __ b(codegen_->GetLabelOf(if_instr->IfTrueSuccessor()), NE);
+ __ b(true_target, NE);
} else {
// Condition has not been materialized, use its inputs as the
// comparison and its condition as the branch condition.
@@ -938,16 +969,55 @@ void InstructionCodeGeneratorARM::VisitIf(HIf* if_instr) {
__ cmp(left, ShifterOperand(temp));
}
}
- __ b(codegen_->GetLabelOf(if_instr->IfTrueSuccessor()),
- ARMCondition(cond->AsCondition()->GetCondition()));
+ __ b(true_target, ARMCondition(cond->AsCondition()->GetCondition()));
}
}
- if (!codegen_->GoesToNextBlock(if_instr->GetBlock(),
- if_instr->IfFalseSuccessor())) {
- __ b(codegen_->GetLabelOf(if_instr->IfFalseSuccessor()));
+ if (false_target != nullptr) {
+ __ b(false_target);
+ }
+}
+
+void LocationsBuilderARM::VisitIf(HIf* if_instr) {
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(if_instr, LocationSummary::kNoCall);
+ HInstruction* cond = if_instr->InputAt(0);
+ if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
+ locations->SetInAt(0, Location::RequiresRegister());
+ }
+}
+
+void InstructionCodeGeneratorARM::VisitIf(HIf* if_instr) {
+ Label* true_target = codegen_->GetLabelOf(if_instr->IfTrueSuccessor());
+ Label* false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor());
+ Label* always_true_target = true_target;
+ if (codegen_->GoesToNextBlock(if_instr->GetBlock(),
+ if_instr->IfTrueSuccessor())) {
+ always_true_target = nullptr;
+ }
+ if (codegen_->GoesToNextBlock(if_instr->GetBlock(),
+ if_instr->IfFalseSuccessor())) {
+ false_target = nullptr;
+ }
+ GenerateTestAndBranch(if_instr, true_target, false_target, always_true_target);
+}
+
+void LocationsBuilderARM::VisitDeoptimize(HDeoptimize* deoptimize) {
+ LocationSummary* locations = new (GetGraph()->GetArena())
+ LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
+ HInstruction* cond = deoptimize->InputAt(0);
+ DCHECK(cond->IsCondition());
+ if (cond->AsCondition()->NeedsMaterialization()) {
+ locations->SetInAt(0, Location::RequiresRegister());
}
}
+void InstructionCodeGeneratorARM::VisitDeoptimize(HDeoptimize* deoptimize) {
+ SlowPathCodeARM* slow_path = new (GetGraph()->GetArena())
+ DeoptimizationSlowPathARM(deoptimize);
+ codegen_->AddSlowPath(slow_path);
+ Label* slow_path_entry = slow_path->GetEntryLabel();
+ GenerateTestAndBranch(deoptimize, slow_path_entry, nullptr, slow_path_entry);
+}
void LocationsBuilderARM::VisitCondition(HCondition* comp) {
LocationSummary* locations =
@@ -1139,7 +1209,10 @@ void LocationsBuilderARM::VisitReturnVoid(HReturnVoid* ret) {
void InstructionCodeGeneratorARM::VisitReturnVoid(HReturnVoid* ret) {
UNUSED(ret);
+ __ cfi().RememberState();
codegen_->GenerateFrameExit();
+ __ cfi().RestoreState();
+ __ cfi().DefCFAOffset(codegen_->GetFrameSize());
}
void LocationsBuilderARM::VisitReturn(HReturn* ret) {
@@ -1150,7 +1223,10 @@ void LocationsBuilderARM::VisitReturn(HReturn* ret) {
void InstructionCodeGeneratorARM::VisitReturn(HReturn* ret) {
UNUSED(ret);
+ __ cfi().RememberState();
codegen_->GenerateFrameExit();
+ __ cfi().RestoreState();
+ __ cfi().DefCFAOffset(codegen_->GetFrameSize());
}
void LocationsBuilderARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index bcdea7a639..06f425ea21 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -188,6 +188,10 @@ class InstructionCodeGeneratorARM : public HGraphVisitor {
void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
void GenerateImplicitNullCheck(HNullCheck* instruction);
void GenerateExplicitNullCheck(HNullCheck* instruction);
+ void GenerateTestAndBranch(HInstruction* instruction,
+ Label* true_target,
+ Label* false_target,
+ Label* always_true_target);
ArmAssembler* const assembler_;
CodeGeneratorARM* const codegen_;
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 32ada3837e..5fe8adc86a 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -352,6 +352,26 @@ class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 {
DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathARM64);
};
+class DeoptimizationSlowPathARM64 : public SlowPathCodeARM64 {
+ public:
+ explicit DeoptimizationSlowPathARM64(HInstruction* instruction)
+ : instruction_(instruction) {}
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ __ Bind(GetEntryLabel());
+ SaveLiveRegisters(codegen, instruction_->GetLocations());
+ DCHECK(instruction_->IsDeoptimize());
+ HDeoptimize* deoptimize = instruction_->AsDeoptimize();
+ uint32_t dex_pc = deoptimize->GetDexPc();
+ CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
+ arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this);
+ }
+
+ private:
+ HInstruction* const instruction_;
+ DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM64);
+};
+
#undef __
Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type) {
@@ -445,18 +465,65 @@ void CodeGeneratorARM64::GenerateFrameEntry() {
// ... : reserved frame space.
// sp[0] : current method.
__ Str(kArtMethodRegister, MemOperand(sp, -frame_size, PreIndex));
- __ PokeCPURegList(GetFramePreservedCoreRegisters(), frame_size - GetCoreSpillSize());
- __ PokeCPURegList(GetFramePreservedFPRegisters(), frame_size - FrameEntrySpillSize());
+ GetAssembler()->cfi().AdjustCFAOffset(frame_size);
+ SpillRegisters(GetFramePreservedCoreRegisters(), frame_size - GetCoreSpillSize());
+ SpillRegisters(GetFramePreservedFPRegisters(), frame_size - FrameEntrySpillSize());
}
}
void CodeGeneratorARM64::GenerateFrameExit() {
if (!HasEmptyFrame()) {
int frame_size = GetFrameSize();
- __ PeekCPURegList(GetFramePreservedFPRegisters(), frame_size - FrameEntrySpillSize());
- __ PeekCPURegList(GetFramePreservedCoreRegisters(), frame_size - GetCoreSpillSize());
+ UnspillRegisters(GetFramePreservedFPRegisters(), frame_size - FrameEntrySpillSize());
+ UnspillRegisters(GetFramePreservedCoreRegisters(), frame_size - GetCoreSpillSize());
__ Drop(frame_size);
+ GetAssembler()->cfi().AdjustCFAOffset(-frame_size);
+ }
+}
+
+static inline dwarf::Reg DWARFReg(CPURegister reg) {
+ if (reg.IsFPRegister()) {
+ return dwarf::Reg::Arm64Fp(reg.code());
+ } else {
+ DCHECK_LT(reg.code(), 31u); // X0 - X30.
+ return dwarf::Reg::Arm64Core(reg.code());
+ }
+}
+
+void CodeGeneratorARM64::SpillRegisters(vixl::CPURegList registers, int offset) {
+ int size = registers.RegisterSizeInBytes();
+ while (registers.Count() >= 2) {
+ const CPURegister& dst0 = registers.PopLowestIndex();
+ const CPURegister& dst1 = registers.PopLowestIndex();
+ __ Stp(dst0, dst1, MemOperand(__ StackPointer(), offset));
+ GetAssembler()->cfi().RelOffset(DWARFReg(dst0), offset);
+ GetAssembler()->cfi().RelOffset(DWARFReg(dst1), offset + size);
+ offset += 2 * size;
}
+ if (!registers.IsEmpty()) {
+ const CPURegister& dst0 = registers.PopLowestIndex();
+ __ Str(dst0, MemOperand(__ StackPointer(), offset));
+ GetAssembler()->cfi().RelOffset(DWARFReg(dst0), offset);
+ }
+ DCHECK(registers.IsEmpty());
+}
+
+void CodeGeneratorARM64::UnspillRegisters(vixl::CPURegList registers, int offset) {
+ int size = registers.RegisterSizeInBytes();
+ while (registers.Count() >= 2) {
+ const CPURegister& dst0 = registers.PopLowestIndex();
+ const CPURegister& dst1 = registers.PopLowestIndex();
+ __ Ldp(dst0, dst1, MemOperand(__ StackPointer(), offset));
+ GetAssembler()->cfi().Restore(DWARFReg(dst0));
+ GetAssembler()->cfi().Restore(DWARFReg(dst1));
+ offset += 2 * size;
+ }
+ if (!registers.IsEmpty()) {
+ const CPURegister& dst0 = registers.PopLowestIndex();
+ __ Ldr(dst0, MemOperand(__ StackPointer(), offset));
+ GetAssembler()->cfi().Restore(DWARFReg(dst0));
+ }
+ DCHECK(registers.IsEmpty());
}
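SpillRegisters pops the list two registers at a time so each pair becomes a single stp, with one trailing str for a leftover register; UnspillRegisters mirrors it with ldp/ldr. The traversal shape without the assembler (an illustrative stand-in, not vixl code):

    #include <cstdio>
    #include <deque>

    // Pop registers pairwise (stp), then singly (str) — the loop shape above.
    void Spill(std::deque<int> regs, int offset, int size) {
      while (regs.size() >= 2) {
        int r0 = regs.front(); regs.pop_front();
        int r1 = regs.front(); regs.pop_front();
        std::printf("stp x%d, x%d, [sp, #%d]\n", r0, r1, offset);
        offset += 2 * size;
      }
      if (!regs.empty()) {
        std::printf("str x%d, [sp, #%d]\n", regs.front(), offset);
      }
    }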
void CodeGeneratorARM64::Bind(HBasicBlock* block) {
@@ -1611,25 +1678,18 @@ void InstructionCodeGeneratorARM64::VisitGoto(HGoto* got) {
}
}
-void LocationsBuilderARM64::VisitIf(HIf* if_instr) {
- LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr);
- HInstruction* cond = if_instr->InputAt(0);
- if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
- locations->SetInAt(0, Location::RequiresRegister());
- }
-}
-
-void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) {
- HInstruction* cond = if_instr->InputAt(0);
+void InstructionCodeGeneratorARM64::GenerateTestAndBranch(HInstruction* instruction,
+ vixl::Label* true_target,
+ vixl::Label* false_target,
+ vixl::Label* always_true_target) {
+ HInstruction* cond = instruction->InputAt(0);
HCondition* condition = cond->AsCondition();
- vixl::Label* true_target = codegen_->GetLabelOf(if_instr->IfTrueSuccessor());
- vixl::Label* false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor());
if (cond->IsIntConstant()) {
int32_t cond_value = cond->AsIntConstant()->GetValue();
if (cond_value == 1) {
- if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), if_instr->IfTrueSuccessor())) {
- __ B(true_target);
+ if (always_true_target != nullptr) {
+ __ B(always_true_target);
}
return;
} else {
@@ -1637,31 +1697,87 @@ void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) {
}
} else if (!cond->IsCondition() || condition->NeedsMaterialization()) {
// The condition instruction has been materialized, compare the output to 0.
- Location cond_val = if_instr->GetLocations()->InAt(0);
+ Location cond_val = instruction->GetLocations()->InAt(0);
DCHECK(cond_val.IsRegister());
- __ Cbnz(InputRegisterAt(if_instr, 0), true_target);
+ __ Cbnz(InputRegisterAt(instruction, 0), true_target);
} else {
// The condition instruction has not been materialized, use its inputs as
// the comparison and its condition as the branch condition.
Register lhs = InputRegisterAt(condition, 0);
Operand rhs = InputOperandAt(condition, 1);
Condition arm64_cond = ARM64Condition(condition->GetCondition());
- if ((arm64_cond == eq || arm64_cond == ne) && rhs.IsImmediate() && (rhs.immediate() == 0)) {
- if (arm64_cond == eq) {
- __ Cbz(lhs, true_target);
- } else {
- __ Cbnz(lhs, true_target);
+ if ((arm64_cond != gt && arm64_cond != le) && rhs.IsImmediate() && (rhs.immediate() == 0)) {
+ switch (arm64_cond) {
+ case eq:
+ __ Cbz(lhs, true_target);
+ break;
+ case ne:
+ __ Cbnz(lhs, true_target);
+ break;
+ case lt:
+ // Test the sign bit and branch accordingly.
+ __ Tbnz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, true_target);
+ break;
+ case ge:
+ // Test the sign bit and branch accordingly.
+ __ Tbz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, true_target);
+ break;
+ default:
+          // Without the `static_cast` the compiler emits an error under
+          // `-Werror=sign-promo`.
+ LOG(FATAL) << "Unexpected condition: " << static_cast<int>(arm64_cond);
}
} else {
__ Cmp(lhs, rhs);
__ B(arm64_cond, true_target);
}
}
- if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), if_instr->IfFalseSuccessor())) {
+ if (false_target != nullptr) {
__ B(false_target);
}
}
+void LocationsBuilderARM64::VisitIf(HIf* if_instr) {
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr);
+ HInstruction* cond = if_instr->InputAt(0);
+ if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
+ locations->SetInAt(0, Location::RequiresRegister());
+ }
+}
+
+void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) {
+ vixl::Label* true_target = codegen_->GetLabelOf(if_instr->IfTrueSuccessor());
+ vixl::Label* false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor());
+ vixl::Label* always_true_target = true_target;
+ if (codegen_->GoesToNextBlock(if_instr->GetBlock(),
+ if_instr->IfTrueSuccessor())) {
+ always_true_target = nullptr;
+ }
+ if (codegen_->GoesToNextBlock(if_instr->GetBlock(),
+ if_instr->IfFalseSuccessor())) {
+ false_target = nullptr;
+ }
+ GenerateTestAndBranch(if_instr, true_target, false_target, always_true_target);
+}
+
+void LocationsBuilderARM64::VisitDeoptimize(HDeoptimize* deoptimize) {
+ LocationSummary* locations = new (GetGraph()->GetArena())
+ LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
+ HInstruction* cond = deoptimize->InputAt(0);
+ DCHECK(cond->IsCondition());
+ if (cond->AsCondition()->NeedsMaterialization()) {
+ locations->SetInAt(0, Location::RequiresRegister());
+ }
+}
+
+void InstructionCodeGeneratorARM64::VisitDeoptimize(HDeoptimize* deoptimize) {
+ SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena())
+ DeoptimizationSlowPathARM64(deoptimize);
+ codegen_->AddSlowPath(slow_path);
+ vixl::Label* slow_path_entry = slow_path->GetEntryLabel();
+ GenerateTestAndBranch(deoptimize, slow_path_entry, nullptr, slow_path_entry);
+}
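
VisitDeoptimize reuses GenerateTestAndBranch with the slow-path entry as both the true target and the always-true target, and no false target: a true (or constant-true) guard jumps into the slow path and a false one falls through. A toy model of that target wiring (not ART code; reduced to the branch decision once the condition value is known):

    #include <cassert>
    #include <string>

    // nullptr means "fall through to the next block".
    static std::string Branch(bool cond, const char* true_target,
                              const char* false_target) {
      if (cond) return true_target != nullptr ? true_target : "fallthrough";
      return false_target != nullptr ? false_target : "fallthrough";
    }

    int main() {
      // VisitDeoptimize: GenerateTestAndBranch(deopt, entry, nullptr, entry).
      assert(Branch(true, "deopt_slow_path", nullptr) == "deopt_slow_path");
      assert(Branch(false, "deopt_slow_path", nullptr) == "fallthrough");
      return 0;
    }
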
+
void LocationsBuilderARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
@@ -2349,8 +2465,11 @@ void LocationsBuilderARM64::VisitReturn(HReturn* instruction) {
void InstructionCodeGeneratorARM64::VisitReturn(HReturn* instruction) {
UNUSED(instruction);
+ GetAssembler()->cfi().RememberState();
codegen_->GenerateFrameExit();
__ Ret();
+ GetAssembler()->cfi().RestoreState();
+ GetAssembler()->cfi().DefCFAOffset(codegen_->GetFrameSize());
}
void LocationsBuilderARM64::VisitReturnVoid(HReturnVoid* instruction) {
@@ -2359,8 +2478,11 @@ void LocationsBuilderARM64::VisitReturnVoid(HReturnVoid* instruction) {
void InstructionCodeGeneratorARM64::VisitReturnVoid(HReturnVoid* instruction) {
UNUSED(instruction);
+ GetAssembler()->cfi().RememberState();
codegen_->GenerateFrameExit();
__ Ret();
+ GetAssembler()->cfi().RestoreState();
+ GetAssembler()->cfi().DefCFAOffset(codegen_->GetFrameSize());
}
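
A return block is not necessarily the last code emitted, so the unwind state the epilogue rewinds must be reinstated for whatever follows the Ret: RememberState/RestoreState bracket the epilogue, and DefCFAOffset re-asserts the full frame size. A simplified standalone model of that bookkeeping (the real writer emits DWARF opcodes rather than copying structs):

    #include <cassert>

    struct CfiState { int cfa_offset; };

    int main() {
      const int frame_size = 64;     // hypothetical GetFrameSize()
      CfiState cfi{frame_size};      // state while the method body executes
      CfiState saved = cfi;          // cfi().RememberState()
      cfi.cfa_offset = 0;            // epilogue restores SP before the return
      // ... __ Ret() emitted here ...
      cfi = saved;                   // cfi().RestoreState()
      cfi.cfa_offset = frame_size;   // cfi().DefCFAOffset(codegen_->GetFrameSize())
      assert(cfi.cfa_offset == frame_size);
      return 0;
    }
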
void LocationsBuilderARM64::VisitShl(HShl* shl) {
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 2c624d2926..9430e31037 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -23,8 +23,8 @@
#include "nodes.h"
#include "parallel_move_resolver.h"
#include "utils/arm64/assembler_arm64.h"
-#include "a64/disasm-a64.h"
-#include "a64/macro-assembler-a64.h"
+#include "vixl/a64/disasm-a64.h"
+#include "vixl/a64/macro-assembler-a64.h"
#include "arch/arm64/quick_method_frame_info_arm64.h"
namespace art {
@@ -165,6 +165,10 @@ class InstructionCodeGeneratorARM64 : public HGraphVisitor {
void HandleShift(HBinaryOperation* instr);
void GenerateImplicitNullCheck(HNullCheck* instruction);
void GenerateExplicitNullCheck(HNullCheck* instruction);
+ void GenerateTestAndBranch(HInstruction* instruction,
+ vixl::Label* true_target,
+ vixl::Label* false_target,
+ vixl::Label* always_true_target);
Arm64Assembler* const assembler_;
CodeGeneratorARM64* const codegen_;
@@ -223,6 +227,8 @@ class CodeGeneratorARM64 : public CodeGenerator {
void GenerateFrameEntry() OVERRIDE;
void GenerateFrameExit() OVERRIDE;
+ void SpillRegisters(vixl::CPURegList registers, int offset);
+ void UnspillRegisters(vixl::CPURegList registers, int offset);
vixl::CPURegList GetFramePreservedCoreRegisters() const {
return vixl::CPURegList(vixl::CPURegister::kRegister, vixl::kXRegSize,
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 007e25ab4a..a6fb07fa98 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -52,7 +52,7 @@ class NullCheckSlowPathX86 : public SlowPathCodeX86 {
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
__ Bind(GetEntryLabel());
__ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pThrowNullPointer)));
- codegen->RecordPcInfo(instruction_, instruction_->GetDexPc());
+ RecordPcInfo(codegen, instruction_, instruction_->GetDexPc());
}
private:
@@ -67,7 +67,7 @@ class DivZeroCheckSlowPathX86 : public SlowPathCodeX86 {
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
__ Bind(GetEntryLabel());
__ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pThrowDivZero)));
- codegen->RecordPcInfo(instruction_, instruction_->GetDexPc());
+ RecordPcInfo(codegen, instruction_, instruction_->GetDexPc());
}
private:
@@ -116,7 +116,7 @@ class BoundsCheckSlowPathX86 : public SlowPathCodeX86 {
length_location_,
Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
__ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pThrowArrayBounds)));
- codegen->RecordPcInfo(instruction_, instruction_->GetDexPc());
+ RecordPcInfo(codegen, instruction_, instruction_->GetDexPc());
}
private:
@@ -137,7 +137,7 @@ class SuspendCheckSlowPathX86 : public SlowPathCodeX86 {
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, instruction_->GetLocations());
__ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pTestSuspend)));
- codegen->RecordPcInfo(instruction_, instruction_->GetDexPc());
+ RecordPcInfo(codegen, instruction_, instruction_->GetDexPc());
RestoreLiveRegisters(codegen, instruction_->GetLocations());
if (successor_ == nullptr) {
__ jmp(GetReturnLabel());
@@ -295,6 +295,27 @@ class TypeCheckSlowPathX86 : public SlowPathCodeX86 {
DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86);
};
+class DeoptimizationSlowPathX86 : public SlowPathCodeX86 {
+ public:
+ explicit DeoptimizationSlowPathX86(HInstruction* instruction)
+ : instruction_(instruction) {}
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ __ Bind(GetEntryLabel());
+ SaveLiveRegisters(codegen, instruction_->GetLocations());
+ __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pDeoptimize)));
+ // No need to restore live registers.
+ DCHECK(instruction_->IsDeoptimize());
+ HDeoptimize* deoptimize = instruction_->AsDeoptimize();
+ uint32_t dex_pc = deoptimize->GetDexPc();
+ codegen->RecordPcInfo(instruction_, dex_pc, this);
+ }
+
+ private:
+ HInstruction* const instruction_;
+ DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86);
+};
+
#undef __
#define __ reinterpret_cast<X86Assembler*>(GetAssembler())->
@@ -340,7 +361,9 @@ size_t CodeGeneratorX86::RestoreFloatingPointRegister(size_t stack_index, uint32
return GetFloatingPointSpillSlotSize();
}
-CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, const CompilerOptions& compiler_options)
+CodeGeneratorX86::CodeGeneratorX86(HGraph* graph,
+ const X86InstructionSetFeatures& isa_features,
+ const CompilerOptions& compiler_options)
: CodeGenerator(graph,
kNumberOfCpuRegisters,
kNumberOfXmmRegisters,
@@ -353,7 +376,8 @@ CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, const CompilerOptions& compile
block_labels_(graph->GetArena(), 0),
location_builder_(graph, this),
instruction_visitor_(graph, this),
- move_resolver_(graph->GetArena(), this) {
+ move_resolver_(graph->GetArena(), this),
+ isa_features_(isa_features) {
// Use a fake return address register to mimic Quick.
AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
}
@@ -436,7 +460,12 @@ InstructionCodeGeneratorX86::InstructionCodeGeneratorX86(HGraph* graph, CodeGene
assembler_(codegen->GetAssembler()),
codegen_(codegen) {}
+static dwarf::Reg DWARFReg(Register reg) {
+ return dwarf::Reg::X86Core(static_cast<int>(reg));
+}
+
void CodeGeneratorX86::GenerateFrameEntry() {
+ __ cfi().SetCurrentCFAOffset(kX86WordSize); // return address
__ Bind(&frame_entry_label_);
bool skip_overflow_check =
IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86);
@@ -455,10 +484,14 @@ void CodeGeneratorX86::GenerateFrameEntry() {
Register reg = kCoreCalleeSaves[i];
if (allocated_registers_.ContainsCoreRegister(reg)) {
__ pushl(reg);
+ __ cfi().AdjustCFAOffset(kX86WordSize);
+ __ cfi().RelOffset(DWARFReg(reg), 0);
}
}
- __ subl(ESP, Immediate(GetFrameSize() - FrameEntrySpillSize()));
+ int adjust = GetFrameSize() - FrameEntrySpillSize();
+ __ subl(ESP, Immediate(adjust));
+ __ cfi().AdjustCFAOffset(adjust);
__ movl(Address(ESP, kCurrentMethodStackOffset), EAX);
}
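
The CFA bookkeeping has to track ESP exactly: one word for the return address already on the stack, one word per callee-save push, and the frame-size adjustment from the subl. A standalone check of that arithmetic (not ART code), assuming hypothetically two pushes, a 64-byte frame, and a FrameEntrySpillSize that covers the return-address slot plus the pushes:

    #include <cassert>

    int main() {
      const int kX86WordSize = 4;
      int cfa_offset = kX86WordSize;          // SetCurrentCFAOffset: return address
      const int num_pushes = 2;               // hypothetical callee saves
      for (int i = 0; i < num_pushes; ++i) {
        cfa_offset += kX86WordSize;           // AdjustCFAOffset after each pushl
      }
      const int frame_size = 64;              // hypothetical GetFrameSize()
      const int spill_size = kX86WordSize + num_pushes * kX86WordSize;
      cfa_offset += frame_size - spill_size;  // AdjustCFAOffset(adjust) after subl
      assert(cfa_offset == frame_size);       // unwinder sees the whole frame
      return 0;
    }
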
@@ -467,12 +500,16 @@ void CodeGeneratorX86::GenerateFrameExit() {
return;
}
- __ addl(ESP, Immediate(GetFrameSize() - FrameEntrySpillSize()));
+ int adjust = GetFrameSize() - FrameEntrySpillSize();
+ __ addl(ESP, Immediate(adjust));
+ __ cfi().AdjustCFAOffset(-adjust);
for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
Register reg = kCoreCalleeSaves[i];
if (allocated_registers_.ContainsCoreRegister(reg)) {
__ popl(reg);
+ __ cfi().AdjustCFAOffset(-static_cast<int>(kX86WordSize));
+ __ cfi().Restore(DWARFReg(reg));
}
}
}
@@ -491,7 +528,6 @@ Location CodeGeneratorX86::GetStackLocation(HLoadLocal* load) const {
case Primitive::kPrimLong:
case Primitive::kPrimDouble:
return Location::DoubleStackSlot(GetStackSlot(load->GetLocal()));
- break;
case Primitive::kPrimInt:
case Primitive::kPrimNot:
@@ -504,10 +540,11 @@ Location CodeGeneratorX86::GetStackLocation(HLoadLocal* load) const {
case Primitive::kPrimShort:
case Primitive::kPrimVoid:
LOG(FATAL) << "Unexpected type " << load->GetType();
+ UNREACHABLE();
}
LOG(FATAL) << "Unreachable";
- return Location();
+ UNREACHABLE();
}
Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type) {
@@ -785,24 +822,17 @@ void InstructionCodeGeneratorX86::VisitExit(HExit* exit) {
UNUSED(exit);
}
-void LocationsBuilderX86::VisitIf(HIf* if_instr) {
- LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(if_instr, LocationSummary::kNoCall);
- HInstruction* cond = if_instr->InputAt(0);
- if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
- locations->SetInAt(0, Location::Any());
- }
-}
-
-void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) {
- HInstruction* cond = if_instr->InputAt(0);
+void InstructionCodeGeneratorX86::GenerateTestAndBranch(HInstruction* instruction,
+ Label* true_target,
+ Label* false_target,
+ Label* always_true_target) {
+ HInstruction* cond = instruction->InputAt(0);
if (cond->IsIntConstant()) {
// Constant condition, statically compared against 1.
int32_t cond_value = cond->AsIntConstant()->GetValue();
if (cond_value == 1) {
- if (!codegen_->GoesToNextBlock(if_instr->GetBlock(),
- if_instr->IfTrueSuccessor())) {
- __ jmp(codegen_->GetLabelOf(if_instr->IfTrueSuccessor()));
+ if (always_true_target != nullptr) {
+ __ jmp(always_true_target);
}
return;
} else {
@@ -815,20 +845,19 @@ void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) {
// evaluated just before the if, we don't need to evaluate it
// again.
bool eflags_set = cond->IsCondition()
- && cond->AsCondition()->IsBeforeWhenDisregardMoves(if_instr);
+ && cond->AsCondition()->IsBeforeWhenDisregardMoves(instruction);
if (materialized) {
if (!eflags_set) {
// Materialized condition, compare against 0.
- Location lhs = if_instr->GetLocations()->InAt(0);
+ Location lhs = instruction->GetLocations()->InAt(0);
if (lhs.IsRegister()) {
__ testl(lhs.AsRegister<Register>(), lhs.AsRegister<Register>());
} else {
__ cmpl(Address(ESP, lhs.GetStackIndex()), Immediate(0));
}
- __ j(kNotEqual, codegen_->GetLabelOf(if_instr->IfTrueSuccessor()));
+ __ j(kNotEqual, true_target);
} else {
- __ j(X86Condition(cond->AsCondition()->GetCondition()),
- codegen_->GetLabelOf(if_instr->IfTrueSuccessor()));
+ __ j(X86Condition(cond->AsCondition()->GetCondition()), true_target);
}
} else {
Location lhs = cond->GetLocations()->InAt(0);
@@ -847,14 +876,54 @@ void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) {
} else {
__ cmpl(lhs.AsRegister<Register>(), Address(ESP, rhs.GetStackIndex()));
}
- __ j(X86Condition(cond->AsCondition()->GetCondition()),
- codegen_->GetLabelOf(if_instr->IfTrueSuccessor()));
+ __ j(X86Condition(cond->AsCondition()->GetCondition()), true_target);
}
}
- if (!codegen_->GoesToNextBlock(if_instr->GetBlock(),
- if_instr->IfFalseSuccessor())) {
- __ jmp(codegen_->GetLabelOf(if_instr->IfFalseSuccessor()));
+ if (false_target != nullptr) {
+ __ jmp(false_target);
+ }
+}
+
+void LocationsBuilderX86::VisitIf(HIf* if_instr) {
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(if_instr, LocationSummary::kNoCall);
+ HInstruction* cond = if_instr->InputAt(0);
+ if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
+ locations->SetInAt(0, Location::Any());
+ }
+}
+
+void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) {
+ Label* true_target = codegen_->GetLabelOf(if_instr->IfTrueSuccessor());
+ Label* false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor());
+ Label* always_true_target = true_target;
+ if (codegen_->GoesToNextBlock(if_instr->GetBlock(),
+ if_instr->IfTrueSuccessor())) {
+ always_true_target = nullptr;
+ }
+ if (codegen_->GoesToNextBlock(if_instr->GetBlock(),
+ if_instr->IfFalseSuccessor())) {
+ false_target = nullptr;
}
+ GenerateTestAndBranch(if_instr, true_target, false_target, always_true_target);
+}
+
+void LocationsBuilderX86::VisitDeoptimize(HDeoptimize* deoptimize) {
+ LocationSummary* locations = new (GetGraph()->GetArena())
+ LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
+ HInstruction* cond = deoptimize->InputAt(0);
+ DCHECK(cond->IsCondition());
+ if (cond->AsCondition()->NeedsMaterialization()) {
+ locations->SetInAt(0, Location::Any());
+ }
+}
+
+void InstructionCodeGeneratorX86::VisitDeoptimize(HDeoptimize* deoptimize) {
+ SlowPathCodeX86* slow_path = new (GetGraph()->GetArena())
+ DeoptimizationSlowPathX86(deoptimize);
+ codegen_->AddSlowPath(slow_path);
+ Label* slow_path_entry = slow_path->GetEntryLabel();
+ GenerateTestAndBranch(deoptimize, slow_path_entry, nullptr, slow_path_entry);
}
void LocationsBuilderX86::VisitLocal(HLocal* local) {
@@ -1047,8 +1116,11 @@ void LocationsBuilderX86::VisitReturnVoid(HReturnVoid* ret) {
void InstructionCodeGeneratorX86::VisitReturnVoid(HReturnVoid* ret) {
UNUSED(ret);
+ __ cfi().RememberState();
codegen_->GenerateFrameExit();
__ ret();
+ __ cfi().RestoreState();
+ __ cfi().DefCFAOffset(codegen_->GetFrameSize());
}
void LocationsBuilderX86::VisitReturn(HReturn* ret) {
@@ -1106,12 +1178,15 @@ void InstructionCodeGeneratorX86::VisitReturn(HReturn* ret) {
LOG(FATAL) << "Unknown return type " << ret->InputAt(0)->GetType();
}
}
+ __ cfi().RememberState();
codegen_->GenerateFrameExit();
__ ret();
+ __ cfi().RestoreState();
+ __ cfi().DefCFAOffset(codegen_->GetFrameSize());
}
void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
- IntrinsicLocationsBuilderX86 intrinsic(GetGraph()->GetArena());
+ IntrinsicLocationsBuilderX86 intrinsic(codegen_);
if (intrinsic.TryDispatch(invoke)) {
return;
}
@@ -2637,16 +2712,16 @@ void LocationsBuilderX86::HandleShift(HBinaryOperation* op) {
switch (op->GetResultType()) {
case Primitive::kPrimInt: {
- locations->SetInAt(0, Location::RequiresRegister());
- // The shift count needs to be in CL.
+ locations->SetInAt(0, Location::Any());
+ // The shift count needs to be in CL or a constant.
locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, op->InputAt(1)));
locations->SetOut(Location::SameAsFirstInput());
break;
}
case Primitive::kPrimLong: {
locations->SetInAt(0, Location::RequiresRegister());
- // The shift count needs to be in CL.
- locations->SetInAt(1, Location::RegisterLocation(ECX));
+ // The shift count needs to be in CL or a constant.
+ locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, op->InputAt(1)));
locations->SetOut(Location::SameAsFirstInput());
break;
}
@@ -2665,38 +2740,87 @@ void InstructionCodeGeneratorX86::HandleShift(HBinaryOperation* op) {
switch (op->GetResultType()) {
case Primitive::kPrimInt: {
- Register first_reg = first.AsRegister<Register>();
- if (second.IsRegister()) {
- Register second_reg = second.AsRegister<Register>();
- DCHECK_EQ(ECX, second_reg);
- if (op->IsShl()) {
- __ shll(first_reg, second_reg);
- } else if (op->IsShr()) {
- __ sarl(first_reg, second_reg);
+ if (first.IsRegister()) {
+ Register first_reg = first.AsRegister<Register>();
+ if (second.IsRegister()) {
+ Register second_reg = second.AsRegister<Register>();
+ DCHECK_EQ(ECX, second_reg);
+ if (op->IsShl()) {
+ __ shll(first_reg, second_reg);
+ } else if (op->IsShr()) {
+ __ sarl(first_reg, second_reg);
+ } else {
+ __ shrl(first_reg, second_reg);
+ }
} else {
- __ shrl(first_reg, second_reg);
+ int32_t shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue;
+ if (shift == 0) {
+ return;
+ }
+ Immediate imm(shift);
+ if (op->IsShl()) {
+ __ shll(first_reg, imm);
+ } else if (op->IsShr()) {
+ __ sarl(first_reg, imm);
+ } else {
+ __ shrl(first_reg, imm);
+ }
}
} else {
- Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue);
- if (op->IsShl()) {
- __ shll(first_reg, imm);
- } else if (op->IsShr()) {
- __ sarl(first_reg, imm);
+ DCHECK(first.IsStackSlot()) << first;
+ Address addr(ESP, first.GetStackIndex());
+ if (second.IsRegister()) {
+ Register second_reg = second.AsRegister<Register>();
+ DCHECK_EQ(ECX, second_reg);
+ if (op->IsShl()) {
+ __ shll(addr, second_reg);
+ } else if (op->IsShr()) {
+ __ sarl(addr, second_reg);
+ } else {
+ __ shrl(addr, second_reg);
+ }
} else {
- __ shrl(first_reg, imm);
+ int32_t shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue;
+ if (shift == 0) {
+ return;
+ }
+ Immediate imm(shift);
+ if (op->IsShl()) {
+ __ shll(addr, imm);
+ } else if (op->IsShr()) {
+ __ sarl(addr, imm);
+ } else {
+ __ shrl(addr, imm);
+ }
}
}
+
break;
}
case Primitive::kPrimLong: {
- Register second_reg = second.AsRegister<Register>();
- DCHECK_EQ(ECX, second_reg);
- if (op->IsShl()) {
- GenerateShlLong(first, second_reg);
- } else if (op->IsShr()) {
- GenerateShrLong(first, second_reg);
+ if (second.IsRegister()) {
+ Register second_reg = second.AsRegister<Register>();
+ DCHECK_EQ(ECX, second_reg);
+ if (op->IsShl()) {
+ GenerateShlLong(first, second_reg);
+ } else if (op->IsShr()) {
+ GenerateShrLong(first, second_reg);
+ } else {
+ GenerateUShrLong(first, second_reg);
+ }
} else {
- GenerateUShrLong(first, second_reg);
+ // Shift by a constant.
+ int shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftValue;
+ // Nothing to do if the shift is 0, as the input is already the output.
+ if (shift != 0) {
+ if (op->IsShl()) {
+ GenerateShlLong(first, shift);
+ } else if (op->IsShr()) {
+ GenerateShrLong(first, shift);
+ } else {
+ GenerateUShrLong(first, shift);
+ }
+ }
}
break;
}
@@ -2705,6 +2829,26 @@ void InstructionCodeGeneratorX86::HandleShift(HBinaryOperation* op) {
}
}
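
Masking the constant with kMaxIntShiftValue (31) is safe because both the x86 shift instructions and the Java shift semantics use only the low five bits of the count for 32-bit operands; a masked count of zero leaves the value in place, hence the early return. A standalone check (not ART code):

    #include <cassert>
    #include <cstdint>

    int main() {
      const int kMaxIntShiftValue = 31;  // mirrors the constant used above
      const uint32_t x = 0x12345678u;
      const int counts[] = {0, 1, 31, 32, 33, 64};
      for (int s : counts) {
        int masked = s & kMaxIntShiftValue;
        // Java defines x << s via the low five bits of s for int operands.
        assert((x << masked) == (x << (s % 32)));
      }
      return 0;
    }
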
+void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, int shift) {
+ Register low = loc.AsRegisterPairLow<Register>();
+ Register high = loc.AsRegisterPairHigh<Register>();
+ if (shift == 32) {
+ // Shift by 32 is easy. High gets low, and low gets 0.
+ codegen_->EmitParallelMoves(
+ loc.ToLow(), loc.ToHigh(),
+ Location::ConstantLocation(GetGraph()->GetIntConstant(0)), loc.ToLow());
+ } else if (shift > 32) {
+ // Low part becomes 0. High part is low part << (shift-32).
+ __ movl(high, low);
+ __ shll(high, Immediate(shift - 32));
+ __ xorl(low, low);
+ } else {
+ // Between 1 and 31.
+ __ shld(high, low, Immediate(shift));
+ __ shll(low, Immediate(shift));
+ }
+}
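
The three cases mirror what a 64-bit shift means on a 32-bit register pair. A standalone check against plain 64-bit arithmetic (not ART code; shift 0 is excluded, matching the guard in HandleShift):

    #include <cassert>
    #include <cstdint>

    // Same decomposition as GenerateShlLong(loc, shift), in plain C++.
    static uint64_t ShlPair(uint32_t low, uint32_t high, int shift) {
      if (shift == 32) {
        high = low;                                      // high gets low
        low = 0;                                         // low gets 0
      } else if (shift > 32) {
        high = low << (shift - 32);                      // low part becomes 0
        low = 0;
      } else {                                           // between 1 and 31
        high = (high << shift) | (low >> (32 - shift));  // shld
        low <<= shift;                                   // shll
      }
      return (static_cast<uint64_t>(high) << 32) | low;
    }

    int main() {
      const uint64_t v = 0x0123456789abcdefULL;
      const int counts[] = {1, 17, 31, 32, 33, 63};
      for (int s : counts) {
        uint32_t low = static_cast<uint32_t>(v);
        uint32_t high = static_cast<uint32_t>(v >> 32);
        assert(ShlPair(low, high, s) == (v << s));
      }
      return 0;
    }
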
+
void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, Register shifter) {
Label done;
__ shld(loc.AsRegisterPairHigh<Register>(), loc.AsRegisterPairLow<Register>(), shifter);
@@ -2716,6 +2860,27 @@ void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, Register
__ Bind(&done);
}
+void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, int shift) {
+ Register low = loc.AsRegisterPairLow<Register>();
+ Register high = loc.AsRegisterPairHigh<Register>();
+ if (shift == 32) {
+ // Need to copy the sign.
+ DCHECK_NE(low, high);
+ __ movl(low, high);
+ __ sarl(high, Immediate(31));
+ } else if (shift > 32) {
+ DCHECK_NE(low, high);
+ // High part becomes sign. Low part is shifted by shift - 32.
+ __ movl(low, high);
+ __ sarl(high, Immediate(31));
+ __ shrl(low, Immediate(shift - 32));
+ } else {
+ // Between 1 and 31.
+ __ shrd(low, high, Immediate(shift));
+ __ sarl(high, Immediate(shift));
+ }
+}
+
void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, Register shifter) {
Label done;
__ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter);
@@ -2727,6 +2892,26 @@ void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, Register
__ Bind(&done);
}
+void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, int shift) {
+ Register low = loc.AsRegisterPairLow<Register>();
+ Register high = loc.AsRegisterPairHigh<Register>();
+ if (shift == 32) {
+ // Shift by 32 is easy. Low gets high, and high gets 0.
+ codegen_->EmitParallelMoves(
+ loc.ToHigh(), loc.ToLow(),
+ Location::ConstantLocation(GetGraph()->GetIntConstant(0)), loc.ToHigh());
+ } else if (shift > 32) {
+ // Low part is high >> (shift - 32). High part becomes 0.
+ __ movl(low, high);
+ __ shrl(low, Immediate(shift - 32));
+ __ xorl(high, high);
+ } else {
+ // Between 1 and 31.
+ __ shrd(low, high, Immediate(shift));
+ __ shrl(high, Immediate(shift));
+ }
+}
+
void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, Register shifter) {
Label done;
__ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter);
@@ -3301,7 +3486,7 @@ void InstructionCodeGeneratorX86::GenerateExplicitNullCheck(HNullCheck* instruct
Location obj = locations->InAt(0);
if (obj.IsRegister()) {
- __ cmpl(obj.AsRegister<Register>(), Immediate(0));
+ __ testl(obj.AsRegister<Register>(), obj.AsRegister<Register>());
} else if (obj.IsStackSlot()) {
__ cmpl(Address(ESP, obj.GetStackIndex()), Immediate(0));
} else {
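
testl reg, reg sets the zero flag exactly as cmpl reg, 0 does, with a shorter encoding since no immediate byte is needed. A standalone check of the flag equivalence (not ART code):

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t objs[] = {0u, 1u, 0xdeadbeefu};
      for (uint32_t obj : objs) {
        bool zf_test = (obj & obj) == 0;  // testl: ZF from reg AND reg
        bool zf_cmp = (obj - 0u) == 0;    // cmpl:  ZF from reg - 0
        assert(zf_test == zf_cmp);
      }
      return 0;
    }
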
@@ -3487,7 +3672,13 @@ void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) {
// Ensure the value is in a byte register.
locations->SetInAt(2, Location::ByteRegisterOrConstant(EAX, instruction->InputAt(2)));
} else {
- locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
+ bool is_fp_type = (value_type == Primitive::kPrimFloat)
+ || (value_type == Primitive::kPrimDouble);
+ if (is_fp_type) {
+ locations->SetInAt(2, Location::RequiresFpuRegister());
+ } else {
+ locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
+ }
}
// Temporary registers for the write barrier.
if (needs_write_barrier) {
@@ -3766,23 +3957,43 @@ X86Assembler* ParallelMoveResolverX86::GetAssembler() const {
}
void ParallelMoveResolverX86::MoveMemoryToMemory32(int dst, int src) {
- ScratchRegisterScope ensure_scratch(
- this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
- Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister());
- int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
- __ movl(temp_reg, Address(ESP, src + stack_offset));
- __ movl(Address(ESP, dst + stack_offset), temp_reg);
+ ScratchRegisterScope possible_scratch(
+ this, kNoRegister, codegen_->GetNumberOfCoreRegisters());
+ int temp = possible_scratch.GetRegister();
+ if (temp == kNoRegister) {
+ // Use the stack.
+ __ pushl(Address(ESP, src));
+ __ popl(Address(ESP, dst));
+ } else {
+ Register temp_reg = static_cast<Register>(temp);
+ __ movl(temp_reg, Address(ESP, src));
+ __ movl(Address(ESP, dst), temp_reg);
+ }
}
void ParallelMoveResolverX86::MoveMemoryToMemory64(int dst, int src) {
- ScratchRegisterScope ensure_scratch(
- this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
- Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister());
- int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
- __ movl(temp_reg, Address(ESP, src + stack_offset));
- __ movl(Address(ESP, dst + stack_offset), temp_reg);
- __ movl(temp_reg, Address(ESP, src + stack_offset + kX86WordSize));
- __ movl(Address(ESP, dst + stack_offset + kX86WordSize), temp_reg);
+ ScratchRegisterScope possible_scratch(
+ this, kNoRegister, codegen_->GetNumberOfCoreRegisters());
+ int temp = possible_scratch.GetRegister();
+ if (temp == kNoRegister) {
+ // Use the stack instead.
+ // Push src low word.
+ __ pushl(Address(ESP, src));
+ // Push src high word. Stack offset = 4.
+ __ pushl(Address(ESP, src + 4 /* offset */ + kX86WordSize /* high */));
+
+ // Pop into dst high word. Stack offset = 8.
+ // Pop with ESP address uses the 'after increment' value of ESP.
+ __ popl(Address(ESP, dst + 4 /* offset */ + kX86WordSize /* high */));
+ // Finally dst low word. Stack offset = 4.
+ __ popl(Address(ESP, dst));
+ } else {
+ Register temp_reg = static_cast<Register>(temp);
+ __ movl(temp_reg, Address(ESP, src));
+ __ movl(Address(ESP, dst), temp_reg);
+ __ movl(temp_reg, Address(ESP, src + kX86WordSize));
+ __ movl(Address(ESP, dst + kX86WordSize), temp_reg);
+ }
}
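
When no scratch register is free, the 64-bit move goes through the stack; the extra kX86WordSize terms account for ESP having moved under the pushes, and a pop with an ESP-relative destination uses the incremented ESP, as the comments note. A standalone simulation over a toy memory (not ART code):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    int main() {
      const int kWord = 4;
      std::vector<uint32_t> mem(32, 0);  // toy stack; byte offset / 4 indexes it
      int esp = 8;                       // byte offset of the stack top
      auto word = [&](int off) -> uint32_t& { return mem[(esp + off) / kWord]; };
      auto pushl_from = [&](int off) {
        uint32_t v = word(off);          // operand read before ESP moves
        esp -= kWord;
        mem[esp / kWord] = v;
      };
      auto popl_to = [&](int off) {
        uint32_t v = mem[esp / kWord];
        esp += kWord;                    // pop uses the incremented ESP
        word(off) = v;
      };

      const int src = 16, dst = 24;      // hypothetical slots
      word(src) = 0x11111111u;
      word(src + 4) = 0x22222222u;

      pushl_from(src);                   // push src low word
      pushl_from(src + 4 + kWord);       // push src high word; ESP moved one word
      popl_to(dst + 4 + kWord);          // pop into dst high word
      popl_to(dst);                      // pop into dst low word

      assert(word(dst) == 0x11111111u && word(dst + 4) == 0x22222222u);
      return 0;
    }
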
void ParallelMoveResolverX86::EmitMove(size_t index) {
@@ -3847,10 +4058,18 @@ void ParallelMoveResolverX86::EmitMove(size_t index) {
__ xorps(dest, dest);
} else {
ScratchRegisterScope ensure_scratch(
- this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
- Register temp = static_cast<Register>(ensure_scratch.GetRegister());
- __ movl(temp, Immediate(value));
- __ movd(dest, temp);
+ this, kNoRegister, codegen_->GetNumberOfCoreRegisters());
+ int temp_reg = ensure_scratch.GetRegister();
+ if (temp_reg == kNoRegister) {
+ // Avoid spilling/restoring a scratch register by using the stack.
+ __ pushl(Immediate(value));
+ __ movss(dest, Address(ESP, 0));
+ __ addl(ESP, Immediate(4));
+ } else {
+ Register temp = static_cast<Register>(temp_reg);
+ __ movl(temp, Immediate(value));
+ __ movd(dest, temp);
+ }
}
} else {
DCHECK(destination.IsStackSlot()) << destination;
@@ -3899,42 +4118,96 @@ void ParallelMoveResolverX86::EmitMove(size_t index) {
}
}
-void ParallelMoveResolverX86::Exchange(Register reg, int mem) {
- Register suggested_scratch = reg == EAX ? EBX : EAX;
- ScratchRegisterScope ensure_scratch(
- this, reg, suggested_scratch, codegen_->GetNumberOfCoreRegisters());
+void ParallelMoveResolverX86::Exchange(Register reg1, Register reg2) {
+ // Prefer to avoid xchg as it isn't speedy on smaller processors.
+ ScratchRegisterScope possible_scratch(
+ this, reg1, codegen_->GetNumberOfCoreRegisters());
+ int temp_reg = possible_scratch.GetRegister();
+ if (temp_reg == kNoRegister || temp_reg == reg2) {
+ __ pushl(reg1);
+ __ movl(reg1, reg2);
+ __ popl(reg2);
+ } else {
+ Register temp = static_cast<Register>(temp_reg);
+ __ movl(temp, reg1);
+ __ movl(reg1, reg2);
+ __ movl(reg2, temp);
+ }
+}
- int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
- __ movl(static_cast<Register>(ensure_scratch.GetRegister()), Address(ESP, mem + stack_offset));
- __ movl(Address(ESP, mem + stack_offset), reg);
- __ movl(reg, static_cast<Register>(ensure_scratch.GetRegister()));
+void ParallelMoveResolverX86::Exchange(Register reg, int mem) {
+ ScratchRegisterScope possible_scratch(
+ this, reg, codegen_->GetNumberOfCoreRegisters());
+ int temp_reg = possible_scratch.GetRegister();
+ if (temp_reg == kNoRegister) {
+ __ pushl(Address(ESP, mem));
+ __ movl(Address(ESP, mem + kX86WordSize), reg);
+ __ popl(reg);
+ } else {
+ Register temp = static_cast<Register>(temp_reg);
+ __ movl(temp, Address(ESP, mem));
+ __ movl(Address(ESP, mem), reg);
+ __ movl(reg, temp);
+ }
}
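
With a free scratch register the swap is three moves; when the only candidate is taken (or is reg2 itself) the resolver goes through the stack instead, still avoiding xchg, which the comment above notes is slow on smaller cores. A standalone sketch of both strategies (not ART code):

    #include <cassert>

    int main() {
      int reg1 = 1, reg2 = 2;
      // With a scratch register: three moves.
      int temp = reg1;        // movl(temp, reg1)
      reg1 = reg2;            // movl(reg1, reg2)
      reg2 = temp;            // movl(reg2, temp)
      assert(reg1 == 2 && reg2 == 1);
      // Without one: spill reg1 to the stack, move, then pop into reg2.
      int stack_slot = reg1;  // pushl(reg1)
      reg1 = reg2;            // movl(reg1, reg2)
      reg2 = stack_slot;      // popl(reg2)
      assert(reg1 == 1 && reg2 == 2);
      return 0;
    }
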
void ParallelMoveResolverX86::Exchange32(XmmRegister reg, int mem) {
- ScratchRegisterScope ensure_scratch(
- this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
-
- Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister());
- int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
- __ movl(temp_reg, Address(ESP, mem + stack_offset));
- __ movss(Address(ESP, mem + stack_offset), reg);
- __ movd(reg, temp_reg);
+ ScratchRegisterScope possible_scratch(
+ this, kNoRegister, codegen_->GetNumberOfCoreRegisters());
+ int temp_reg = possible_scratch.GetRegister();
+ if (temp_reg == kNoRegister) {
+ __ pushl(Address(ESP, mem));
+ __ movss(Address(ESP, mem + kX86WordSize), reg);
+ __ movss(reg, Address(ESP, 0));
+ __ addl(ESP, Immediate(kX86WordSize));
+ } else {
+ Register temp = static_cast<Register>(temp_reg);
+ __ movl(temp, Address(ESP, mem));
+ __ movss(Address(ESP, mem), reg);
+ __ movd(reg, temp);
+ }
}
void ParallelMoveResolverX86::Exchange(int mem1, int mem2) {
- ScratchRegisterScope ensure_scratch1(
- this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
-
- Register suggested_scratch = ensure_scratch1.GetRegister() == EAX ? EBX : EAX;
- ScratchRegisterScope ensure_scratch2(
- this, ensure_scratch1.GetRegister(), suggested_scratch, codegen_->GetNumberOfCoreRegisters());
-
- int stack_offset = ensure_scratch1.IsSpilled() ? kX86WordSize : 0;
- stack_offset += ensure_scratch2.IsSpilled() ? kX86WordSize : 0;
- __ movl(static_cast<Register>(ensure_scratch1.GetRegister()), Address(ESP, mem1 + stack_offset));
- __ movl(static_cast<Register>(ensure_scratch2.GetRegister()), Address(ESP, mem2 + stack_offset));
- __ movl(Address(ESP, mem2 + stack_offset), static_cast<Register>(ensure_scratch1.GetRegister()));
- __ movl(Address(ESP, mem1 + stack_offset), static_cast<Register>(ensure_scratch2.GetRegister()));
+ ScratchRegisterScope possible_scratch1(
+ this, kNoRegister, codegen_->GetNumberOfCoreRegisters());
+ int temp_reg1 = possible_scratch1.GetRegister();
+ if (temp_reg1 == kNoRegister) {
+ // No free registers. Use the stack.
+ __ pushl(Address(ESP, mem1));
+ __ pushl(Address(ESP, mem2 + kX86WordSize));
+ // Pop with ESP address uses the 'after increment' value of ESP.
+ __ popl(Address(ESP, mem1 + kX86WordSize));
+ __ popl(Address(ESP, mem2));
+ } else {
+ // Got the first one. Try for a second.
+ ScratchRegisterScope possible_scratch2(
+ this, temp_reg1, codegen_->GetNumberOfCoreRegisters());
+ int temp_reg2 = possible_scratch2.GetRegister();
+ if (temp_reg2 == kNoRegister) {
+ Register temp = static_cast<Register>(temp_reg1);
+ // Bummer. Only have one free register to use.
+ // Save mem1 on the stack.
+ __ pushl(Address(ESP, mem1));
+
+ // Copy mem2 into mem1.
+ __ movl(temp, Address(ESP, mem2 + kX86WordSize));
+ __ movl(Address(ESP, mem1 + kX86WordSize), temp);
+
+ // Now pop mem1 into mem2.
+ // Pop with ESP address uses the 'after increment' value of ESP.
+ __ popl(Address(ESP, mem2));
+ } else {
+ // Great. We have 2 registers to play with.
+ Register temp1 = static_cast<Register>(temp_reg1);
+ Register temp2 = static_cast<Register>(temp_reg2);
+ DCHECK_NE(temp1, temp2);
+ __ movl(temp1, Address(ESP, mem1));
+ __ movl(temp2, Address(ESP, mem2));
+ __ movl(Address(ESP, mem2), temp1);
+ __ movl(Address(ESP, mem1), temp2);
+ }
+ }
}
void ParallelMoveResolverX86::EmitSwap(size_t index) {
@@ -3943,7 +4216,7 @@ void ParallelMoveResolverX86::EmitSwap(size_t index) {
Location destination = move->GetDestination();
if (source.IsRegister() && destination.IsRegister()) {
- __ xchgl(destination.AsRegister<Register>(), source.AsRegister<Register>());
+ Exchange(destination.AsRegister<Register>(), source.AsRegister<Register>());
} else if (source.IsRegister() && destination.IsStackSlot()) {
Exchange(source.AsRegister<Register>(), destination.GetStackIndex());
} else if (source.IsStackSlot() && destination.IsRegister()) {
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index a5489d2844..8c56e35329 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -106,6 +106,7 @@ class ParallelMoveResolverX86 : public ParallelMoveResolver {
X86Assembler* GetAssembler() const;
private:
+ void Exchange(Register reg1, Register reg2);
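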
void Exchange(Register reg, int mem);
void Exchange(int mem1, int mem2);
void Exchange32(XmmRegister reg, int mem);
@@ -171,6 +172,9 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor {
void GenerateShlLong(const Location& loc, Register shifter);
void GenerateShrLong(const Location& loc, Register shifter);
void GenerateUShrLong(const Location& loc, Register shifter);
+ void GenerateShlLong(const Location& loc, int shift);
+ void GenerateShrLong(const Location& loc, int shift);
+ void GenerateUShrLong(const Location& loc, int shift);
void GenerateMemoryBarrier(MemBarrierKind kind);
void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
@@ -179,6 +183,10 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor {
void GenerateImplicitNullCheck(HNullCheck* instruction);
void GenerateExplicitNullCheck(HNullCheck* instruction);
+ void GenerateTestAndBranch(HInstruction* instruction,
+ Label* true_target,
+ Label* false_target,
+ Label* always_true_target);
X86Assembler* const assembler_;
CodeGeneratorX86* const codegen_;
@@ -188,7 +196,9 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor {
class CodeGeneratorX86 : public CodeGenerator {
public:
- CodeGeneratorX86(HGraph* graph, const CompilerOptions& compiler_options);
+ CodeGeneratorX86(HGraph* graph,
+ const X86InstructionSetFeatures& isa_features,
+ const CompilerOptions& compiler_options);
virtual ~CodeGeneratorX86() {}
void GenerateFrameEntry() OVERRIDE;
@@ -274,6 +284,10 @@ class CodeGeneratorX86 : public CodeGenerator {
Label* GetFrameEntryLabel() { return &frame_entry_label_; }
+ const X86InstructionSetFeatures& GetInstructionSetFeatures() const {
+ return isa_features_;
+ }
+
private:
// Labels for each block that will be compiled.
GrowableArray<Label> block_labels_;
@@ -282,6 +296,7 @@ class CodeGeneratorX86 : public CodeGenerator {
InstructionCodeGeneratorX86 instruction_visitor_;
ParallelMoveResolverX86 move_resolver_;
X86Assembler assembler_;
+ const X86InstructionSetFeatures& isa_features_;
DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86);
};
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 2bb0349932..01b24ea33f 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -315,6 +315,27 @@ class TypeCheckSlowPathX86_64 : public SlowPathCodeX86_64 {
DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86_64);
};
+class DeoptimizationSlowPathX86_64 : public SlowPathCodeX86_64 {
+ public:
+ explicit DeoptimizationSlowPathX86_64(HInstruction* instruction)
+ : instruction_(instruction) {}
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ __ Bind(GetEntryLabel());
+ SaveLiveRegisters(codegen, instruction_->GetLocations());
+ __ gs()->call(
+ Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pDeoptimize), true));
+ DCHECK(instruction_->IsDeoptimize());
+ HDeoptimize* deoptimize = instruction_->AsDeoptimize();
+ uint32_t dex_pc = deoptimize->GetDexPc();
+ codegen->RecordPcInfo(instruction_, dex_pc, this);
+ }
+
+ private:
+ HInstruction* const instruction_;
+ DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86_64);
+};
+
#undef __
#define __ reinterpret_cast<X86_64Assembler*>(GetAssembler())->
@@ -391,7 +412,9 @@ size_t CodeGeneratorX86_64::RestoreFloatingPointRegister(size_t stack_index, uin
static constexpr int kNumberOfCpuRegisterPairs = 0;
// Use a fake return address register to mimic Quick.
static constexpr Register kFakeReturnRegister = Register(kLastCpuRegister + 1);
-CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, const CompilerOptions& compiler_options)
+CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph,
+ const X86_64InstructionSetFeatures& isa_features,
+ const CompilerOptions& compiler_options)
: CodeGenerator(graph,
kNumberOfCpuRegisters,
kNumberOfFloatRegisters,
@@ -405,7 +428,9 @@ CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, const CompilerOptions& c
block_labels_(graph->GetArena(), 0),
location_builder_(graph, this),
instruction_visitor_(graph, this),
- move_resolver_(graph->GetArena(), this) {
+ move_resolver_(graph->GetArena(), this),
+ isa_features_(isa_features),
+ constant_area_start_(0) {
AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
}
@@ -458,7 +483,15 @@ void CodeGeneratorX86_64::SetupBlockedRegisters(bool is_baseline) const {
}
}
+static dwarf::Reg DWARFReg(Register reg) {
+ return dwarf::Reg::X86_64Core(static_cast<int>(reg));
+}
+static dwarf::Reg DWARFReg(FloatRegister reg) {
+ return dwarf::Reg::X86_64Fp(static_cast<int>(reg));
+}
+
void CodeGeneratorX86_64::GenerateFrameEntry() {
+ __ cfi().SetCurrentCFAOffset(kX86_64WordSize); // return address
__ Bind(&frame_entry_label_);
bool skip_overflow_check = IsLeafMethod()
&& !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64);
@@ -478,17 +511,22 @@ void CodeGeneratorX86_64::GenerateFrameEntry() {
Register reg = kCoreCalleeSaves[i];
if (allocated_registers_.ContainsCoreRegister(reg)) {
__ pushq(CpuRegister(reg));
+ __ cfi().AdjustCFAOffset(kX86_64WordSize);
+ __ cfi().RelOffset(DWARFReg(reg), 0);
}
}
- __ subq(CpuRegister(RSP), Immediate(GetFrameSize() - GetCoreSpillSize()));
+ int adjust = GetFrameSize() - GetCoreSpillSize();
+ __ subq(CpuRegister(RSP), Immediate(adjust));
+ __ cfi().AdjustCFAOffset(adjust);
uint32_t xmm_spill_location = GetFpuSpillStart();
size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize();
for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) {
if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
- __ movsd(Address(CpuRegister(RSP), xmm_spill_location + (xmm_spill_slot_size * i)),
- XmmRegister(kFpuCalleeSaves[i]));
+ int offset = xmm_spill_location + (xmm_spill_slot_size * i);
+ __ movsd(Address(CpuRegister(RSP), offset), XmmRegister(kFpuCalleeSaves[i]));
+ __ cfi().RelOffset(DWARFReg(kFpuCalleeSaves[i]), offset);
}
}
@@ -503,17 +541,22 @@ void CodeGeneratorX86_64::GenerateFrameExit() {
size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize();
for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
- __ movsd(XmmRegister(kFpuCalleeSaves[i]),
- Address(CpuRegister(RSP), xmm_spill_location + (xmm_spill_slot_size * i)));
+ int offset = xmm_spill_location + (xmm_spill_slot_size * i);
+ __ movsd(XmmRegister(kFpuCalleeSaves[i]), Address(CpuRegister(RSP), offset));
+ __ cfi().Restore(DWARFReg(kFpuCalleeSaves[i]));
}
}
- __ addq(CpuRegister(RSP), Immediate(GetFrameSize() - GetCoreSpillSize()));
+ int adjust = GetFrameSize() - GetCoreSpillSize();
+ __ addq(CpuRegister(RSP), Immediate(adjust));
+ __ cfi().AdjustCFAOffset(-adjust);
for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
Register reg = kCoreCalleeSaves[i];
if (allocated_registers_.ContainsCoreRegister(reg)) {
__ popq(CpuRegister(reg));
+ __ cfi().AdjustCFAOffset(-static_cast<int>(kX86_64WordSize));
+ __ cfi().Restore(DWARFReg(reg));
}
}
}
@@ -532,7 +575,6 @@ Location CodeGeneratorX86_64::GetStackLocation(HLoadLocal* load) const {
case Primitive::kPrimLong:
case Primitive::kPrimDouble:
return Location::DoubleStackSlot(GetStackSlot(load->GetLocal()));
- break;
case Primitive::kPrimInt:
case Primitive::kPrimNot:
@@ -545,10 +587,11 @@ Location CodeGeneratorX86_64::GetStackLocation(HLoadLocal* load) const {
case Primitive::kPrimShort:
case Primitive::kPrimVoid:
LOG(FATAL) << "Unexpected type " << load->GetType();
+ UNREACHABLE();
}
LOG(FATAL) << "Unreachable";
- return Location();
+ UNREACHABLE();
}
void CodeGeneratorX86_64::Move(Location destination, Location source) {
@@ -607,7 +650,7 @@ void CodeGeneratorX86_64::Move(Location destination, Location source) {
source.AsFpuRegister<XmmRegister>());
} else if (source.IsConstant()) {
HConstant* constant = source.GetConstant();
- int64_t value = constant->AsLongConstant()->GetValue();
+ int64_t value;
if (constant->IsDoubleConstant()) {
value = bit_cast<int64_t, double>(constant->AsDoubleConstant()->GetValue());
} else {
@@ -735,24 +778,17 @@ void InstructionCodeGeneratorX86_64::VisitExit(HExit* exit) {
UNUSED(exit);
}
-void LocationsBuilderX86_64::VisitIf(HIf* if_instr) {
- LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(if_instr, LocationSummary::kNoCall);
- HInstruction* cond = if_instr->InputAt(0);
- if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
- locations->SetInAt(0, Location::Any());
- }
-}
-
-void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) {
- HInstruction* cond = if_instr->InputAt(0);
+void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruction,
+ Label* true_target,
+ Label* false_target,
+ Label* always_true_target) {
+ HInstruction* cond = instruction->InputAt(0);
if (cond->IsIntConstant()) {
// Constant condition, statically compared against 1.
int32_t cond_value = cond->AsIntConstant()->GetValue();
if (cond_value == 1) {
- if (!codegen_->GoesToNextBlock(if_instr->GetBlock(),
- if_instr->IfTrueSuccessor())) {
- __ jmp(codegen_->GetLabelOf(if_instr->IfTrueSuccessor()));
+ if (always_true_target != nullptr) {
+ __ jmp(always_true_target);
}
return;
} else {
@@ -765,21 +801,20 @@ void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) {
// evaluated just before the if, we don't need to evaluate it
// again.
bool eflags_set = cond->IsCondition()
- && cond->AsCondition()->IsBeforeWhenDisregardMoves(if_instr);
+ && cond->AsCondition()->IsBeforeWhenDisregardMoves(instruction);
if (materialized) {
if (!eflags_set) {
// Materialized condition, compare against 0.
- Location lhs = if_instr->GetLocations()->InAt(0);
+ Location lhs = instruction->GetLocations()->InAt(0);
if (lhs.IsRegister()) {
__ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>());
} else {
__ cmpl(Address(CpuRegister(RSP), lhs.GetStackIndex()),
Immediate(0));
}
- __ j(kNotEqual, codegen_->GetLabelOf(if_instr->IfTrueSuccessor()));
+ __ j(kNotEqual, true_target);
} else {
- __ j(X86_64Condition(cond->AsCondition()->GetCondition()),
- codegen_->GetLabelOf(if_instr->IfTrueSuccessor()));
+ __ j(X86_64Condition(cond->AsCondition()->GetCondition()), true_target);
}
} else {
Location lhs = cond->GetLocations()->InAt(0);
@@ -797,16 +832,56 @@ void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) {
__ cmpl(lhs.AsRegister<CpuRegister>(),
Address(CpuRegister(RSP), rhs.GetStackIndex()));
}
- __ j(X86_64Condition(cond->AsCondition()->GetCondition()),
- codegen_->GetLabelOf(if_instr->IfTrueSuccessor()));
+ __ j(X86_64Condition(cond->AsCondition()->GetCondition()), true_target);
}
}
- if (!codegen_->GoesToNextBlock(if_instr->GetBlock(),
- if_instr->IfFalseSuccessor())) {
- __ jmp(codegen_->GetLabelOf(if_instr->IfFalseSuccessor()));
+ if (false_target != nullptr) {
+ __ jmp(false_target);
+ }
+}
+
+void LocationsBuilderX86_64::VisitIf(HIf* if_instr) {
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(if_instr, LocationSummary::kNoCall);
+ HInstruction* cond = if_instr->InputAt(0);
+ if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
+ locations->SetInAt(0, Location::Any());
}
}
+void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) {
+ Label* true_target = codegen_->GetLabelOf(if_instr->IfTrueSuccessor());
+ Label* false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor());
+ Label* always_true_target = true_target;
+ if (codegen_->GoesToNextBlock(if_instr->GetBlock(),
+ if_instr->IfTrueSuccessor())) {
+ always_true_target = nullptr;
+ }
+ if (codegen_->GoesToNextBlock(if_instr->GetBlock(),
+ if_instr->IfFalseSuccessor())) {
+ false_target = nullptr;
+ }
+ GenerateTestAndBranch(if_instr, true_target, false_target, always_true_target);
+}
+
+void LocationsBuilderX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
+ LocationSummary* locations = new (GetGraph()->GetArena())
+ LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
+ HInstruction* cond = deoptimize->InputAt(0);
+ DCHECK(cond->IsCondition());
+ if (cond->AsCondition()->NeedsMaterialization()) {
+ locations->SetInAt(0, Location::Any());
+ }
+}
+
+void InstructionCodeGeneratorX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
+ SlowPathCodeX86_64* slow_path = new (GetGraph()->GetArena())
+ DeoptimizationSlowPathX86_64(deoptimize);
+ codegen_->AddSlowPath(slow_path);
+ Label* slow_path_entry = slow_path->GetEntryLabel();
+ GenerateTestAndBranch(deoptimize, slow_path_entry, nullptr, slow_path_entry);
+}
+
void LocationsBuilderX86_64::VisitLocal(HLocal* local) {
local->SetLocations(nullptr);
}
@@ -1068,8 +1143,11 @@ void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) {
void InstructionCodeGeneratorX86_64::VisitReturnVoid(HReturnVoid* ret) {
UNUSED(ret);
+ __ cfi().RememberState();
codegen_->GenerateFrameExit();
__ ret();
+ __ cfi().RestoreState();
+ __ cfi().DefCFAOffset(codegen_->GetFrameSize());
}
void LocationsBuilderX86_64::VisitReturn(HReturn* ret) {
@@ -1120,8 +1198,11 @@ void InstructionCodeGeneratorX86_64::VisitReturn(HReturn* ret) {
LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType();
}
}
+ __ cfi().RememberState();
codegen_->GenerateFrameExit();
__ ret();
+ __ cfi().RestoreState();
+ __ cfi().DefCFAOffset(codegen_->GetFrameSize());
}
Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type) {
@@ -1181,7 +1262,7 @@ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type
}
void LocationsBuilderX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
- IntrinsicLocationsBuilderX86_64 intrinsic(GetGraph()->GetArena());
+ IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
if (intrinsic.TryDispatch(invoke)) {
return;
}
@@ -1242,7 +1323,7 @@ void LocationsBuilderX86_64::HandleInvoke(HInvoke* invoke) {
}
void LocationsBuilderX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
- IntrinsicLocationsBuilderX86_64 intrinsic(GetGraph()->GetArena());
+ IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
if (intrinsic.TryDispatch(invoke)) {
return;
}
@@ -1896,7 +1977,7 @@ void LocationsBuilderX86_64::VisitAdd(HAdd* add) {
case Primitive::kPrimDouble:
case Primitive::kPrimFloat: {
locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::Any());
locations->SetOut(Location::SameAsFirstInput());
break;
}
@@ -1960,12 +2041,30 @@ void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) {
}
case Primitive::kPrimFloat: {
- __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ if (second.IsFpuRegister()) {
+ __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ } else if (second.IsConstant()) {
+ __ addss(first.AsFpuRegister<XmmRegister>(),
+ codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue()));
+ } else {
+ DCHECK(second.IsStackSlot());
+ __ addss(first.AsFpuRegister<XmmRegister>(),
+ Address(CpuRegister(RSP), second.GetStackIndex()));
+ }
break;
}
case Primitive::kPrimDouble: {
- __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ if (second.IsFpuRegister()) {
+ __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ } else if (second.IsConstant()) {
+ __ addsd(first.AsFpuRegister<XmmRegister>(),
+ codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue()));
+ } else {
+ DCHECK(second.IsDoubleStackSlot());
+ __ addsd(first.AsFpuRegister<XmmRegister>(),
+ Address(CpuRegister(RSP), second.GetStackIndex()));
+ }
break;
}
@@ -1993,7 +2092,7 @@ void LocationsBuilderX86_64::VisitSub(HSub* sub) {
case Primitive::kPrimFloat:
case Primitive::kPrimDouble: {
locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::Any());
locations->SetOut(Location::SameAsFirstInput());
break;
}
@@ -2031,12 +2130,30 @@ void InstructionCodeGeneratorX86_64::VisitSub(HSub* sub) {
}
case Primitive::kPrimFloat: {
- __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ if (second.IsFpuRegister()) {
+ __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ } else if (second.IsConstant()) {
+ __ subss(first.AsFpuRegister<XmmRegister>(),
+ codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue()));
+ } else {
+ DCHECK(second.IsStackSlot());
+ __ subss(first.AsFpuRegister<XmmRegister>(),
+ Address(CpuRegister(RSP), second.GetStackIndex()));
+ }
break;
}
case Primitive::kPrimDouble: {
- __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ if (second.IsFpuRegister()) {
+ __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ } else if (second.IsConstant()) {
+ __ subsd(first.AsFpuRegister<XmmRegister>(),
+ codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue()));
+ } else {
+ DCHECK(second.IsDoubleStackSlot());
+ __ subsd(first.AsFpuRegister<XmmRegister>(),
+ Address(CpuRegister(RSP), second.GetStackIndex()));
+ }
break;
}
@@ -2069,7 +2186,7 @@ void LocationsBuilderX86_64::VisitMul(HMul* mul) {
case Primitive::kPrimFloat:
case Primitive::kPrimDouble: {
locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::Any());
locations->SetOut(Location::SameAsFirstInput());
break;
}
@@ -2114,13 +2231,31 @@ void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) {
case Primitive::kPrimFloat: {
DCHECK(first.Equals(locations->Out()));
- __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ if (second.IsFpuRegister()) {
+ __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ } else if (second.IsConstant()) {
+ __ mulss(first.AsFpuRegister<XmmRegister>(),
+ codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue()));
+ } else {
+ DCHECK(second.IsStackSlot());
+ __ mulss(first.AsFpuRegister<XmmRegister>(),
+ Address(CpuRegister(RSP), second.GetStackIndex()));
+ }
break;
}
case Primitive::kPrimDouble: {
DCHECK(first.Equals(locations->Out()));
- __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ if (second.IsFpuRegister()) {
+ __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ } else if (second.IsConstant()) {
+ __ mulsd(first.AsFpuRegister<XmmRegister>(),
+ codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue()));
+ } else {
+ DCHECK(second.IsDoubleStackSlot());
+ __ mulsd(first.AsFpuRegister<XmmRegister>(),
+ Address(CpuRegister(RSP), second.GetStackIndex()));
+ }
break;
}
@@ -2493,7 +2628,7 @@ void LocationsBuilderX86_64::VisitDiv(HDiv* div) {
case Primitive::kPrimFloat:
case Primitive::kPrimDouble: {
locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::Any());
locations->SetOut(Location::SameAsFirstInput());
break;
}
@@ -2518,12 +2653,30 @@ void InstructionCodeGeneratorX86_64::VisitDiv(HDiv* div) {
}
case Primitive::kPrimFloat: {
- __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ if (second.IsFpuRegister()) {
+ __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ } else if (second.IsConstant()) {
+ __ divss(first.AsFpuRegister<XmmRegister>(),
+ codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue()));
+ } else {
+ DCHECK(second.IsStackSlot());
+ __ divss(first.AsFpuRegister<XmmRegister>(),
+ Address(CpuRegister(RSP), second.GetStackIndex()));
+ }
break;
}
case Primitive::kPrimDouble: {
- __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ if (second.IsFpuRegister()) {
+ __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ } else if (second.IsConstant()) {
+ __ divsd(first.AsFpuRegister<XmmRegister>(),
+ codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue()));
+ } else {
+ DCHECK(second.IsDoubleStackSlot());
+ __ divsd(first.AsFpuRegister<XmmRegister>(),
+ Address(CpuRegister(RSP), second.GetStackIndex()));
+ }
break;
}
@@ -3668,15 +3821,27 @@ void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg, int mem) {
void ParallelMoveResolverX86_64::Exchange64(int mem1, int mem2) {
ScratchRegisterScope ensure_scratch(
- this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
+ this, TMP, codegen_->GetNumberOfCoreRegisters());
- int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
- __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset));
- __ movq(CpuRegister(ensure_scratch.GetRegister()),
- Address(CpuRegister(RSP), mem2 + stack_offset));
- __ movq(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP));
- __ movq(Address(CpuRegister(RSP), mem1 + stack_offset),
- CpuRegister(ensure_scratch.GetRegister()));
+ int temp_reg = ensure_scratch.GetRegister();
+ if (temp_reg == kNoRegister) {
+ // Use the stack as a temporary.
+ // Save mem1 on the stack.
+ __ pushq(Address(CpuRegister(RSP), mem1));
+
+ // Copy mem2 into mem1.
+ __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem2 + kX86_64WordSize));
+ __ movq(Address(CpuRegister(RSP), mem1 + kX86_64WordSize), CpuRegister(TMP));
+
+ // Now pop mem1 into mem2.
+ __ popq(Address(CpuRegister(RSP), mem2));
+ } else {
+ CpuRegister temp = CpuRegister(temp_reg);
+ __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem1));
+ __ movq(temp, Address(CpuRegister(RSP), mem2));
+ __ movq(Address(CpuRegister(RSP), mem2), CpuRegister(TMP));
+ __ movq(Address(CpuRegister(RSP), mem1), temp);
+ }
}
void ParallelMoveResolverX86_64::Exchange32(XmmRegister reg, int mem) {
@@ -3685,6 +3850,13 @@ void ParallelMoveResolverX86_64::Exchange32(XmmRegister reg, int mem) {
__ movd(reg, CpuRegister(TMP));
}
+void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg1, CpuRegister reg2) {
+ // Prefer to avoid xchg as it isn't speedy on smaller processors.
+ __ movq(CpuRegister(TMP), reg1);
+ __ movq(reg1, reg2);
+ __ movq(reg2, CpuRegister(TMP));
+}
+
void ParallelMoveResolverX86_64::Exchange64(XmmRegister reg, int mem) {
__ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
__ movsd(Address(CpuRegister(RSP), mem), reg);
@@ -3697,7 +3869,7 @@ void ParallelMoveResolverX86_64::EmitSwap(size_t index) {
Location destination = move->GetDestination();
if (source.IsRegister() && destination.IsRegister()) {
- __ xchgq(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>());
+ Exchange64(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>());
} else if (source.IsRegister() && destination.IsStackSlot()) {
Exchange32(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
} else if (source.IsStackSlot() && destination.IsRegister()) {
@@ -4062,5 +4234,66 @@ void InstructionCodeGeneratorX86_64::VisitBoundType(HBoundType* instruction) {
LOG(FATAL) << "Unreachable";
}
+void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) {
+ // Generate the constant area if needed.
+ X86_64Assembler* assembler = GetAssembler();
+ if (!assembler->IsConstantAreaEmpty()) {
+ // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8
+ // byte values. If used for vectors at a later time, this will need to be
+ // updated to 16 bytes with the appropriate offset.
+ assembler->Align(4, 0);
+ constant_area_start_ = assembler->CodeSize();
+ assembler->AddConstantArea();
+ }
+
+ // And finish up.
+ CodeGenerator::Finalize(allocator);
+}
+
+/**
+ * Class to handle late fixup of offsets into constant area.
+ */
+class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocMisc> {
+ public:
+ RIPFixup(const CodeGeneratorX86_64& codegen, int offset)
+ : codegen_(codegen), offset_into_constant_area_(offset) {}
+
+ private:
+ void Process(const MemoryRegion& region, int pos) OVERRIDE {
+ // Patch the correct offset for the instruction. We use the address of the
+ // 'next' instruction, which is 'pos' (patch the 4 bytes before).
+ int constant_offset = codegen_.ConstantAreaStart() + offset_into_constant_area_;
+ int relative_position = constant_offset - pos;
+
+ // Patch in the right value.
+ region.StoreUnaligned<int32_t>(pos - 4, relative_position);
+ }
+
+ const CodeGeneratorX86_64& codegen_;
+
+ // Location in constant area that the fixup refers to.
+ int offset_into_constant_area_;
+};
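
The displacement Process() patches is relative to the end of the instruction, because that is the RIP value the CPU uses when it adds a rel32. A standalone check of the arithmetic (not ART code; offsets are hypothetical):

    #include <cassert>

    int main() {
      const int constant_area_start = 0x400;       // hypothetical
      const int offset_into_constant_area = 0x10;  // hypothetical
      const int pos = 0x123;                       // end of the patched instruction
      int constant_offset = constant_area_start + offset_into_constant_area;
      int relative_position = constant_offset - pos;  // stored at pos - 4
      // The CPU computes RIP + disp32 with RIP at the next instruction (pos):
      assert(pos + relative_position == constant_offset);
      return 0;
    }
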
+
+Address CodeGeneratorX86_64::LiteralDoubleAddress(double v) {
+ AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddDouble(v));
+ return Address::RIP(fixup);
+}
+
+Address CodeGeneratorX86_64::LiteralFloatAddress(float v) {
+ AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddFloat(v));
+ return Address::RIP(fixup);
+}
+
+Address CodeGeneratorX86_64::LiteralInt32Address(int32_t v) {
+ AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddInt32(v));
+ return Address::RIP(fixup);
+}
+
+Address CodeGeneratorX86_64::LiteralInt64Address(int64_t v) {
+ AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddInt64(v));
+ return Address::RIP(fixup);
+}
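+
+// Sketch of intended use (hypothetical caller, for illustration only): a
+// visitor that needs a double literal can emit a RIP-relative load instead of
+// materializing the bits through a CPU register, e.g.
+//   __ movsd(xmm_temp, codegen->LiteralDoubleAddress(0.5));
+// The returned Address carries a RIPFixup, so the displacement is patched once
+// the constant area's final position is known after Finalize().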
+
} // namespace x86_64
} // namespace art
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index f6fbc2e6bc..61bf6ac71d 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -118,6 +118,7 @@ class ParallelMoveResolverX86_64 : public ParallelMoveResolver {
void Exchange32(CpuRegister reg, int mem);
void Exchange32(XmmRegister reg, int mem);
void Exchange32(int mem1, int mem2);
+ void Exchange64(CpuRegister reg1, CpuRegister reg2);
void Exchange64(CpuRegister reg, int mem);
void Exchange64(XmmRegister reg, int mem);
void Exchange64(int mem1, int mem2);
@@ -185,6 +186,10 @@ class InstructionCodeGeneratorX86_64 : public HGraphVisitor {
void GenerateExplicitNullCheck(HNullCheck* instruction);
void PushOntoFPStack(Location source, uint32_t temp_offset,
uint32_t stack_adjustment, bool is_float);
+ void GenerateTestAndBranch(HInstruction* instruction,
+ Label* true_target,
+ Label* false_target,
+ Label* always_true_target);
X86_64Assembler* const assembler_;
CodeGeneratorX86_64* const codegen_;
@@ -194,7 +199,9 @@ class InstructionCodeGeneratorX86_64 : public HGraphVisitor {
class CodeGeneratorX86_64 : public CodeGenerator {
public:
- CodeGeneratorX86_64(HGraph* graph, const CompilerOptions& compiler_options);
+ CodeGeneratorX86_64(HGraph* graph,
+ const X86_64InstructionSetFeatures& isa_features,
+ const CompilerOptions& compiler_options);
virtual ~CodeGeneratorX86_64() {}
void GenerateFrameEntry() OVERRIDE;
@@ -240,6 +247,7 @@ class CodeGeneratorX86_64 : public CodeGenerator {
Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE;
void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE;
void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE;
+ void Finalize(CodeAllocator* allocator) OVERRIDE;
InstructionSet GetInstructionSet() const OVERRIDE {
return InstructionSet::kX86_64;
@@ -267,6 +275,19 @@ class CodeGeneratorX86_64 : public CodeGenerator {
void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, CpuRegister temp);
+ const X86_64InstructionSetFeatures& GetInstructionSetFeatures() const {
+ return isa_features_;
+ }
+
+ int ConstantAreaStart() const {
+ return constant_area_start_;
+ }
+
+ Address LiteralDoubleAddress(double v);
+ Address LiteralFloatAddress(float v);
+ Address LiteralInt32Address(int32_t v);
+ Address LiteralInt64Address(int64_t v);
+
private:
// Labels for each block that will be compiled.
GrowableArray<Label> block_labels_;
@@ -275,6 +296,11 @@ class CodeGeneratorX86_64 : public CodeGenerator {
InstructionCodeGeneratorX86_64 instruction_visitor_;
ParallelMoveResolverX86_64 move_resolver_;
X86_64Assembler assembler_;
+ const X86_64InstructionSetFeatures& isa_features_;
+
+ // Offset to the start of the constant area in the assembled code.
+ // Used for fixups to the constant area.
+ int constant_area_start_;
DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86_64);
};
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index 6053ad51f4..2be117bf38 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -19,6 +19,8 @@
#include "arch/instruction_set.h"
#include "arch/arm/instruction_set_features_arm.h"
#include "arch/arm64/instruction_set_features_arm64.h"
+#include "arch/x86/instruction_set_features_x86.h"
+#include "arch/x86_64/instruction_set_features_x86_64.h"
#include "base/macros.h"
#include "builder.h"
#include "code_generator_arm.h"
@@ -108,7 +110,9 @@ static void RunCodeBaseline(HGraph* graph, bool has_result, Expected expected) {
InternalCodeAllocator allocator;
CompilerOptions compiler_options;
- x86::CodeGeneratorX86 codegenX86(graph, compiler_options);
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegenX86(graph, *features_x86.get(), compiler_options);
// We avoid doing a stack overflow check that requires the runtime being setup,
// by making sure the compiler knows the methods we are running are leaf methods.
codegenX86.CompileBaseline(&allocator, true);
@@ -124,7 +128,9 @@ static void RunCodeBaseline(HGraph* graph, bool has_result, Expected expected) {
Run(allocator, codegenARM, has_result, expected);
}
- x86_64::CodeGeneratorX86_64 codegenX86_64(graph, compiler_options);
+ std::unique_ptr<const X86_64InstructionSetFeatures> features_x86_64(
+ X86_64InstructionSetFeatures::FromCppDefines());
+ x86_64::CodeGeneratorX86_64 codegenX86_64(graph, *features_x86_64.get(), compiler_options);
codegenX86_64.CompileBaseline(&allocator, true);
if (kRuntimeISA == kX86_64) {
Run(allocator, codegenX86_64, has_result, expected);
@@ -175,10 +181,14 @@ static void RunCodeOptimized(HGraph* graph,
compiler_options);
RunCodeOptimized(&codegenARM64, graph, hook_before_codegen, has_result, expected);
} else if (kRuntimeISA == kX86) {
- x86::CodeGeneratorX86 codegenX86(graph, compiler_options);
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegenX86(graph, *features_x86.get(), compiler_options);
RunCodeOptimized(&codegenX86, graph, hook_before_codegen, has_result, expected);
} else if (kRuntimeISA == kX86_64) {
- x86_64::CodeGeneratorX86_64 codegenX86_64(graph, compiler_options);
+ std::unique_ptr<const X86_64InstructionSetFeatures> features_x86_64(
+ X86_64InstructionSetFeatures::FromCppDefines());
+ x86_64::CodeGeneratorX86_64 codegenX86_64(graph, *features_x86_64.get(), compiler_options);
RunCodeOptimized(&codegenX86_64, graph, hook_before_codegen, has_result, expected);
}
}
diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h
index fd8c0c6242..966165bf4c 100644
--- a/compiler/optimizing/common_arm64.h
+++ b/compiler/optimizing/common_arm64.h
@@ -20,8 +20,8 @@
#include "locations.h"
#include "nodes.h"
#include "utils/arm64/assembler_arm64.h"
-#include "a64/disasm-a64.h"
-#include "a64/macro-assembler-a64.h"
+#include "vixl/a64/disasm-a64.h"
+#include "vixl/a64/macro-assembler-a64.h"
namespace art {
namespace arm64 {
diff --git a/compiler/optimizing/constant_folding_test.cc b/compiler/optimizing/constant_folding_test.cc
index 6853d54c48..02ad675dc3 100644
--- a/compiler/optimizing/constant_folding_test.cc
+++ b/compiler/optimizing/constant_folding_test.cc
@@ -16,6 +16,7 @@
#include <functional>
+#include "arch/x86/instruction_set_features_x86.h"
#include "code_generator_x86.h"
#include "constant_folding.h"
#include "dead_code_elimination.h"
@@ -46,7 +47,9 @@ static void TestCode(const uint16_t* data,
std::string actual_before = printer_before.str();
ASSERT_EQ(expected_before, actual_before);
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegenX86(graph, *features_x86.get(), CompilerOptions());
HConstantFolding(graph).Run();
SSAChecker ssa_checker_cf(&allocator, graph);
ssa_checker_cf.Run();
diff --git a/compiler/optimizing/dead_code_elimination_test.cc b/compiler/optimizing/dead_code_elimination_test.cc
index a644719622..98ae1ec5d3 100644
--- a/compiler/optimizing/dead_code_elimination_test.cc
+++ b/compiler/optimizing/dead_code_elimination_test.cc
@@ -14,6 +14,7 @@
* limitations under the License.
*/
+#include "arch/x86/instruction_set_features_x86.h"
#include "code_generator_x86.h"
#include "dead_code_elimination.h"
#include "driver/compiler_options.h"
@@ -40,7 +41,9 @@ static void TestCode(const uint16_t* data,
std::string actual_before = printer_before.str();
ASSERT_EQ(actual_before, expected_before);
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegenX86(graph, *features_x86.get(), CompilerOptions());
HDeadCodeElimination(graph).Run();
SSAChecker ssa_checker(&allocator, graph);
ssa_checker.Run();
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 49c0d3884f..4c283788b5 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -337,13 +337,11 @@ class HGraphVisualizerPrinter : public HGraphVisitor {
HGraphVisualizer::HGraphVisualizer(std::ostream* output,
HGraph* graph,
- const CodeGenerator& codegen,
- const char* method_name)
- : output_(output), graph_(graph), codegen_(codegen) {
- if (output == nullptr) {
- return;
- }
+ const CodeGenerator& codegen)
+ : output_(output), graph_(graph), codegen_(codegen) {}
+void HGraphVisualizer::PrintHeader(const char* method_name) const {
+ DCHECK(output_ != nullptr);
HGraphVisualizerPrinter printer(graph_, *output_, "", true, codegen_);
printer.StartTag("compilation");
printer.PrintProperty("name", method_name);
diff --git a/compiler/optimizing/graph_visualizer.h b/compiler/optimizing/graph_visualizer.h
index bc553aed74..513bceb369 100644
--- a/compiler/optimizing/graph_visualizer.h
+++ b/compiler/optimizing/graph_visualizer.h
@@ -35,9 +35,9 @@ class HGraphVisualizer : public ValueObject {
public:
HGraphVisualizer(std::ostream* output,
HGraph* graph,
- const CodeGenerator& codegen,
- const char* method_name);
+ const CodeGenerator& codegen);
+ void PrintHeader(const char* method_name) const;
void DumpGraph(const char* pass_name, bool is_after_pass = true) const;
private:
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
index 628a844cc7..20aa45f197 100644
--- a/compiler/optimizing/intrinsics.cc
+++ b/compiler/optimizing/intrinsics.cc
@@ -90,7 +90,6 @@ static Intrinsics GetIntrinsic(InlineMethod method) {
LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
UNREACHABLE();
}
- break;
case kIntrinsicReverseBytes:
switch (GetType(method.d.data, true)) {
case Primitive::kPrimShort:
@@ -103,7 +102,6 @@ static Intrinsics GetIntrinsic(InlineMethod method) {
LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
UNREACHABLE();
}
- break;
// Abs.
case kIntrinsicAbsDouble:
@@ -166,7 +164,6 @@ static Intrinsics GetIntrinsic(InlineMethod method) {
LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
UNREACHABLE();
}
- break;
// Memory.poke.
case kIntrinsicPoke:
@@ -183,7 +180,6 @@ static Intrinsics GetIntrinsic(InlineMethod method) {
LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
UNREACHABLE();
}
- break;
// String.
case kIntrinsicCharAt:
@@ -211,7 +207,6 @@ static Intrinsics GetIntrinsic(InlineMethod method) {
LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
UNREACHABLE();
}
- break;
case kIntrinsicUnsafeGet: {
const bool is_volatile = (method.d.data & kIntrinsicFlagIsVolatile);
switch (GetType(method.d.data, false)) {
@@ -225,7 +220,6 @@ static Intrinsics GetIntrinsic(InlineMethod method) {
LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
UNREACHABLE();
}
- break;
}
case kIntrinsicUnsafePut: {
enum Sync { kNoSync, kVolatile, kOrdered };
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 33176f009c..94e27e912e 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -776,10 +776,10 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat
__ mov(out, ShifterOperand(0), CC);
}
-void IntrinsicLocationsBuilderARM::VisitUnsafeCASInt(HInvoke* invoke ATTRIBUTE_UNUSED) {
+void IntrinsicLocationsBuilderARM::VisitUnsafeCASInt(HInvoke* invoke) {
CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke);
}
-void IntrinsicLocationsBuilderARM::VisitUnsafeCASObject(HInvoke* invoke ATTRIBUTE_UNUSED) {
+void IntrinsicLocationsBuilderARM::VisitUnsafeCASObject(HInvoke* invoke) {
CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke);
}
void IntrinsicCodeGeneratorARM::VisitUnsafeCASInt(HInvoke* invoke) {
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 72d303c870..d1176c460f 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -28,8 +28,8 @@
#include "utils/arm64/assembler_arm64.h"
#include "utils/arm64/constants_arm64.h"
-#include "a64/disasm-a64.h"
-#include "a64/macro-assembler-a64.h"
+#include "vixl/a64/disasm-a64.h"
+#include "vixl/a64/macro-assembler-a64.h"
using namespace vixl; // NOLINT(build/namespaces)
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 384737f55a..aec2d19b1d 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -16,6 +16,7 @@
#include "intrinsics_x86.h"
+#include "arch/x86/instruction_set_features_x86.h"
#include "code_generator_x86.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "intrinsics.h"
@@ -34,6 +35,11 @@ static constexpr int kDoubleNaNHigh = 0x7FF80000;
static constexpr int kDoubleNaNLow = 0x00000000;
static constexpr int kFloatNaN = 0x7FC00000;
+IntrinsicLocationsBuilderX86::IntrinsicLocationsBuilderX86(CodeGeneratorX86* codegen)
+ : arena_(codegen->GetGraph()->GetArena()), codegen_(codegen) {
+}
+
+
X86Assembler* IntrinsicCodeGeneratorX86::GetAssembler() {
return reinterpret_cast<X86Assembler*>(codegen_->GetAssembler());
}
@@ -152,6 +158,7 @@ class IntrinsicSlowPathX86 : public SlowPathCodeX86 {
if (invoke_->IsInvokeStaticOrDirect()) {
codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), EAX);
+ RecordPcInfo(codegen, invoke_, invoke_->GetDexPc());
} else {
UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented";
UNREACHABLE();
@@ -313,6 +320,27 @@ void IntrinsicCodeGeneratorX86::VisitIntegerReverseBytes(HInvoke* invoke) {
GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}
+void IntrinsicLocationsBuilderX86::VisitLongReverseBytes(HInvoke* invoke) {
+ CreateLongToLongLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitLongReverseBytes(HInvoke* invoke) {
+ LocationSummary* locations = invoke->GetLocations();
+ Location input = locations->InAt(0);
+ Register input_lo = input.AsRegisterPairLow<Register>();
+ Register input_hi = input.AsRegisterPairHigh<Register>();
+ Location output = locations->Out();
+ Register output_lo = output.AsRegisterPairLow<Register>();
+ Register output_hi = output.AsRegisterPairHigh<Register>();
+
+ X86Assembler* assembler = GetAssembler();
+ // Assign the inputs to the outputs, mixing low/high.
+ __ movl(output_lo, input_hi);
+ __ movl(output_hi, input_lo);
+ __ bswapl(output_lo);
+ __ bswapl(output_hi);
+}
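+
+// Worked example (illustration only): for the 64-bit input 0x0102030405060708,
+// input_hi = 0x01020304 and input_lo = 0x05060708. After the cross moves,
+// output_lo = 0x01020304 and output_hi = 0x05060708; the two bswapl
+// instructions then yield output_hi:output_lo = 0x0807060504030201, the fully
+// byte-reversed long.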
+
void IntrinsicLocationsBuilderX86::VisitShortReverseBytes(HInvoke* invoke) {
CreateIntToIntLocations(arena_, invoke);
}
@@ -719,6 +747,149 @@ void IntrinsicCodeGeneratorX86::VisitMathSqrt(HInvoke* invoke) {
GetAssembler()->sqrtsd(out, in);
}
+static void InvokeOutOfLineIntrinsic(CodeGeneratorX86* codegen, HInvoke* invoke) {
+ MoveArguments(invoke, codegen->GetGraph()->GetArena(), codegen);
+
+ DCHECK(invoke->IsInvokeStaticOrDirect());
+ codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(), EAX);
+ codegen->RecordPcInfo(invoke, invoke->GetDexPc());
+
+ // Copy the result back to the expected output.
+ Location out = invoke->GetLocations()->Out();
+ if (out.IsValid()) {
+ DCHECK(out.IsRegister());
+ MoveFromReturnRegister(out, invoke->GetType(), codegen);
+ }
+}
+
+static void CreateSSE41FPToFPLocations(ArenaAllocator* arena,
+ HInvoke* invoke,
+ CodeGeneratorX86* codegen) {
+ // Do we have instruction support?
+ if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
+ CreateFPToFPLocations(arena, invoke);
+ return;
+ }
+
+ // We have to fall back to a call to the intrinsic.
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kCall);
+ InvokeRuntimeCallingConvention calling_convention;
+ locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
+ locations->SetOut(Location::FpuRegisterLocation(XMM0));
+ // Needs to be EAX for the invoke.
+ locations->AddTemp(Location::RegisterLocation(EAX));
+}
+
+static void GenSSE41FPToFPIntrinsic(CodeGeneratorX86* codegen,
+ HInvoke* invoke,
+ X86Assembler* assembler,
+ int round_mode) {
+ LocationSummary* locations = invoke->GetLocations();
+ if (locations->WillCall()) {
+ InvokeOutOfLineIntrinsic(codegen, invoke);
+ } else {
+ XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
+ XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
+ __ roundsd(out, in, Immediate(round_mode));
+ }
+}
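+
+// Note on the round_mode immediate (an assumption from the SSE4.1 ROUNDSD
+// encoding, stated here for illustration): bits 1:0 of the immediate select
+// the rounding mode, so the callers below pass
+//   0 -> round to nearest (even), used for Math.rint
+//   1 -> round toward -infinity,  used for Math.floor
+//   2 -> round toward +infinity,  used for Math.ceil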
+
+void IntrinsicLocationsBuilderX86::VisitMathCeil(HInvoke* invoke) {
+ CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathCeil(HInvoke* invoke) {
+ GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 2);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathFloor(HInvoke* invoke) {
+ CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathFloor(HInvoke* invoke) {
+ GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 1);
+}
+
+void IntrinsicLocationsBuilderX86::VisitMathRint(HInvoke* invoke) {
+ CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathRint(HInvoke* invoke) {
+ GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 0);
+}
+
+// Note that 32-bit x86 cannot inline MathRoundDouble, as doing so needs 64-bit
+// instructions.
+void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) {
+ // Do we have instruction support?
+ if (codegen_->GetInstructionSetFeatures().HasSSE4_1()) {
+ LocationSummary* locations = new (arena_) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister());
+ locations->AddTemp(Location::RequiresFpuRegister());
+ locations->AddTemp(Location::RequiresFpuRegister());
+ return;
+ }
+
+ // We have to fall back to a call to the intrinsic.
+ LocationSummary* locations = new (arena_) LocationSummary(invoke,
+ LocationSummary::kCall);
+ InvokeRuntimeCallingConvention calling_convention;
+ locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
+ locations->SetOut(Location::RegisterLocation(EAX));
+ // Needs to be EAX for the invoke.
+ locations->AddTemp(Location::RegisterLocation(EAX));
+}
+
+void IntrinsicCodeGeneratorX86::VisitMathRoundFloat(HInvoke* invoke) {
+ LocationSummary* locations = invoke->GetLocations();
+ if (locations->WillCall()) {
+ InvokeOutOfLineIntrinsic(codegen_, invoke);
+ return;
+ }
+
+ // Implement RoundFloat as t1 = floor(input + 0.5f); convert to int.
+ XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
+ Register out = locations->Out().AsRegister<Register>();
+ XmmRegister maxInt = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ XmmRegister inPlusPointFive = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
+ Label done, nan;
+ X86Assembler* assembler = GetAssembler();
+
+ // Generate 0.5 into inPlusPointFive.
+ __ movl(out, Immediate(bit_cast<int32_t, float>(0.5f)));
+ __ movd(inPlusPointFive, out);
+
+ // Add in the input.
+ __ addss(inPlusPointFive, in);
+
+ // And floor the sum to an integral float value.
+ __ roundss(inPlusPointFive, inPlusPointFive, Immediate(1));
+
+ __ movl(out, Immediate(kPrimIntMax));
+ // maxInt = int-to-float(out)
+ __ cvtsi2ss(maxInt, out);
+
+ // if inPlusPointFive >= maxInt goto done
+ __ comiss(inPlusPointFive, maxInt);
+ __ j(kAboveEqual, &done);
+
+ // if input == NaN goto nan
+ __ j(kUnordered, &nan);
+
+ // output = float-to-int-truncate(input)
+ __ cvttss2si(out, inPlusPointFive);
+ __ jmp(&done);
+ __ Bind(&nan);
+
+ // output = 0
+ __ xorl(out, out);
+ __ Bind(&done);
+}
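+
+// Worked example (illustration only): round(1.6f) computes
+// floor(1.6f + 0.5f) = floor(2.1f) = 2.0f, and cvttss2si then yields 2. A NaN
+// input takes the unordered branch and produces 0, while a sum that compares
+// >= maxInt leaves kPrimIntMax in 'out' (it was loaded just before the
+// comparison), giving saturation.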
+
void IntrinsicLocationsBuilderX86::VisitStringCharAt(HInvoke* invoke) {
// The inputs plus one temp.
LocationSummary* locations = new (arena_) LocationSummary(invoke,
@@ -1180,6 +1351,181 @@ void IntrinsicCodeGeneratorX86::VisitUnsafePutLongVolatile(HInvoke* invoke) {
GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, codegen_);
}
+static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type,
+ HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
+ locations->SetInAt(1, Location::RequiresRegister());
+ // Offset is a long, but in 32-bit mode we only need the low word.
+ // TODO: Can we update the invoke here to remove a TypeConvert to Long?
+ locations->SetInAt(2, Location::RequiresRegister());
+ // Expected value must be in EAX or EDX:EAX.
+ // For long, new value must be in ECX:EBX.
+ if (type == Primitive::kPrimLong) {
+ locations->SetInAt(3, Location::RegisterPairLocation(EAX, EDX));
+ locations->SetInAt(4, Location::RegisterPairLocation(EBX, ECX));
+ } else {
+ locations->SetInAt(3, Location::RegisterLocation(EAX));
+ locations->SetInAt(4, Location::RequiresRegister());
+ }
+
+ // Force a byte register for the output.
+ locations->SetOut(Location::RegisterLocation(EAX));
+ if (type == Primitive::kPrimNot) {
+ // Need temp registers for card-marking.
+ locations->AddTemp(Location::RequiresRegister());
+ // Need a byte register for marking.
+ locations->AddTemp(Location::RegisterLocation(ECX));
+ }
+}
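+
+// Background note (illustration, from the x86 ISA): the fixed register pairs
+// above come from the cmpxchg8b contract. The instruction implicitly compares
+// EDX:EAX with the 8-byte memory operand and, on a match, stores ECX:EBX into
+// it, so the locations simply pin the inputs where the hardware expects them.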
+
+void IntrinsicLocationsBuilderX86::VisitUnsafeCASInt(HInvoke* invoke) {
+ CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimInt, invoke);
+}
+
+void IntrinsicLocationsBuilderX86::VisitUnsafeCASLong(HInvoke* invoke) {
+ CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimLong, invoke);
+}
+
+void IntrinsicLocationsBuilderX86::VisitUnsafeCASObject(HInvoke* invoke) {
+ CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke);
+}
+
+static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* codegen) {
+ X86Assembler* assembler =
+ reinterpret_cast<X86Assembler*>(codegen->GetAssembler());
+ LocationSummary* locations = invoke->GetLocations();
+
+ Register base = locations->InAt(1).AsRegister<Register>();
+ Register offset = locations->InAt(2).AsRegisterPairLow<Register>();
+ Location out = locations->Out();
+ DCHECK_EQ(out.AsRegister<Register>(), EAX);
+
+ if (type == Primitive::kPrimLong) {
+ DCHECK_EQ(locations->InAt(3).AsRegisterPairLow<Register>(), EAX);
+ DCHECK_EQ(locations->InAt(3).AsRegisterPairHigh<Register>(), EDX);
+ DCHECK_EQ(locations->InAt(4).AsRegisterPairLow<Register>(), EBX);
+ DCHECK_EQ(locations->InAt(4).AsRegisterPairHigh<Register>(), ECX);
+ __ LockCmpxchg8b(Address(base, offset, TIMES_1, 0));
+ } else {
+ // Integer or object.
+ DCHECK_EQ(locations->InAt(3).AsRegister<Register>(), EAX);
+ Register value = locations->InAt(4).AsRegister<Register>();
+ if (type == Primitive::kPrimNot) {
+ // Mark card for object assuming new value is stored.
+ codegen->MarkGCCard(locations->GetTemp(0).AsRegister<Register>(),
+ locations->GetTemp(1).AsRegister<Register>(),
+ base,
+ value);
+ }
+
+ __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value);
+ }
+
+ // Locked cmpxchg has full barrier semantics, and we don't need scheduling
+ // barriers at this time.
+
+ // Convert ZF into the boolean result.
+ __ setb(kZero, out.AsRegister<Register>());
+ __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
+}
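+
+// Illustrative summary (not part of the patch): lock cmpxchg compares EAX
+// (EDX:EAX for the 8b form) with the memory operand; on a match it stores the
+// new value and sets ZF, otherwise it loads the current memory value into EAX
+// and clears ZF. The setb/movzxb pair then widens ZF into the 0/1 int that
+// compareAndSwap returns at the Java level.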
+
+void IntrinsicCodeGeneratorX86::VisitUnsafeCASInt(HInvoke* invoke) {
+ GenCAS(Primitive::kPrimInt, invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorX86::VisitUnsafeCASLong(HInvoke* invoke) {
+ GenCAS(Primitive::kPrimLong, invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorX86::VisitUnsafeCASObject(HInvoke* invoke) {
+ GenCAS(Primitive::kPrimNot, invoke, codegen_);
+}
+
+void IntrinsicLocationsBuilderX86::VisitIntegerReverse(HInvoke* invoke) {
+ LocationSummary* locations = new (arena_) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ locations->AddTemp(Location::RequiresRegister());
+}
+
+static void SwapBits(Register reg, Register temp, int32_t shift, int32_t mask,
+ X86Assembler* assembler) {
+ Immediate imm_shift(shift);
+ Immediate imm_mask(mask);
+ __ movl(temp, reg);
+ __ shrl(reg, imm_shift);
+ __ andl(temp, imm_mask);
+ __ andl(reg, imm_mask);
+ __ shll(temp, imm_shift);
+ __ orl(reg, temp);
+}
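+
+// Worked example for one round (illustration only): with shift = 1 and
+// mask = 0x55555555, the sequence computes
+//   temp = reg & mask         // keep the even-position bits
+//   reg  = (reg >> 1) & mask  // move the odd-position bits down
+//   reg  = reg | (temp << 1)  // move the even-position bits up
+// so every adjacent pair of bits is swapped. The rounds with shift 2 and 4
+// do the same for 2-bit and 4-bit groups.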
+
+void IntrinsicCodeGeneratorX86::VisitIntegerReverse(HInvoke* invoke) {
+ X86Assembler* assembler =
+ reinterpret_cast<X86Assembler*>(codegen_->GetAssembler());
+ LocationSummary* locations = invoke->GetLocations();
+
+ Register reg = locations->InAt(0).AsRegister<Register>();
+ Register temp = locations->GetTemp(0).AsRegister<Register>();
+
+ /*
+ * Use one bswap instruction to reverse the byte order first, then use 3 rounds
+ * of bit swapping to reverse the bits in a number x. Using bswap saves
+ * instructions compared to the generic luni implementation, which needs 5
+ * rounds of bit swapping.
+ * x = bswap x
+ * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555;
+ * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333;
+ * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F;
+ */
+ __ bswapl(reg);
+ SwapBits(reg, temp, 1, 0x55555555, assembler);
+ SwapBits(reg, temp, 2, 0x33333333, assembler);
+ SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler);
+}
+
+void IntrinsicLocationsBuilderX86::VisitLongReverse(HInvoke* invoke) {
+ LocationSummary* locations = new (arena_) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ locations->AddTemp(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorX86::VisitLongReverse(HInvoke* invoke) {
+ X86Assembler* assembler =
+ reinterpret_cast<X86Assembler*>(codegen_->GetAssembler());
+ LocationSummary* locations = invoke->GetLocations();
+
+ Register reg_low = locations->InAt(0).AsRegisterPairLow<Register>();
+ Register reg_high = locations->InAt(0).AsRegisterPairHigh<Register>();
+ Register temp = locations->GetTemp(0).AsRegister<Register>();
+
+ // We want to swap the high and low words, bswap each of them, and then
+ // reverse the bits as in the 32-bit case.
+ // Exchange high and low.
+ __ movl(temp, reg_low);
+ __ movl(reg_low, reg_high);
+ __ movl(reg_high, temp);
+
+ // bit-reverse low
+ __ bswapl(reg_low);
+ SwapBits(reg_low, temp, 1, 0x55555555, assembler);
+ SwapBits(reg_low, temp, 2, 0x33333333, assembler);
+ SwapBits(reg_low, temp, 4, 0x0f0f0f0f, assembler);
+
+ // bit-reverse high
+ __ bswapl(reg_high);
+ SwapBits(reg_high, temp, 1, 0x55555555, assembler);
+ SwapBits(reg_high, temp, 2, 0x33333333, assembler);
+ SwapBits(reg_high, temp, 4, 0x0f0f0f0f, assembler);
+}
+
// Unimplemented intrinsics.
#define UNIMPLEMENTED_INTRINSIC(Name) \
@@ -1188,20 +1534,10 @@ void IntrinsicLocationsBuilderX86::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSE
void IntrinsicCodeGeneratorX86::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
}
-UNIMPLEMENTED_INTRINSIC(IntegerReverse)
-UNIMPLEMENTED_INTRINSIC(LongReverse)
-UNIMPLEMENTED_INTRINSIC(LongReverseBytes)
-UNIMPLEMENTED_INTRINSIC(MathFloor)
-UNIMPLEMENTED_INTRINSIC(MathCeil)
-UNIMPLEMENTED_INTRINSIC(MathRint)
UNIMPLEMENTED_INTRINSIC(MathRoundDouble)
-UNIMPLEMENTED_INTRINSIC(MathRoundFloat)
UNIMPLEMENTED_INTRINSIC(StringIndexOf)
UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter)
UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
-UNIMPLEMENTED_INTRINSIC(UnsafeCASInt)
-UNIMPLEMENTED_INTRINSIC(UnsafeCASLong)
-UNIMPLEMENTED_INTRINSIC(UnsafeCASObject)
UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
} // namespace x86
diff --git a/compiler/optimizing/intrinsics_x86.h b/compiler/optimizing/intrinsics_x86.h
index e1e8260a5f..4292ec7b99 100644
--- a/compiler/optimizing/intrinsics_x86.h
+++ b/compiler/optimizing/intrinsics_x86.h
@@ -32,7 +32,7 @@ class X86Assembler;
class IntrinsicLocationsBuilderX86 FINAL : public IntrinsicVisitor {
public:
- explicit IntrinsicLocationsBuilderX86(ArenaAllocator* arena) : arena_(arena) {}
+ explicit IntrinsicLocationsBuilderX86(CodeGeneratorX86* codegen);
// Define visitor methods.
@@ -50,6 +50,7 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
private:
ArenaAllocator* arena_;
+ CodeGeneratorX86* codegen_;
DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderX86);
};
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 736cea88cb..cbf94f0f81 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -16,6 +16,7 @@
#include "intrinsics_x86_64.h"
+#include "arch/x86_64/instruction_set_features_x86_64.h"
#include "code_generator_x86_64.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "intrinsics.h"
@@ -30,6 +31,11 @@ namespace art {
namespace x86_64 {
+IntrinsicLocationsBuilderX86_64::IntrinsicLocationsBuilderX86_64(CodeGeneratorX86_64* codegen)
+ : arena_(codegen->GetGraph()->GetArena()), codegen_(codegen) {
+}
+
+
X86_64Assembler* IntrinsicCodeGeneratorX86_64::GetAssembler() {
return reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler());
}
@@ -292,25 +298,27 @@ static void CreateFloatToFloatPlusTemps(ArenaAllocator* arena, HInvoke* invoke)
// TODO: Allow x86 to work with memory. This requires assembler support, see below.
// locations->SetInAt(0, Location::Any()); // X86 can work on memory directly.
locations->SetOut(Location::SameAsFirstInput());
- locations->AddTemp(Location::RequiresRegister()); // Immediate constant.
- locations->AddTemp(Location::RequiresFpuRegister()); // FP version of above.
+ locations->AddTemp(Location::RequiresFpuRegister()); // FP reg to hold mask.
}
-static void MathAbsFP(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
+static void MathAbsFP(LocationSummary* locations,
+ bool is64bit,
+ X86_64Assembler* assembler,
+ CodeGeneratorX86_64* codegen) {
Location output = locations->Out();
- CpuRegister cpu_temp = locations->GetTemp(0).AsRegister<CpuRegister>();
if (output.IsFpuRegister()) {
// In-register
- XmmRegister xmm_temp = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
+ XmmRegister xmm_temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ // TODO: We could mask directly against the constant area with pand if we can
+ // guarantee that the literal is aligned on a 16-byte boundary; that would
+ // avoid the temporary.
if (is64bit) {
- __ movq(cpu_temp, Immediate(INT64_C(0x7FFFFFFFFFFFFFFF)));
- __ movd(xmm_temp, cpu_temp);
+ __ movsd(xmm_temp, codegen->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF)));
__ andpd(output.AsFpuRegister<XmmRegister>(), xmm_temp);
} else {
- __ movl(cpu_temp, Immediate(INT64_C(0x7FFFFFFF)));
- __ movd(xmm_temp, cpu_temp);
+ __ movss(xmm_temp, codegen->LiteralInt32Address(INT32_C(0x7FFFFFFF)));
__ andps(output.AsFpuRegister<XmmRegister>(), xmm_temp);
}
} else {
@@ -335,7 +343,7 @@ void IntrinsicLocationsBuilderX86_64::VisitMathAbsDouble(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorX86_64::VisitMathAbsDouble(HInvoke* invoke) {
- MathAbsFP(invoke->GetLocations(), true, GetAssembler());
+ MathAbsFP(invoke->GetLocations(), true, GetAssembler(), codegen_);
}
void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) {
@@ -343,7 +351,7 @@ void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorX86_64::VisitMathAbsFloat(HInvoke* invoke) {
- MathAbsFP(invoke->GetLocations(), false, GetAssembler());
+ MathAbsFP(invoke->GetLocations(), false, GetAssembler(), codegen_);
}
static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) {
@@ -393,8 +401,11 @@ void IntrinsicCodeGeneratorX86_64::VisitMathAbsLong(HInvoke* invoke) {
GenAbsInteger(invoke->GetLocations(), true, GetAssembler());
}
-static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double,
- X86_64Assembler* assembler) {
+static void GenMinMaxFP(LocationSummary* locations,
+ bool is_min,
+ bool is_double,
+ X86_64Assembler* assembler,
+ CodeGeneratorX86_64* codegen) {
Location op1_loc = locations->InAt(0);
Location op2_loc = locations->InAt(1);
Location out_loc = locations->Out();
@@ -421,7 +432,7 @@ static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double,
//
// This removes one jmp, but needs to copy one input (op1) to out.
//
- // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath?
+ // TODO: This is straight from Quick. Make NaN an out-of-line slowpath?
XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();
@@ -455,14 +466,11 @@ static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double,
// NaN handling.
__ Bind(&nan);
- CpuRegister cpu_temp = locations->GetTemp(0).AsRegister<CpuRegister>();
- // TODO: Literal pool. Trades 64b immediate in CPU reg for direct memory access.
if (is_double) {
- __ movq(cpu_temp, Immediate(INT64_C(0x7FF8000000000000)));
+ __ movsd(out, codegen->LiteralInt64Address(INT64_C(0x7FF8000000000000)));
} else {
- __ movl(cpu_temp, Immediate(INT64_C(0x7FC00000)));
+ __ movss(out, codegen->LiteralInt32Address(INT32_C(0x7FC00000)));
}
- __ movd(out, cpu_temp, is_double);
__ jmp(&done);
// out := op2;
@@ -477,7 +485,7 @@ static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double,
__ Bind(&done);
}
-static void CreateFPFPToFPPlusTempLocations(ArenaAllocator* arena, HInvoke* invoke) {
+static void CreateFPFPToFP(ArenaAllocator* arena, HInvoke* invoke) {
LocationSummary* locations = new (arena) LocationSummary(invoke,
LocationSummary::kNoCall,
kIntrinsified);
@@ -486,39 +494,38 @@ static void CreateFPFPToFPPlusTempLocations(ArenaAllocator* arena, HInvoke* invo
// The following is sub-optimal, but all we can do for now. It would be fine to also accept
// the second input to be the output (we can simply swap inputs).
locations->SetOut(Location::SameAsFirstInput());
- locations->AddTemp(Location::RequiresRegister()); // Immediate constant.
}
void IntrinsicLocationsBuilderX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
- CreateFPFPToFPPlusTempLocations(arena_, invoke);
+ CreateFPFPToFP(arena_, invoke);
}
void IntrinsicCodeGeneratorX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
- GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler());
+ GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler(), codegen_);
}
void IntrinsicLocationsBuilderX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
- CreateFPFPToFPPlusTempLocations(arena_, invoke);
+ CreateFPFPToFP(arena_, invoke);
}
void IntrinsicCodeGeneratorX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
- GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler());
+ GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler(), codegen_);
}
void IntrinsicLocationsBuilderX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
- CreateFPFPToFPPlusTempLocations(arena_, invoke);
+ CreateFPFPToFP(arena_, invoke);
}
void IntrinsicCodeGeneratorX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
- GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler());
+ GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler(), codegen_);
}
void IntrinsicLocationsBuilderX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
- CreateFPFPToFPPlusTempLocations(arena_, invoke);
+ CreateFPFPToFP(arena_, invoke);
}
void IntrinsicCodeGeneratorX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
- GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler());
+ GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler(), codegen_);
}
static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
@@ -614,6 +621,203 @@ void IntrinsicCodeGeneratorX86_64::VisitMathSqrt(HInvoke* invoke) {
GetAssembler()->sqrtsd(out, in);
}
+static void InvokeOutOfLineIntrinsic(CodeGeneratorX86_64* codegen, HInvoke* invoke) {
+ MoveArguments(invoke, codegen->GetGraph()->GetArena(), codegen);
+
+ DCHECK(invoke->IsInvokeStaticOrDirect());
+ codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(), CpuRegister(RDI));
+ codegen->RecordPcInfo(invoke, invoke->GetDexPc());
+
+ // Copy the result back to the expected output.
+ Location out = invoke->GetLocations()->Out();
+ if (out.IsValid()) {
+ DCHECK(out.IsRegister());
+ MoveFromReturnRegister(out, invoke->GetType(), codegen);
+ }
+}
+
+static void CreateSSE41FPToFPLocations(ArenaAllocator* arena,
+ HInvoke* invoke,
+ CodeGeneratorX86_64* codegen) {
+ // Do we have instruction support?
+ if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
+ CreateFPToFPLocations(arena, invoke);
+ return;
+ }
+
+ // We have to fall back to a call to the intrinsic.
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kCall);
+ InvokeRuntimeCallingConvention calling_convention;
+ locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
+ locations->SetOut(Location::FpuRegisterLocation(XMM0));
+ // Needs to be RDI for the invoke.
+ locations->AddTemp(Location::RegisterLocation(RDI));
+}
+
+static void GenSSE41FPToFPIntrinsic(CodeGeneratorX86_64* codegen,
+ HInvoke* invoke,
+ X86_64Assembler* assembler,
+ int round_mode) {
+ LocationSummary* locations = invoke->GetLocations();
+ if (locations->WillCall()) {
+ InvokeOutOfLineIntrinsic(codegen, invoke);
+ } else {
+ XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
+ XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
+ __ roundsd(out, in, Immediate(round_mode));
+ }
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathCeil(HInvoke* invoke) {
+ CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathCeil(HInvoke* invoke) {
+ GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 2);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathFloor(HInvoke* invoke) {
+ CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathFloor(HInvoke* invoke) {
+ GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 1);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathRint(HInvoke* invoke) {
+ CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathRint(HInvoke* invoke) {
+ GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 0);
+}
+
+static void CreateSSE41FPToIntLocations(ArenaAllocator* arena,
+ HInvoke* invoke,
+ CodeGeneratorX86_64* codegen) {
+ // Do we have instruction support?
+ if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister());
+ locations->AddTemp(Location::RequiresFpuRegister());
+ locations->AddTemp(Location::RequiresFpuRegister());
+ return;
+ }
+
+ // We have to fall back to a call to the intrinsic.
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kCall);
+ InvokeRuntimeCallingConvention calling_convention;
+ locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
+ locations->SetOut(Location::RegisterLocation(RAX));
+ // Needs to be RDI for the invoke.
+ locations->AddTemp(Location::RegisterLocation(RDI));
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathRoundFloat(HInvoke* invoke) {
+ CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathRoundFloat(HInvoke* invoke) {
+ LocationSummary* locations = invoke->GetLocations();
+ if (locations->WillCall()) {
+ InvokeOutOfLineIntrinsic(codegen_, invoke);
+ return;
+ }
+
+ // Implement RoundFloat as t1 = floor(input + 0.5f); convert to int.
+ XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
+ CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+ XmmRegister maxInt = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ XmmRegister inPlusPointFive = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
+ Label done, nan;
+ X86_64Assembler* assembler = GetAssembler();
+
+ // Generate 0.5 into inPlusPointFive.
+ __ movl(out, Immediate(bit_cast<int32_t, float>(0.5f)));
+ __ movd(inPlusPointFive, out, false);
+
+ // Add in the input.
+ __ addss(inPlusPointFive, in);
+
+ // And floor the sum to an integral float value.
+ __ roundss(inPlusPointFive, inPlusPointFive, Immediate(1));
+
+ __ movl(out, Immediate(kPrimIntMax));
+ // maxInt = int-to-float(out)
+ __ cvtsi2ss(maxInt, out);
+
+ // if inPlusPointFive >= maxInt goto done
+ __ comiss(inPlusPointFive, maxInt);
+ __ j(kAboveEqual, &done);
+
+ // if input == NaN goto nan
+ __ j(kUnordered, &nan);
+
+ // output = float-to-int-truncate(input)
+ __ cvttss2si(out, inPlusPointFive);
+ __ jmp(&done);
+ __ Bind(&nan);
+
+ // output = 0
+ __ xorl(out, out);
+ __ Bind(&done);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathRoundDouble(HInvoke* invoke) {
+ CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) {
+ LocationSummary* locations = invoke->GetLocations();
+ if (locations->WillCall()) {
+ InvokeOutOfLineIntrinsic(codegen_, invoke);
+ return;
+ }
+
+ // Implement RoundDouble as t1 = floor(input + 0.5); convert to long.
+ XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
+ CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+ XmmRegister maxLong = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ XmmRegister inPlusPointFive = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
+ Label done, nan;
+ X86_64Assembler* assembler = GetAssembler();
+
+ // Generate 0.5 into inPlusPointFive.
+ __ movq(out, Immediate(bit_cast<int64_t, double>(0.5)));
+ __ movd(inPlusPointFive, out, true);
+
+ // Add in the input.
+ __ addsd(inPlusPointFive, in);
+
+ // And floor the sum to an integral double value.
+ __ roundsd(inPlusPointFive, inPlusPointFive, Immediate(1));
+
+ __ movq(out, Immediate(kPrimLongMax));
+ // maxLong = long-to-double(out)
+ __ cvtsi2sd(maxLong, out, true);
+
+ // if inPlusPointFive >= maxLong goto done
+ __ comisd(inPlusPointFive, maxLong);
+ __ j(kAboveEqual, &done);
+
+ // if input == NaN goto nan
+ __ j(kUnordered, &nan);
+
+ // output = double-to-long-truncate(input)
+ __ cvttsd2si(out, inPlusPointFive, true);
+ __ jmp(&done);
+ __ Bind(&nan);
+
+ // output = 0
+ __ xorq(out, out);
+ __ Bind(&done);
+}
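+
+// Worked example (illustration only): round(2.3) computes
+// floor(2.3 + 0.5) = floor(2.8) = 2.0, and cvttsd2si then yields 2. As in the
+// float case, a NaN input takes the unordered branch and produces 0, while a
+// sum that compares >= (double)kPrimLongMax leaves kPrimLongMax in 'out',
+// giving saturation.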
+
void IntrinsicLocationsBuilderX86_64::VisitStringCharAt(HInvoke* invoke) {
// The inputs plus one temp.
LocationSummary* locations = new (arena_) LocationSummary(invoke,
@@ -999,6 +1203,175 @@ void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, codegen_);
}
+static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type,
+ HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetInAt(2, Location::RequiresRegister());
+ // Expected value must be in EAX/RAX.
+ locations->SetInAt(3, Location::RegisterLocation(RAX));
+ locations->SetInAt(4, Location::RequiresRegister());
+
+ locations->SetOut(Location::RequiresRegister());
+ if (type == Primitive::kPrimNot) {
+ // Need temp registers for card-marking.
+ locations->AddTemp(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
+ }
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
+ CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimInt, invoke);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
+ CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimLong, invoke);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
+ CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke);
+}
+
+static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* codegen) {
+ X86_64Assembler* assembler =
+ reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler());
+ LocationSummary* locations = invoke->GetLocations();
+
+ CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
+ CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
+ CpuRegister expected = locations->InAt(3).AsRegister<CpuRegister>();
+ DCHECK_EQ(expected.AsRegister(), RAX);
+ CpuRegister value = locations->InAt(4).AsRegister<CpuRegister>();
+ CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+
+ if (type == Primitive::kPrimLong) {
+ __ LockCmpxchgq(Address(base, offset, TIMES_1, 0), value);
+ } else {
+ // Integer or object.
+ if (type == Primitive::kPrimNot) {
+ // Mark card for object assuming new value is stored.
+ codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(),
+ locations->GetTemp(1).AsRegister<CpuRegister>(),
+ base,
+ value);
+ }
+
+ __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value);
+ }
+
+ // Locked cmpxchg has full barrier semantics, and we don't need scheduling
+ // barriers at this time.
+
+ // Convert ZF into the boolean result.
+ __ setcc(kZero, out);
+ __ movzxb(out, out);
+}
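+
+// Illustrative note (not part of the patch): unlike the 32-bit path, no
+// register pairs are needed here; lock cmpxchgq handles the long case
+// directly, with RAX as the implicit expected-value operand. That is why the
+// locations builder pins input 3 to RAX and the DCHECK above verifies it.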
+
+void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
+ GenCAS(Primitive::kPrimInt, invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
+ GenCAS(Primitive::kPrimLong, invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
+ GenCAS(Primitive::kPrimNot, invoke, codegen_);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitIntegerReverse(HInvoke* invoke) {
+ LocationSummary* locations = new (arena_) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ locations->AddTemp(Location::RequiresRegister());
+}
+
+static void SwapBits(CpuRegister reg, CpuRegister temp, int32_t shift, int32_t mask,
+ X86_64Assembler* assembler) {
+ Immediate imm_shift(shift);
+ Immediate imm_mask(mask);
+ __ movl(temp, reg);
+ __ shrl(reg, imm_shift);
+ __ andl(temp, imm_mask);
+ __ andl(reg, imm_mask);
+ __ shll(temp, imm_shift);
+ __ orl(reg, temp);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitIntegerReverse(HInvoke* invoke) {
+ X86_64Assembler* assembler =
+ reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler());
+ LocationSummary* locations = invoke->GetLocations();
+
+ CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
+ CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
+
+ /*
+ * Use one bswap instruction to reverse the byte order first, then use 3 rounds
+ * of bit swapping to reverse the bits in a number x. Using bswap saves
+ * instructions compared to the generic luni implementation, which needs 5
+ * rounds of bit swapping.
+ * x = bswap x
+ * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555;
+ * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333;
+ * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F;
+ */
+ __ bswapl(reg);
+ SwapBits(reg, temp, 1, 0x55555555, assembler);
+ SwapBits(reg, temp, 2, 0x33333333, assembler);
+ SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitLongReverse(HInvoke* invoke) {
+ LocationSummary* locations = new (arena_) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ locations->AddTemp(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
+}
+
+static void SwapBits64(CpuRegister reg, CpuRegister temp, CpuRegister temp_mask,
+ int32_t shift, int64_t mask, X86_64Assembler* assembler) {
+ Immediate imm_shift(shift);
+ __ movq(temp_mask, Immediate(mask));
+ __ movq(temp, reg);
+ __ shrq(reg, imm_shift);
+ __ andq(temp, temp_mask);
+ __ andq(reg, temp_mask);
+ __ shlq(temp, imm_shift);
+ __ orq(reg, temp);
+}
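+
+// Design note (an assumption from the x86-64 immediate encoding, for
+// illustration): unlike the 32-bit SwapBits, the mask is first materialized
+// into temp_mask because andq only accepts 32-bit sign-extended immediates;
+// a 64-bit pattern such as 0x5555555555555555 cannot be encoded directly,
+// hence the extra temporary.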
+
+void IntrinsicCodeGeneratorX86_64::VisitLongReverse(HInvoke* invoke) {
+ X86_64Assembler* assembler =
+ reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler());
+ LocationSummary* locations = invoke->GetLocations();
+
+ CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
+ CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>();
+ CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>();
+
+ /*
+ * Use one bswap instruction to reverse the byte order first, then use 3 rounds
+ * of bit swapping to reverse the bits in a long number x. Using bswap saves
+ * instructions compared to the generic luni implementation, which needs 5
+ * rounds of bit swapping.
+ * x = bswap x
+ * x = (x & 0x5555555555555555) << 1 | (x >> 1) & 0x5555555555555555;
+ * x = (x & 0x3333333333333333) << 2 | (x >> 2) & 0x3333333333333333;
+ * x = (x & 0x0F0F0F0F0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F0F0F0F0F;
+ */
+ __ bswapq(reg);
+ SwapBits64(reg, temp1, temp2, 1, INT64_C(0x5555555555555555), assembler);
+ SwapBits64(reg, temp1, temp2, 2, INT64_C(0x3333333333333333), assembler);
+ SwapBits64(reg, temp1, temp2, 4, INT64_C(0x0f0f0f0f0f0f0f0f), assembler);
+}
+
// Unimplemented intrinsics.
#define UNIMPLEMENTED_INTRINSIC(Name) \
@@ -1007,19 +1380,9 @@ void IntrinsicLocationsBuilderX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UN
void IntrinsicCodeGeneratorX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
}
-UNIMPLEMENTED_INTRINSIC(IntegerReverse)
-UNIMPLEMENTED_INTRINSIC(LongReverse)
-UNIMPLEMENTED_INTRINSIC(MathFloor)
-UNIMPLEMENTED_INTRINSIC(MathCeil)
-UNIMPLEMENTED_INTRINSIC(MathRint)
-UNIMPLEMENTED_INTRINSIC(MathRoundDouble)
-UNIMPLEMENTED_INTRINSIC(MathRoundFloat)
UNIMPLEMENTED_INTRINSIC(StringIndexOf)
UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter)
UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
-UNIMPLEMENTED_INTRINSIC(UnsafeCASInt)
-UNIMPLEMENTED_INTRINSIC(UnsafeCASLong)
-UNIMPLEMENTED_INTRINSIC(UnsafeCASObject)
UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
} // namespace x86_64
diff --git a/compiler/optimizing/intrinsics_x86_64.h b/compiler/optimizing/intrinsics_x86_64.h
index dfae7fa90e..0e0e72c1fc 100644
--- a/compiler/optimizing/intrinsics_x86_64.h
+++ b/compiler/optimizing/intrinsics_x86_64.h
@@ -32,7 +32,7 @@ class X86_64Assembler;
class IntrinsicLocationsBuilderX86_64 FINAL : public IntrinsicVisitor {
public:
- explicit IntrinsicLocationsBuilderX86_64(ArenaAllocator* arena) : arena_(arena) {}
+ explicit IntrinsicLocationsBuilderX86_64(CodeGeneratorX86_64* codegen);
// Define visitor methods.
@@ -50,6 +50,7 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
private:
ArenaAllocator* arena_;
+ CodeGeneratorX86_64* codegen_;
DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderX86_64);
};
diff --git a/compiler/optimizing/linearize_test.cc b/compiler/optimizing/linearize_test.cc
index f22b7a7e82..28c5555d57 100644
--- a/compiler/optimizing/linearize_test.cc
+++ b/compiler/optimizing/linearize_test.cc
@@ -16,6 +16,7 @@
#include <fstream>
+#include "arch/x86/instruction_set_features_x86.h"
#include "base/arena_allocator.h"
#include "base/stringprintf.h"
#include "builder.h"
@@ -46,7 +47,9 @@ static void TestCode(const uint16_t* data, const int* expected_order, size_t num
graph->TryBuildingSsa();
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc
index c102c4f02f..61d6593f2b 100644
--- a/compiler/optimizing/live_ranges_test.cc
+++ b/compiler/optimizing/live_ranges_test.cc
@@ -14,6 +14,7 @@
* limitations under the License.
*/
+#include "arch/x86/instruction_set_features_x86.h"
#include "base/arena_allocator.h"
#include "builder.h"
#include "code_generator.h"
@@ -65,7 +66,9 @@ TEST(LiveRangesTest, CFG1) {
ArenaAllocator allocator(&pool);
HGraph* graph = BuildGraph(data, &allocator);
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
@@ -111,7 +114,9 @@ TEST(LiveRangesTest, CFG2) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
HGraph* graph = BuildGraph(data, &allocator);
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
@@ -160,7 +165,9 @@ TEST(LiveRangesTest, CFG3) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
HGraph* graph = BuildGraph(data, &allocator);
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
@@ -237,7 +244,9 @@ TEST(LiveRangesTest, Loop1) {
ArenaAllocator allocator(&pool);
HGraph* graph = BuildGraph(data, &allocator);
RemoveSuspendChecks(graph);
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
@@ -315,7 +324,9 @@ TEST(LiveRangesTest, Loop2) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
HGraph* graph = BuildGraph(data, &allocator);
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
@@ -391,7 +402,9 @@ TEST(LiveRangesTest, CFG4) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
HGraph* graph = BuildGraph(data, &allocator);
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc
index 0b0cfde0cf..81250ca133 100644
--- a/compiler/optimizing/liveness_test.cc
+++ b/compiler/optimizing/liveness_test.cc
@@ -14,6 +14,7 @@
* limitations under the License.
*/
+#include "arch/x86/instruction_set_features_x86.h"
#include "base/arena_allocator.h"
#include "builder.h"
#include "code_generator.h"
@@ -53,7 +54,9 @@ static void TestCode(const uint16_t* data, const char* expected) {
graph->TryBuildingSsa();
// `Inline` conditions into ifs.
PrepareForRegisterAllocation(graph).Run();
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index dca612e6b7..d8a8554610 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -752,8 +752,8 @@ HInstruction* HBinaryOperation::GetLeastConstantLeft() const {
}
}
-bool HCondition::IsBeforeWhenDisregardMoves(HIf* if_) const {
- return this == if_->GetPreviousDisregardingMoves();
+bool HCondition::IsBeforeWhenDisregardMoves(HInstruction* instruction) const {
+ return this == instruction->GetPreviousDisregardingMoves();
}
bool HInstruction::Equals(HInstruction* other) const {
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 21ed3504f1..f764eb421f 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -682,6 +682,7 @@ class HLoopInformationOutwardIterator : public ValueObject {
M(ClinitCheck, Instruction) \
M(Compare, BinaryOperation) \
M(Condition, BinaryOperation) \
+ M(Deoptimize, Instruction) \
M(Div, BinaryOperation) \
M(DivZeroCheck, Instruction) \
M(DoubleConstant, Constant) \
@@ -1191,7 +1192,17 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> {
bool HasEnvironment() const { return environment_ != nullptr; }
HEnvironment* GetEnvironment() const { return environment_; }
- void SetEnvironment(HEnvironment* environment) { environment_ = environment; }
+ // Set the `environment_` field. Raw because this method does not
+ // update the uses lists.
+ void SetRawEnvironment(HEnvironment* environment) { environment_ = environment; }
+
+ // Set the environment of this instruction, copying it from `environment`. While
+ // copying, the uses lists are updated.
+ void CopyEnvironmentFrom(HEnvironment* environment) {
+ ArenaAllocator* allocator = GetBlock()->GetGraph()->GetArena();
+ environment_ = new (allocator) HEnvironment(allocator, environment->Size());
+ environment_->CopyFrom(environment);
+ }
// Returns the number of entries in the environment. Typically, that is the
// number of dex registers in a method. It could be more in case of inlining.
@@ -1544,12 +1555,31 @@ class HIf : public HTemplateInstruction<1> {
DECLARE_INSTRUCTION(If);
- virtual bool IsIfInstruction() const { return true; }
-
private:
DISALLOW_COPY_AND_ASSIGN(HIf);
};
+// Deoptimize to interpreter, upon checking a condition.
+class HDeoptimize : public HTemplateInstruction<1> {
+ public:
+ HDeoptimize(HInstruction* cond, uint32_t dex_pc)
+ : HTemplateInstruction(SideEffects::None()),
+ dex_pc_(dex_pc) {
+ SetRawInputAt(0, cond);
+ }
+
+ bool NeedsEnvironment() const OVERRIDE { return true; }
+ bool CanThrow() const OVERRIDE { return true; }
+ uint32_t GetDexPc() const { return dex_pc_; }
+
+ DECLARE_INSTRUCTION(Deoptimize);
+
+ private:
+ uint32_t dex_pc_;
+
+ DISALLOW_COPY_AND_ASSIGN(HDeoptimize);
+};
+
class HUnaryOperation : public HExpression<1> {
public:
HUnaryOperation(Primitive::Type result_type, HInstruction* input)
@@ -1667,8 +1697,8 @@ class HCondition : public HBinaryOperation {
void ClearNeedsMaterialization() { needs_materialization_ = false; }
// For code generation purposes, returns whether this instruction is just before
- // `if_`, and disregard moves in between.
- bool IsBeforeWhenDisregardMoves(HIf* if_) const;
+ // `instruction`, disregarding moves in between.
+ bool IsBeforeWhenDisregardMoves(HInstruction* instruction) const;
DECLARE_INSTRUCTION(Condition);
@@ -2307,6 +2337,9 @@ class HNewArray : public HExpression<1> {
// Calls runtime so needs an environment.
bool NeedsEnvironment() const OVERRIDE { return true; }
+ // May throw NegativeArraySizeException, OutOfMemoryError, etc.
+ bool CanThrow() const OVERRIDE { return true; }
+
bool CanBeNull() const OVERRIDE { return false; }
QuickEntrypointEnum GetEntrypoint() const { return entrypoint_; }
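
HDeoptimize gives optimizing passes a way to bail out to the interpreter when a speculative assumption fails. A hypothetical sketch of how a pass might plant such a guard (`cursor`, `actual_value`, `expected_value`, and `dex_pc` are illustrative names, not part of this change):

    // Hypothetical guard insertion; assumes an ArenaAllocator* allocator in scope.
    HCondition* cond = new (allocator) HNotEqual(actual_value, expected_value);
    block->InsertInstructionBefore(cond, cursor);
    HDeoptimize* deopt = new (allocator) HDeoptimize(cond, dex_pc);
    block->InsertInstructionBefore(deopt, cursor);
    // Copying (rather than SetRawEnvironment) keeps the environment uses lists consistent.
    deopt->CopyEnvironmentFrom(cursor->GetEnvironment());

Because NeedsEnvironment() and CanThrow() both return true, later phases treat the guard like any other throwing runtime call.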
diff --git a/compiler/optimizing/nodes_test.cc b/compiler/optimizing/nodes_test.cc
index 4cf22d3b2e..4e83ce576c 100644
--- a/compiler/optimizing/nodes_test.cc
+++ b/compiler/optimizing/nodes_test.cc
@@ -50,7 +50,7 @@ TEST(Node, RemoveInstruction) {
exit_block->AddInstruction(new (&allocator) HExit());
HEnvironment* environment = new (&allocator) HEnvironment(&allocator, 1);
- null_check->SetEnvironment(environment);
+ null_check->SetRawEnvironment(environment);
environment->SetRawEnvAt(0, parameter);
parameter->AddEnvUseAt(null_check->GetEnvironment(), 0);
diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc
new file mode 100644
index 0000000000..6d986ba7d3
--- /dev/null
+++ b/compiler/optimizing/optimizing_cfi_test.cc
@@ -0,0 +1,127 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <memory>
+#include <vector>
+
+#include "arch/instruction_set.h"
+#include "cfi_test.h"
+#include "gtest/gtest.h"
+#include "optimizing/code_generator.h"
+#include "utils/assembler.h"
+
+#include "optimizing/optimizing_cfi_test_expected.inc"
+
+namespace art {
+
+// Run the tests only on host.
+#ifndef HAVE_ANDROID_OS
+
+class OptimizingCFITest : public CFITest {
+ public:
+ // Enable this flag to generate the expected outputs.
+ static constexpr bool kGenerateExpected = false;
+
+ void TestImpl(InstructionSet isa, const char* isa_str,
+ const std::vector<uint8_t>& expected_asm,
+ const std::vector<uint8_t>& expected_cfi) {
+ // Set up a simple context.
+ ArenaPool pool;
+ ArenaAllocator allocator(&pool);
+ CompilerOptions opts;
+ std::unique_ptr<const InstructionSetFeatures> isa_features;
+ std::string error;
+ isa_features.reset(InstructionSetFeatures::FromVariant(isa, "default", &error));
+ HGraph graph(&allocator);
+ // Generate a simple frame with some spills.
+ std::unique_ptr<CodeGenerator> code_gen(
+ CodeGenerator::Create(&graph, isa, *isa_features.get(), opts));
+ const int frame_size = 64;
+ int core_reg = 0;
+ int fp_reg = 0;
+ for (int i = 0; i < 2; i++) { // Two registers of each kind.
+ for (; core_reg < 32; core_reg++) {
+ if (code_gen->IsCoreCalleeSaveRegister(core_reg)) {
+ auto location = Location::RegisterLocation(core_reg);
+ code_gen->AddAllocatedRegister(location);
+ core_reg++;
+ break;
+ }
+ }
+ for (; fp_reg < 32; fp_reg++) {
+ if (code_gen->IsFloatingPointCalleeSaveRegister(fp_reg)) {
+ auto location = Location::FpuRegisterLocation(fp_reg);
+ code_gen->AddAllocatedRegister(location);
+ fp_reg++;
+ break;
+ }
+ }
+ }
+ code_gen->ComputeSpillMask();
+ code_gen->SetFrameSize(frame_size);
+ code_gen->GenerateFrameEntry();
+ code_gen->GetInstructionVisitor()->VisitReturnVoid(new (&allocator) HReturnVoid());
+ // Get the outputs.
+ InternalCodeAllocator code_allocator;
+ code_gen->Finalize(&code_allocator);
+ const std::vector<uint8_t>& actual_asm = code_allocator.GetMemory();
+ Assembler* opt_asm = code_gen->GetAssembler();
+ const std::vector<uint8_t>& actual_cfi = *(opt_asm->cfi().data());
+
+ if (kGenerateExpected) {
+ GenerateExpected(stdout, isa, isa_str, actual_asm, actual_cfi);
+ } else {
+ EXPECT_EQ(expected_asm, actual_asm);
+ EXPECT_EQ(expected_cfi, actual_cfi);
+ }
+ }
+
+ private:
+ class InternalCodeAllocator : public CodeAllocator {
+ public:
+ InternalCodeAllocator() {}
+
+ virtual uint8_t* Allocate(size_t size) {
+ memory_.resize(size);
+ return memory_.data();
+ }
+
+ const std::vector<uint8_t>& GetMemory() { return memory_; }
+
+ private:
+ std::vector<uint8_t> memory_;
+
+ DISALLOW_COPY_AND_ASSIGN(InternalCodeAllocator);
+ };
+};
+
+#define TEST_ISA(isa) \
+ TEST_F(OptimizingCFITest, isa) { \
+ std::vector<uint8_t> expected_asm(expected_asm_##isa, \
+ expected_asm_##isa + arraysize(expected_asm_##isa)); \
+ std::vector<uint8_t> expected_cfi(expected_cfi_##isa, \
+ expected_cfi_##isa + arraysize(expected_cfi_##isa)); \
+ TestImpl(isa, #isa, expected_asm, expected_cfi); \
+ }
+
+TEST_ISA(kThumb2)
+TEST_ISA(kArm64)
+TEST_ISA(kX86)
+TEST_ISA(kX86_64)
+
+#endif // HAVE_ANDROID_OS
+
+} // namespace art
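
For reference, each TEST_ISA(isa) line above expands to an ordinary gtest case; for kX86 the expansion is roughly:

    TEST_F(OptimizingCFITest, kX86) {
      std::vector<uint8_t> expected_asm(
          expected_asm_kX86, expected_asm_kX86 + arraysize(expected_asm_kX86));
      std::vector<uint8_t> expected_cfi(
          expected_cfi_kX86, expected_cfi_kX86 + arraysize(expected_cfi_kX86));
      TestImpl(kX86, "kX86", expected_asm, expected_cfi);
    }

so regenerating the .inc file below is a matter of flipping kGenerateExpected to true and pasting the stdout of a host test run.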
diff --git a/compiler/optimizing/optimizing_cfi_test_expected.inc b/compiler/optimizing/optimizing_cfi_test_expected.inc
new file mode 100644
index 0000000000..2125f6eb01
--- /dev/null
+++ b/compiler/optimizing/optimizing_cfi_test_expected.inc
@@ -0,0 +1,141 @@
+static constexpr uint8_t expected_asm_kThumb2[] = {
+ 0x60, 0xB5, 0x2D, 0xED, 0x02, 0x8A, 0x8B, 0xB0, 0x00, 0x90, 0x0B, 0xB0,
+ 0xBD, 0xEC, 0x02, 0x8A, 0x60, 0xBD,
+};
+static constexpr uint8_t expected_cfi_kThumb2[] = {
+ 0x42, 0x0E, 0x0C, 0x85, 0x03, 0x86, 0x02, 0x8E, 0x01, 0x44, 0x0E, 0x14,
+ 0x05, 0x50, 0x05, 0x05, 0x51, 0x04, 0x42, 0x0E, 0x40, 0x42, 0x0A, 0x42,
+ 0x0E, 0x14, 0x44, 0x0E, 0x0C, 0x06, 0x50, 0x06, 0x51, 0x42, 0x0B, 0x0E,
+ 0x40,
+};
+// 0x00000000: push {r5, r6, lr}
+// 0x00000002: .cfi_def_cfa_offset: 12
+// 0x00000002: .cfi_offset: r5 at cfa-12
+// 0x00000002: .cfi_offset: r6 at cfa-8
+// 0x00000002: .cfi_offset: r14 at cfa-4
+// 0x00000002: vpush.f32 {s16-s17}
+// 0x00000006: .cfi_def_cfa_offset: 20
+// 0x00000006: .cfi_offset_extended: r80 at cfa-20
+// 0x00000006: .cfi_offset_extended: r81 at cfa-16
+// 0x00000006: sub sp, sp, #44
+// 0x00000008: .cfi_def_cfa_offset: 64
+// 0x00000008: str r0, [sp, #0]
+// 0x0000000a: .cfi_remember_state
+// 0x0000000a: add sp, sp, #44
+// 0x0000000c: .cfi_def_cfa_offset: 20
+// 0x0000000c: vpop.f32 {s16-s17}
+// 0x00000010: .cfi_def_cfa_offset: 12
+// 0x00000010: .cfi_restore_extended: r80
+// 0x00000010: .cfi_restore_extended: r81
+// 0x00000010: pop {r5, r6, pc}
+// 0x00000012: .cfi_restore_state
+// 0x00000012: .cfi_def_cfa_offset: 64
+
+static constexpr uint8_t expected_asm_kArm64[] = {
+ 0xE0, 0x0F, 0x1C, 0xB8, 0xF3, 0xD3, 0x02, 0xA9, 0xFE, 0x1F, 0x00, 0xF9,
+ 0xE8, 0xA7, 0x01, 0x6D, 0xE8, 0xA7, 0x41, 0x6D, 0xF3, 0xD3, 0x42, 0xA9,
+ 0xFE, 0x1F, 0x40, 0xF9, 0xFF, 0x03, 0x01, 0x91, 0xC0, 0x03, 0x5F, 0xD6,
+};
+static constexpr uint8_t expected_cfi_kArm64[] = {
+ 0x44, 0x0E, 0x40, 0x44, 0x93, 0x06, 0x94, 0x04, 0x44, 0x9E, 0x02, 0x44,
+ 0x05, 0x48, 0x0A, 0x05, 0x49, 0x08, 0x0A, 0x44, 0x06, 0x48, 0x06, 0x49,
+ 0x44, 0xD3, 0xD4, 0x44, 0xDE, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, 0x40,
+};
+// 0x00000000: str w0, [sp, #-64]!
+// 0x00000004: .cfi_def_cfa_offset: 64
+// 0x00000004: stp x19, x20, [sp, #40]
+// 0x00000008: .cfi_offset: r19 at cfa-24
+// 0x00000008: .cfi_offset: r20 at cfa-16
+// 0x00000008: str lr, [sp, #56]
+// 0x0000000c: .cfi_offset: r30 at cfa-8
+// 0x0000000c: stp d8, d9, [sp, #24]
+// 0x00000010: .cfi_offset_extended: r72 at cfa-40
+// 0x00000010: .cfi_offset_extended: r73 at cfa-32
+// 0x00000010: .cfi_remember_state
+// 0x00000010: ldp d8, d9, [sp, #24]
+// 0x00000014: .cfi_restore_extended: r72
+// 0x00000014: .cfi_restore_extended: r73
+// 0x00000014: ldp x19, x20, [sp, #40]
+// 0x00000018: .cfi_restore: r19
+// 0x00000018: .cfi_restore: r20
+// 0x00000018: ldr lr, [sp, #56]
+// 0x0000001c: .cfi_restore: r30
+// 0x0000001c: add sp, sp, #0x40 (64)
+// 0x00000020: .cfi_def_cfa_offset: 0
+// 0x00000020: ret
+// 0x00000024: .cfi_restore_state
+// 0x00000024: .cfi_def_cfa_offset: 64
+
+static constexpr uint8_t expected_asm_kX86[] = {
+ 0x56, 0x55, 0x83, 0xEC, 0x34, 0x89, 0x04, 0x24, 0x83, 0xC4, 0x34, 0x5D,
+ 0x5E, 0xC3,
+};
+static constexpr uint8_t expected_cfi_kX86[] = {
+ 0x41, 0x0E, 0x08, 0x86, 0x02, 0x41, 0x0E, 0x0C, 0x85, 0x03, 0x43, 0x0E,
+ 0x40, 0x43, 0x0A, 0x43, 0x0E, 0x0C, 0x41, 0x0E, 0x08, 0xC5, 0x41, 0x0E,
+ 0x04, 0xC6, 0x41, 0x0B, 0x0E, 0x40,
+};
+// 0x00000000: push esi
+// 0x00000001: .cfi_def_cfa_offset: 8
+// 0x00000001: .cfi_offset: r6 at cfa-8
+// 0x00000001: push ebp
+// 0x00000002: .cfi_def_cfa_offset: 12
+// 0x00000002: .cfi_offset: r5 at cfa-12
+// 0x00000002: sub esp, 52
+// 0x00000005: .cfi_def_cfa_offset: 64
+// 0x00000005: mov [esp], eax
+// 0x00000008: .cfi_remember_state
+// 0x00000008: add esp, 52
+// 0x0000000b: .cfi_def_cfa_offset: 12
+// 0x0000000b: pop ebp
+// 0x0000000c: .cfi_def_cfa_offset: 8
+// 0x0000000c: .cfi_restore: r5
+// 0x0000000c: pop esi
+// 0x0000000d: .cfi_def_cfa_offset: 4
+// 0x0000000d: .cfi_restore: r6
+// 0x0000000d: ret
+// 0x0000000e: .cfi_restore_state
+// 0x0000000e: .cfi_def_cfa_offset: 64
+
+static constexpr uint8_t expected_asm_kX86_64[] = {
+ 0x55, 0x53, 0x48, 0x83, 0xEC, 0x28, 0xF2, 0x44, 0x0F, 0x11, 0x6C, 0x24,
+ 0x20, 0xF2, 0x44, 0x0F, 0x11, 0x64, 0x24, 0x18, 0x89, 0x3C, 0x24, 0xF2,
+ 0x44, 0x0F, 0x10, 0x64, 0x24, 0x18, 0xF2, 0x44, 0x0F, 0x10, 0x6C, 0x24,
+ 0x20, 0x48, 0x83, 0xC4, 0x28, 0x5B, 0x5D, 0xC3,
+};
+static constexpr uint8_t expected_cfi_kX86_64[] = {
+ 0x41, 0x0E, 0x10, 0x86, 0x04, 0x41, 0x0E, 0x18, 0x83, 0x06, 0x44, 0x0E,
+ 0x40, 0x47, 0x9E, 0x08, 0x47, 0x9D, 0x0A, 0x43, 0x0A, 0x47, 0xDD, 0x47,
+ 0xDE, 0x44, 0x0E, 0x18, 0x41, 0x0E, 0x10, 0xC3, 0x41, 0x0E, 0x08, 0xC6,
+ 0x41, 0x0B, 0x0E, 0x40,
+};
+// 0x00000000: push rbp
+// 0x00000001: .cfi_def_cfa_offset: 16
+// 0x00000001: .cfi_offset: r6 at cfa-16
+// 0x00000001: push rbx
+// 0x00000002: .cfi_def_cfa_offset: 24
+// 0x00000002: .cfi_offset: r3 at cfa-24
+// 0x00000002: subq rsp, 40
+// 0x00000006: .cfi_def_cfa_offset: 64
+// 0x00000006: movsd [rsp + 32], xmm13
+// 0x0000000d: .cfi_offset: r30 at cfa-32
+// 0x0000000d: movsd [rsp + 24], xmm12
+// 0x00000014: .cfi_offset: r29 at cfa-40
+// 0x00000014: mov [rsp], edi
+// 0x00000017: .cfi_remember_state
+// 0x00000017: movsd xmm12, [rsp + 24]
+// 0x0000001e: .cfi_restore: r29
+// 0x0000001e: movsd xmm13, [rsp + 32]
+// 0x00000025: .cfi_restore: r30
+// 0x00000025: addq rsp, 40
+// 0x00000029: .cfi_def_cfa_offset: 24
+// 0x00000029: pop rbx
+// 0x0000002a: .cfi_def_cfa_offset: 16
+// 0x0000002a: .cfi_restore: r3
+// 0x0000002a: pop rbp
+// 0x0000002b: .cfi_def_cfa_offset: 8
+// 0x0000002b: .cfi_restore: r6
+// 0x0000002b: ret
+// 0x0000002c: .cfi_restore_state
+// 0x0000002c: .cfi_def_cfa_offset: 64
+
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index b2f9c65153..0e02212867 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -26,11 +26,13 @@
#include "bounds_check_elimination.h"
#include "builder.h"
#include "code_generator.h"
+#include "compiled_method.h"
#include "compiler.h"
#include "constant_folding.h"
#include "dead_code_elimination.h"
#include "dex/quick/dex_file_to_method_inliner_map.h"
#include "driver/compiler_driver.h"
+#include "driver/compiler_options.h"
#include "driver/dex_compilation_unit.h"
#include "elf_writer_quick.h"
#include "graph_visualizer.h"
@@ -48,6 +50,7 @@
#include "ssa_builder.h"
#include "ssa_phi_elimination.h"
#include "ssa_liveness_analysis.h"
+#include "utils/assembler.h"
#include "reference_type_propagation.h"
namespace art {
@@ -94,10 +97,13 @@ class PassInfoPrinter : public ValueObject {
timing_logger_enabled_(compiler_driver->GetDumpPasses()),
timing_logger_(method_name, true, true),
visualizer_enabled_(!compiler_driver->GetDumpCfgFileName().empty()),
- visualizer_(visualizer_output, graph, codegen, method_name_) {
+ visualizer_(visualizer_output, graph, codegen) {
if (strstr(method_name, kStringFilter) == nullptr) {
timing_logger_enabled_ = visualizer_enabled_ = false;
}
+ if (visualizer_enabled_) {
+ visualizer_.PrintHeader(method_name_);
+ }
}
~PassInfoPrinter() {
@@ -199,8 +205,13 @@ class OptimizingCompiler FINAL : public Compiler {
const std::vector<const art::DexFile*>& dex_files,
const std::string& android_root,
bool is_host) const OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
- return art::ElfWriterQuick32::Create(file, oat_writer, dex_files, android_root, is_host,
- *GetCompilerDriver());
+ if (kProduce64BitELFFiles && Is64BitInstructionSet(GetCompilerDriver()->GetInstructionSet())) {
+ return art::ElfWriterQuick64::Create(file, oat_writer, dex_files, android_root, is_host,
+ *GetCompilerDriver());
+ } else {
+ return art::ElfWriterQuick32::Create(file, oat_writer, dex_files, android_root, is_host,
+ *GetCompilerDriver());
+ }
}
void InitCompilationUnit(CompilationUnit& cu) const OVERRIDE;
@@ -360,6 +371,9 @@ static ArrayRef<const uint8_t> AlignVectorSize(std::vector<uint8_t>& vector) {
return ArrayRef<const uint8_t>(vector);
}
+// TODO: The function below uses too much stack space.
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wframe-larger-than="
CompiledMethod* OptimizingCompiler::CompileOptimized(HGraph* graph,
CodeGenerator* codegen,
@@ -385,12 +399,17 @@ CompiledMethod* OptimizingCompiler::CompileOptimized(HGraph* graph,
CodeVectorAllocator allocator;
codegen->CompileOptimized(&allocator);
+ DefaultSrcMap src_mapping_table;
+ if (compiler_driver->GetCompilerOptions().GetIncludeDebugSymbols()) {
+ codegen->BuildSourceMap(&src_mapping_table);
+ }
+
std::vector<uint8_t> stack_map;
codegen->BuildStackMaps(&stack_map);
compilation_stats_.RecordStat(MethodCompilationStat::kCompiledOptimized);
- return CompiledMethod::SwapAllocCompiledMethodStackMap(
+ return CompiledMethod::SwapAllocCompiledMethod(
compiler_driver,
codegen->GetInstructionSet(),
ArrayRef<const uint8_t>(allocator.GetMemory()),
@@ -400,9 +419,15 @@ CompiledMethod* OptimizingCompiler::CompileOptimized(HGraph* graph,
codegen->HasEmptyFrame() ? 0 : codegen->GetFrameSize(),
codegen->GetCoreSpillMask(),
codegen->GetFpuSpillMask(),
- ArrayRef<const uint8_t>(stack_map));
+ &src_mapping_table,
+ ArrayRef<const uint8_t>(), // mapping_table.
+ ArrayRef<const uint8_t>(stack_map),
+ ArrayRef<const uint8_t>(), // native_gc_map.
+ ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()),
+ ArrayRef<const LinkerPatch>());
}
+#pragma GCC diagnostic pop
CompiledMethod* OptimizingCompiler::CompileBaseline(
CodeGenerator* codegen,
@@ -412,9 +437,11 @@ CompiledMethod* OptimizingCompiler::CompileBaseline(
codegen->CompileBaseline(&allocator);
std::vector<uint8_t> mapping_table;
+ codegen->BuildMappingTable(&mapping_table);
DefaultSrcMap src_mapping_table;
- bool include_debug_symbol = compiler_driver->GetCompilerOptions().GetIncludeDebugSymbols();
- codegen->BuildMappingTable(&mapping_table, include_debug_symbol ? &src_mapping_table : nullptr);
+ if (compiler_driver->GetCompilerOptions().GetIncludeDebugSymbols()) {
+ codegen->BuildSourceMap(&src_mapping_table);
+ }
std::vector<uint8_t> vmap_table;
codegen->BuildVMapTable(&vmap_table);
std::vector<uint8_t> gc_map;
@@ -435,7 +462,8 @@ CompiledMethod* OptimizingCompiler::CompileBaseline(
AlignVectorSize(mapping_table),
AlignVectorSize(vmap_table),
AlignVectorSize(gc_map),
- ArrayRef<const uint8_t>());
+ ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()),
+ ArrayRef<const LinkerPatch>());
}
CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_item,
@@ -501,6 +529,8 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite
compilation_stats_.RecordStat(MethodCompilationStat::kNotCompiledNoCodegen);
return nullptr;
}
+ codegen->GetAssembler()->cfi().SetEnabled(
+ compiler_driver->GetCompilerOptions().GetIncludeDebugSymbols());
PassInfoPrinter pass_info_printer(graph,
method_name.c_str(),
diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc
index 7d0641ec13..4936685367 100644
--- a/compiler/optimizing/parallel_move_resolver.cc
+++ b/compiler/optimizing/parallel_move_resolver.cc
@@ -13,6 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+#include <iostream>
#include "parallel_move_resolver.h"
#include "nodes.h"
@@ -63,39 +64,42 @@ void ParallelMoveResolver::BuildInitialMoveList(HParallelMove* parallel_move) {
}
}
+Location LowOf(Location location) {
+ if (location.IsRegisterPair()) {
+ return Location::RegisterLocation(location.low());
+ } else if (location.IsFpuRegisterPair()) {
+ return Location::FpuRegisterLocation(location.low());
+ } else if (location.IsDoubleStackSlot()) {
+ return Location::StackSlot(location.GetStackIndex());
+ } else {
+ return Location::NoLocation();
+ }
+}
+
+Location HighOf(Location location) {
+ if (location.IsRegisterPair()) {
+ return Location::RegisterLocation(location.high());
+ } else if (location.IsFpuRegisterPair()) {
+ return Location::FpuRegisterLocation(location.high());
+ } else if (location.IsDoubleStackSlot()) {
+ return Location::StackSlot(location.GetHighStackIndex(4));
+ } else {
+ return Location::NoLocation();
+ }
+}
+
// Update the source of `move`, knowing that `updated_location` has been swapped
// with `new_source`. Note that `updated_location` can be a pair, therefore if
// `move` is non-pair, we need to extract which register to use.
static void UpdateSourceOf(MoveOperands* move, Location updated_location, Location new_source) {
Location source = move->GetSource();
- if (new_source.GetKind() == source.GetKind()) {
- DCHECK(updated_location.Equals(source));
- move->SetSource(new_source);
- } else if (new_source.IsStackSlot()
- || new_source.IsDoubleStackSlot()
- || source.IsStackSlot()
- || source.IsDoubleStackSlot()) {
- // Stack slots never take part of a pair/non-pair swap.
- DCHECK(updated_location.Equals(source));
+ if (LowOf(updated_location).Equals(source)) {
+ move->SetSource(LowOf(new_source));
+ } else if (HighOf(updated_location).Equals(source)) {
+ move->SetSource(HighOf(new_source));
+ } else {
+ DCHECK(updated_location.Equals(source)) << updated_location << " " << source;
move->SetSource(new_source);
- } else if (source.IsRegister()) {
- DCHECK(new_source.IsRegisterPair()) << new_source;
- DCHECK(updated_location.IsRegisterPair()) << updated_location;
- if (updated_location.low() == source.reg()) {
- move->SetSource(Location::RegisterLocation(new_source.low()));
- } else {
- DCHECK_EQ(updated_location.high(), source.reg());
- move->SetSource(Location::RegisterLocation(new_source.high()));
- }
- } else if (source.IsFpuRegister()) {
- DCHECK(new_source.IsFpuRegisterPair()) << new_source;
- DCHECK(updated_location.IsFpuRegisterPair()) << updated_location;
- if (updated_location.low() == source.reg()) {
- move->SetSource(Location::FpuRegisterLocation(new_source.low()));
- } else {
- DCHECK_EQ(updated_location.high(), source.reg());
- move->SetSource(Location::FpuRegisterLocation(new_source.high()));
- }
}
}
@@ -265,6 +269,20 @@ int ParallelMoveResolver::AllocateScratchRegister(int blocked,
}
+int ParallelMoveResolver::AllocateScratchRegister(int blocked,
+ int register_count) {
+ int scratch = -1;
+ for (int reg = 0; reg < register_count; ++reg) {
+ if ((blocked != reg) && IsScratchLocation(Location::RegisterLocation(reg))) {
+ scratch = reg;
+ break;
+ }
+ }
+
+ return scratch;
+}
+
+
ParallelMoveResolver::ScratchRegisterScope::ScratchRegisterScope(
ParallelMoveResolver* resolver, int blocked, int if_scratch, int number_of_registers)
: resolver_(resolver),
@@ -278,6 +296,16 @@ ParallelMoveResolver::ScratchRegisterScope::ScratchRegisterScope(
}
+ParallelMoveResolver::ScratchRegisterScope::ScratchRegisterScope(
+ ParallelMoveResolver* resolver, int blocked, int number_of_registers)
+ : resolver_(resolver),
+ reg_(kNoRegister),
+ spilled_(false) {
+ // We don't want to spill a register if none are free.
+ reg_ = resolver_->AllocateScratchRegister(blocked, number_of_registers);
+}
+
+
ParallelMoveResolver::ScratchRegisterScope::~ScratchRegisterScope() {
if (spilled_) {
resolver_->RestoreScratch(reg_);
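
The LowOf/HighOf helpers collapse the old case analysis in UpdateSourceOf. A quick illustration of what they return (values are examples, not taken from the tests):

    LowOf(Location::RegisterPairLocation(4, 5));   // Location::RegisterLocation(4)
    HighOf(Location::RegisterPairLocation(4, 5));  // Location::RegisterLocation(5)
    LowOf(Location::DoubleStackSlot(32));          // Location::StackSlot(32)
    HighOf(Location::DoubleStackSlot(32));         // Location::StackSlot(36): GetHighStackIndex(4) adds the word size.
    LowOf(Location::RegisterLocation(7));          // Location::NoLocation(): not a pair.

With these, UpdateSourceOf only needs to test whether `source` matches the low half, the high half, or the whole of `updated_location`.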
diff --git a/compiler/optimizing/parallel_move_resolver.h b/compiler/optimizing/parallel_move_resolver.h
index 3fa1b37afd..173cffc71e 100644
--- a/compiler/optimizing/parallel_move_resolver.h
+++ b/compiler/optimizing/parallel_move_resolver.h
@@ -42,10 +42,15 @@ class ParallelMoveResolver : public ValueObject {
protected:
class ScratchRegisterScope : public ValueObject {
public:
+ // Spill a scratch register if no registers are free.
ScratchRegisterScope(ParallelMoveResolver* resolver,
int blocked,
int if_scratch,
int number_of_registers);
+ // Grab a scratch register only if available.
+ ScratchRegisterScope(ParallelMoveResolver* resolver,
+ int blocked,
+ int number_of_registers);
~ScratchRegisterScope();
int GetRegister() const { return reg_; }
@@ -62,6 +67,8 @@ class ParallelMoveResolver : public ValueObject {
// Allocate a scratch register for performing a move. The method will try to use
// a register that is the destination of a move, but that move has not been emitted yet.
int AllocateScratchRegister(int blocked, int if_scratch, int register_count, bool* spilled);
+ // As above, but return -1 when no register is free.
+ int AllocateScratchRegister(int blocked, int register_count);
// Emit a move.
virtual void EmitMove(size_t index) = 0;
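
A sketch of how a backend might use the new two-argument scope; unlike the three-argument form it never spills, so the caller must handle the no-register case (the surrounding resolver helpers are hypothetical):

    ScratchRegisterScope possible_scratch(this, blocked, codegen_->GetNumberOfCoreRegisters());
    int temp = possible_scratch.GetRegister();
    if (temp == kNoRegister) {
      // No free register: perform the exchange through the stack instead.
      ExchangeMemory(destination.GetStackIndex(), source.GetStackIndex());  // Hypothetical fallback.
    } else {
      Exchange(temp, source.GetStackIndex());  // Hypothetical register-assisted swap.
    }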
diff --git a/compiler/optimizing/parallel_move_test.cc b/compiler/optimizing/parallel_move_test.cc
index 817a44b184..5c502f7ef4 100644
--- a/compiler/optimizing/parallel_move_test.cc
+++ b/compiler/optimizing/parallel_move_test.cc
@@ -31,8 +31,13 @@ class TestParallelMoveResolver : public ParallelMoveResolver {
message_ << "C";
} else if (location.IsPair()) {
message_ << location.low() << "," << location.high();
- } else {
+ } else if (location.IsRegister()) {
message_ << location.reg();
+ } else if (location.IsStackSlot()) {
+ message_ << location.GetStackIndex() << "(sp)";
+ } else {
+ message_ << "2x" << location.GetStackIndex() << "(sp)";
+ DCHECK(location.IsDoubleStackSlot()) << location;
}
}
@@ -279,6 +284,26 @@ TEST(ParallelMoveTest, Pairs) {
resolver.EmitNativeCode(moves);
ASSERT_STREQ("(0,1 <-> 2,3)", resolver.GetMessage().c_str());
}
+
+ {
+ // Test involving registers used in single context and pair context.
+ TestParallelMoveResolver resolver(&allocator);
+ HParallelMove* moves = new (&allocator) HParallelMove(&allocator);
+ moves->AddMove(
+ Location::RegisterLocation(10),
+ Location::RegisterLocation(5),
+ nullptr);
+ moves->AddMove(
+ Location::RegisterPairLocation(4, 5),
+ Location::DoubleStackSlot(32),
+ nullptr);
+ moves->AddMove(
+ Location::DoubleStackSlot(32),
+ Location::RegisterPairLocation(10, 11),
+ nullptr);
+ resolver.EmitNativeCode(moves);
+ ASSERT_STREQ("(2x32(sp) <-> 10,11) (4,5 <-> 2x32(sp)) (4 -> 5)", resolver.GetMessage().c_str());
+ }
}
} // namespace art
diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc
index 2d9a2bf330..f5d8d82571 100644
--- a/compiler/optimizing/prepare_for_register_allocation.cc
+++ b/compiler/optimizing/prepare_for_register_allocation.cc
@@ -60,11 +60,11 @@ void PrepareForRegisterAllocation::VisitClinitCheck(HClinitCheck* check) {
void PrepareForRegisterAllocation::VisitCondition(HCondition* condition) {
bool needs_materialization = false;
- if (!condition->GetUses().HasOnlyOneUse()) {
+ if (!condition->GetUses().HasOnlyOneUse() || !condition->GetEnvUses().IsEmpty()) {
needs_materialization = true;
} else {
HInstruction* user = condition->GetUses().GetFirst()->GetUser();
- if (!user->IsIf()) {
+ if (!user->IsIf() && !user->IsDeoptimize()) {
needs_materialization = true;
} else {
// TODO: if there is no intervening instructions with side-effect between this condition
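
Taken together, the updated checks mean a condition can stay unmaterialized only when its single use is the If (or, now, Deoptimize) consuming it and no environment records its value. A consolidated sketch of the decision, using the same accessors as above:

    static bool NeedsMaterialization(HCondition* condition) {
      if (!condition->GetUses().HasOnlyOneUse() || !condition->GetEnvUses().IsEmpty()) {
        return true;  // The boolean value itself is observable somewhere.
      }
      HInstruction* user = condition->GetUses().GetFirst()->GetUser();
      // Only a branch or a deopt guard can consume the condition flags directly.
      return !user->IsIf() && !user->IsDeoptimize();
    }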
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index cf38bd3f8c..4bca43499f 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -1408,26 +1408,36 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) {
// Walk over all uses covered by this interval, and update the location
// information.
- while (use != nullptr && use->GetPosition() <= current->GetEnd()) {
- LocationSummary* locations = use->GetUser()->GetLocations();
- if (use->GetIsEnvironment()) {
- locations->SetEnvironmentAt(use->GetInputIndex(), source);
- } else {
- Location expected_location = locations->InAt(use->GetInputIndex());
- // The expected (actual) location may be invalid in case the input is unused. Currently
- // this only happens for intrinsics.
- if (expected_location.IsValid()) {
- if (expected_location.IsUnallocated()) {
- locations->SetInAt(use->GetInputIndex(), source);
- } else if (!expected_location.IsConstant()) {
- AddInputMoveFor(interval->GetDefinedBy(), use->GetUser(), source, expected_location);
- }
+
+ LiveRange* range = current->GetFirstRange();
+ while (range != nullptr) {
+ while (use != nullptr && use->GetPosition() < range->GetStart()) {
+ DCHECK(use->GetIsEnvironment());
+ use = use->GetNext();
+ }
+ while (use != nullptr && use->GetPosition() <= range->GetEnd()) {
+ DCHECK(current->Covers(use->GetPosition()) || (use->GetPosition() == range->GetEnd()));
+ LocationSummary* locations = use->GetUser()->GetLocations();
+ if (use->GetIsEnvironment()) {
+ locations->SetEnvironmentAt(use->GetInputIndex(), source);
} else {
- DCHECK(use->GetUser()->IsInvoke());
- DCHECK(use->GetUser()->AsInvoke()->GetIntrinsic() != Intrinsics::kNone);
+ Location expected_location = locations->InAt(use->GetInputIndex());
+ // The expected (actual) location may be invalid in case the input is unused. Currently
+ // this only happens for intrinsics.
+ if (expected_location.IsValid()) {
+ if (expected_location.IsUnallocated()) {
+ locations->SetInAt(use->GetInputIndex(), source);
+ } else if (!expected_location.IsConstant()) {
+ AddInputMoveFor(interval->GetDefinedBy(), use->GetUser(), source, expected_location);
+ }
+ } else {
+ DCHECK(use->GetUser()->IsInvoke());
+ DCHECK(use->GetUser()->AsInvoke()->GetIntrinsic() != Intrinsics::kNone);
+ }
}
+ use = use->GetNext();
}
- use = use->GetNext();
+ range = range->GetNext();
}
// If the next interval starts just after this one, and has a register,
@@ -1503,7 +1513,15 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) {
}
current = next_sibling;
} while (current != nullptr);
- DCHECK(use == nullptr);
+
+ if (kIsDebugBuild) {
+ // Following uses can only be environment uses. The location for
+ // these environments will be none.
+ while (use != nullptr) {
+ DCHECK(use->GetIsEnvironment());
+ use = use->GetNext();
+ }
+ }
}
void RegisterAllocator::ConnectSplitSiblings(LiveInterval* interval,
diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc
index 7c3a0357d6..3951439881 100644
--- a/compiler/optimizing/register_allocator_test.cc
+++ b/compiler/optimizing/register_allocator_test.cc
@@ -14,6 +14,7 @@
* limitations under the License.
*/
+#include "arch/x86/instruction_set_features_x86.h"
#include "base/arena_allocator.h"
#include "builder.h"
#include "code_generator.h"
@@ -42,7 +43,9 @@ static bool Check(const uint16_t* data) {
const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
builder.BuildGraph(*item);
graph->TryBuildingSsa();
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
RegisterAllocator register_allocator(&allocator, &codegen, liveness);
@@ -58,7 +61,9 @@ TEST(RegisterAllocatorTest, ValidateIntervals) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
HGraph* graph = new (&allocator) HGraph(&allocator);
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
GrowableArray<LiveInterval*> intervals(&allocator, 0);
// Test with two intervals of the same range.
@@ -298,7 +303,9 @@ TEST(RegisterAllocatorTest, Loop3) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
HGraph* graph = BuildSSAGraph(data, &allocator);
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
RegisterAllocator register_allocator(&allocator, &codegen, liveness);
@@ -330,7 +337,9 @@ TEST(RegisterAllocatorTest, FirstRegisterUse) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
HGraph* graph = BuildSSAGraph(data, &allocator);
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
@@ -383,7 +392,9 @@ TEST(RegisterAllocatorTest, DeadPhi) {
ArenaAllocator allocator(&pool);
HGraph* graph = BuildSSAGraph(data, &allocator);
SsaDeadPhiElimination(graph).Run();
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
RegisterAllocator register_allocator(&allocator, &codegen, liveness);
@@ -405,7 +416,9 @@ TEST(RegisterAllocatorTest, FreeUntil) {
ArenaAllocator allocator(&pool);
HGraph* graph = BuildSSAGraph(data, &allocator);
SsaDeadPhiElimination(graph).Run();
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
RegisterAllocator register_allocator(&allocator, &codegen, liveness);
@@ -507,7 +520,9 @@ TEST(RegisterAllocatorTest, PhiHint) {
{
HGraph* graph = BuildIfElseWithPhi(&allocator, &phi, &input1, &input2);
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
@@ -522,7 +537,9 @@ TEST(RegisterAllocatorTest, PhiHint) {
{
HGraph* graph = BuildIfElseWithPhi(&allocator, &phi, &input1, &input2);
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
@@ -539,7 +556,9 @@ TEST(RegisterAllocatorTest, PhiHint) {
{
HGraph* graph = BuildIfElseWithPhi(&allocator, &phi, &input1, &input2);
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
@@ -556,7 +575,9 @@ TEST(RegisterAllocatorTest, PhiHint) {
{
HGraph* graph = BuildIfElseWithPhi(&allocator, &phi, &input1, &input2);
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
@@ -608,7 +629,9 @@ TEST(RegisterAllocatorTest, ExpectedInRegisterHint) {
{
HGraph* graph = BuildFieldReturn(&allocator, &field, &ret);
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
@@ -621,7 +644,9 @@ TEST(RegisterAllocatorTest, ExpectedInRegisterHint) {
{
HGraph* graph = BuildFieldReturn(&allocator, &field, &ret);
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
@@ -671,7 +696,9 @@ TEST(RegisterAllocatorTest, SameAsFirstInputHint) {
{
HGraph* graph = BuildTwoSubs(&allocator, &first_sub, &second_sub);
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
@@ -685,7 +712,9 @@ TEST(RegisterAllocatorTest, SameAsFirstInputHint) {
{
HGraph* graph = BuildTwoSubs(&allocator, &first_sub, &second_sub);
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
@@ -734,7 +763,9 @@ TEST(RegisterAllocatorTest, ExpectedExactInRegisterAndSameOutputHint) {
{
HGraph* graph = BuildDiv(&allocator, &div);
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
@@ -822,7 +853,9 @@ TEST(RegisterAllocatorTest, SpillInactive) {
locations = new (&allocator) LocationSummary(fourth->GetDefinedBy(), LocationSummary::kNoCall);
locations->SetOut(Location::RequiresRegister());
- x86::CodeGeneratorX86 codegen(graph, CompilerOptions());
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
SsaLivenessAnalysis liveness(*graph, &codegen);
RegisterAllocator register_allocator(&allocator, &codegen, liveness);
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index fcc4e69b37..e154ea4ee6 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -487,7 +487,7 @@ void SsaBuilder::VisitInstruction(HInstruction* instruction) {
HEnvironment* environment = new (GetGraph()->GetArena()) HEnvironment(
GetGraph()->GetArena(), current_locals_->Size());
environment->CopyFrom(current_locals_);
- instruction->SetEnvironment(environment);
+ instruction->SetRawEnvironment(environment);
}
void SsaBuilder::VisitTemporary(HTemporary* temp) {
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index 0f3973e5fb..95da6ef551 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -218,28 +218,34 @@ void SsaLivenessAnalysis::ComputeLiveRanges() {
current->GetLiveInterval()->SetFrom(current->GetLifetimePosition());
}
- // All inputs of an instruction must be live.
- for (size_t i = 0, e = current->InputCount(); i < e; ++i) {
- HInstruction* input = current->InputAt(i);
- // Some instructions 'inline' their inputs, that is they do not need
- // to be materialized.
- if (input->HasSsaIndex()) {
- live_in->SetBit(input->GetSsaIndex());
- input->GetLiveInterval()->AddUse(current, i, false);
- }
- }
-
+ // Process the environment first, because we know environment uses come at or
+ // after the liveness position of the inputs.
if (current->HasEnvironment()) {
// Handle environment uses. See statements (b) and (c) of the
// SsaLivenessAnalysis.
HEnvironment* environment = current->GetEnvironment();
for (size_t i = 0, e = environment->Size(); i < e; ++i) {
HInstruction* instruction = environment->GetInstructionAt(i);
- if (ShouldBeLiveForEnvironment(instruction)) {
+ bool should_be_live = ShouldBeLiveForEnvironment(instruction);
+ if (should_be_live) {
DCHECK(instruction->HasSsaIndex());
live_in->SetBit(instruction->GetSsaIndex());
- instruction->GetLiveInterval()->AddUse(current, i, true);
}
+ if (instruction != nullptr) {
+ instruction->GetLiveInterval()->AddUse(
+ current, i, /* is_environment */ true, should_be_live);
+ }
+ }
+ }
+
+ // All inputs of an instruction must be live.
+ for (size_t i = 0, e = current->InputCount(); i < e; ++i) {
+ HInstruction* input = current->InputAt(i);
+ // Some instructions 'inline' their inputs, that is they do not need
+ // to be materialized.
+ if (input->HasSsaIndex()) {
+ live_in->SetBit(input->GetSsaIndex());
+ input->GetLiveInterval()->AddUse(current, i, /* is_environment */ false);
}
}
}
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index bc78dc2e76..d2da84c0c0 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -189,7 +189,10 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
AddRange(position, position + 1);
}
- void AddUse(HInstruction* instruction, size_t input_index, bool is_environment) {
+ void AddUse(HInstruction* instruction,
+ size_t input_index,
+ bool is_environment,
+ bool keep_alive = false) {
// Set the use within the instruction.
size_t position = instruction->GetLifetimePosition() + 1;
LocationSummary* locations = instruction->GetLocations();
@@ -211,6 +214,7 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
&& (first_use_->GetPosition() < position)) {
// The user uses the instruction multiple times, and one use dies before the other.
// We update the use list so that the latter is first.
+ DCHECK(!is_environment);
UsePosition* cursor = first_use_;
while ((cursor->GetNext() != nullptr) && (cursor->GetNext()->GetPosition() < position)) {
cursor = cursor->GetNext();
@@ -225,6 +229,15 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
return;
}
+ first_use_ = new (allocator_) UsePosition(
+ instruction, input_index, is_environment, position, first_use_);
+
+ if (is_environment && !keep_alive) {
+ // If this environment use does not keep the instruction live, it does not
+ // affect the live range of that instruction.
+ return;
+ }
+
size_t start_block_position = instruction->GetBlock()->GetLifetimeStart();
if (first_range_ == nullptr) {
// First time we see a use of that interval.
@@ -246,8 +259,6 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
// and the check line 205 would succeed.
first_range_ = new (allocator_) LiveRange(start_block_position, position, first_range_);
}
- first_use_ = new (allocator_) UsePosition(
- instruction, input_index, is_environment, position, first_use_);
}
void AddPhiUse(HInstruction* instruction, size_t input_index, HBasicBlock* block) {
@@ -425,9 +436,11 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
UsePosition* use = first_use_;
size_t end = GetEnd();
while (use != nullptr && use->GetPosition() <= end) {
- size_t use_position = use->GetPosition();
- if (use_position > position) {
- return use_position;
+ if (!use->GetIsEnvironment()) {
+ size_t use_position = use->GetPosition();
+ if (use_position > position) {
+ return use_position;
+ }
}
use = use->GetNext();
}
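
The extra `keep_alive` parameter separates recording a use from extending liveness: environment uses are now always pushed onto `first_use_`, but only extend the live range when they must keep the value alive. The callers' contract, in sketch form:

    interval->AddUse(user, i, /* is_environment */ false);                        // Input: extends the range.
    interval->AddUse(user, i, /* is_environment */ true, /* keep_alive */ true);  // Env use that must stay live.
    interval->AddUse(user, i, /* is_environment */ true);                         // Env use recorded, range untouched.

This is also why the use walk above now skips environment uses when looking for the next register-use position.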
diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h
index 5818a37a46..a73c8d77f3 100644
--- a/compiler/optimizing/stack_map_stream.h
+++ b/compiler/optimizing/stack_map_stream.h
@@ -27,6 +27,32 @@
namespace art {
+// Helper to build art::StackMapStream::LocationCatalogEntriesIndices.
+class LocationCatalogEntriesIndicesEmptyFn {
+ public:
+ void MakeEmpty(std::pair<DexRegisterLocation, size_t>& item) const {
+ item.first = DexRegisterLocation::None();
+ }
+ bool IsEmpty(const std::pair<DexRegisterLocation, size_t>& item) const {
+ return item.first == DexRegisterLocation::None();
+ }
+};
+
+// Hash function for art::StackMapStream::LocationCatalogEntriesIndices.
+// This hash function does not create collisions.
+class DexRegisterLocationHashFn {
+ public:
+ size_t operator()(DexRegisterLocation key) const {
+ // Concatenate `key`'s fields to create a 64-bit value to be hashed.
+ int64_t kind_and_value =
+ (static_cast<int64_t>(key.kind_) << 32) | static_cast<int64_t>(key.value_);
+ return inner_hash_fn_(kind_and_value);
+ }
+ private:
+ std::hash<int64_t> inner_hash_fn_;
+};
+
+
/**
* Collects and builds stack maps for a method. All the stack maps
* for a method are placed in a CodeInfo object.
@@ -36,11 +62,13 @@ class StackMapStream : public ValueObject {
explicit StackMapStream(ArenaAllocator* allocator)
: allocator_(allocator),
stack_maps_(allocator, 10),
+ location_catalog_entries_(allocator, 4),
dex_register_locations_(allocator, 10 * 4),
inline_infos_(allocator, 2),
stack_mask_max_(-1),
dex_pc_max_(0),
native_pc_offset_max_(0),
+ register_mask_max_(0),
number_of_stack_maps_with_inline_info_(0),
dex_map_hash_to_stack_map_indices_(std::less<uint32_t>(), allocator->Adapter()) {}
@@ -101,6 +129,7 @@ class StackMapStream : public ValueObject {
dex_pc_max_ = std::max(dex_pc_max_, dex_pc);
native_pc_offset_max_ = std::max(native_pc_offset_max_, native_pc_offset);
+ register_mask_max_ = std::max(register_mask_max_, register_mask);
}
void AddInlineInfoEntry(uint32_t method_index) {
@@ -111,6 +140,7 @@ class StackMapStream : public ValueObject {
size_t ComputeNeededSize() {
size_t size = CodeInfo::kFixedSize
+ + ComputeDexRegisterLocationCatalogSize()
+ ComputeStackMapsSize()
+ ComputeDexRegisterMapsSize()
+ ComputeInlineInfoSize();
@@ -128,24 +158,43 @@ class StackMapStream : public ValueObject {
ComputeInlineInfoSize(),
ComputeDexRegisterMapsSize(),
dex_pc_max_,
- native_pc_offset_max_);
+ native_pc_offset_max_,
+ register_mask_max_);
+ }
+
+ // Compute the size of the Dex register location catalog.
+ size_t ComputeDexRegisterLocationCatalogSize() const {
+ size_t size = DexRegisterLocationCatalog::kFixedSize;
+ for (size_t location_catalog_entry_index = 0;
+ location_catalog_entry_index < location_catalog_entries_.Size();
+ ++location_catalog_entry_index) {
+ DexRegisterLocation dex_register_location =
+ location_catalog_entries_.Get(location_catalog_entry_index);
+ size += DexRegisterLocationCatalog::EntrySize(dex_register_location);
+ }
+ return size;
}
- // Compute the size of the Dex register map of `entry`.
size_t ComputeDexRegisterMapSize(const StackMapEntry& entry) const {
+ // Size of the map in bytes.
size_t size = DexRegisterMap::kFixedSize;
- // Add the bit mask for the dex register liveness.
- size += DexRegisterMap::LiveBitMaskSize(entry.num_dex_registers);
- for (size_t dex_register_number = 0, index_in_dex_register_locations = 0;
+ // Add the live bit mask for the Dex register liveness.
+ size += DexRegisterMap::GetLiveBitMaskSize(entry.num_dex_registers);
+ // Compute the size of the set of live Dex register entries.
+ size_t number_of_live_dex_registers = 0;
+ for (size_t dex_register_number = 0;
dex_register_number < entry.num_dex_registers;
++dex_register_number) {
if (entry.live_dex_registers_mask->IsBitSet(dex_register_number)) {
- DexRegisterLocation dex_register_location = dex_register_locations_.Get(
- entry.dex_register_locations_start_index + index_in_dex_register_locations);
- size += DexRegisterMap::EntrySize(dex_register_location);
- index_in_dex_register_locations++;
+ ++number_of_live_dex_registers;
}
}
+ size_t map_entries_size_in_bits =
+ DexRegisterMap::SingleEntrySizeInBits(location_catalog_entries_.Size())
+ * number_of_live_dex_registers;
+ size_t map_entries_size_in_bytes =
+ RoundUp(map_entries_size_in_bits, kBitsPerByte) / kBitsPerByte;
+ size += map_entries_size_in_bytes;
return size;
}
@@ -168,8 +217,16 @@ class StackMapStream : public ValueObject {
+ (number_of_stack_maps_with_inline_info_ * InlineInfo::kFixedSize);
}
+ size_t ComputeDexRegisterLocationCatalogStart() const {
+ return CodeInfo::kFixedSize;
+ }
+
+ size_t ComputeStackMapsStart() const {
+ return ComputeDexRegisterLocationCatalogStart() + ComputeDexRegisterLocationCatalogSize();
+ }
+
size_t ComputeDexRegisterMapsStart() {
- return CodeInfo::kFixedSize + ComputeStackMapsSize();
+ return ComputeStackMapsStart() + ComputeStackMapsSize();
}
size_t ComputeInlineInfoStart() {
@@ -194,11 +251,32 @@ class StackMapStream : public ValueObject {
ComputeInlineInfoStart(),
inline_info_size);
- code_info.SetEncoding(
- inline_info_size, dex_register_map_size, dex_pc_max_, native_pc_offset_max_);
+ code_info.SetEncoding(inline_info_size,
+ dex_register_map_size,
+ dex_pc_max_,
+ native_pc_offset_max_,
+ register_mask_max_);
code_info.SetNumberOfStackMaps(stack_maps_.Size());
code_info.SetStackMaskSize(stack_mask_size);
- DCHECK_EQ(code_info.StackMapsSize(), ComputeStackMapsSize());
+ DCHECK_EQ(code_info.GetStackMapsSize(), ComputeStackMapsSize());
+
+ // Set the Dex register location catalog.
+ code_info.SetNumberOfDexRegisterLocationCatalogEntries(
+ location_catalog_entries_.Size());
+ MemoryRegion dex_register_location_catalog_region = region.Subregion(
+ ComputeDexRegisterLocationCatalogStart(),
+ ComputeDexRegisterLocationCatalogSize());
+ DexRegisterLocationCatalog dex_register_location_catalog(dex_register_location_catalog_region);
+ // Offset in `dex_register_location_catalog` where to store the next
+ // register location.
+ size_t location_catalog_offset = DexRegisterLocationCatalog::kFixedSize;
+ for (size_t i = 0, e = location_catalog_entries_.Size(); i < e; ++i) {
+ DexRegisterLocation dex_register_location = location_catalog_entries_.Get(i);
+ dex_register_location_catalog.SetRegisterInfo(location_catalog_offset, dex_register_location);
+ location_catalog_offset += DexRegisterLocationCatalog::EntrySize(dex_register_location);
+ }
+ // Ensure we reached the end of the Dex register location catalog.
+ DCHECK_EQ(location_catalog_offset, dex_register_location_catalog_region.size());
uintptr_t next_dex_register_map_offset = 0;
uintptr_t next_inline_info_offset = 0;
@@ -234,25 +312,25 @@ class StackMapStream : public ValueObject {
stack_map.SetDexRegisterMapOffset(
code_info, register_region.start() - dex_register_locations_region.start());
- // Offset in `dex_register_map` where to store the next register entry.
- size_t offset = DexRegisterMap::kFixedSize;
- dex_register_map.SetLiveBitMask(offset,
- entry.num_dex_registers,
- *entry.live_dex_registers_mask);
- offset += DexRegisterMap::LiveBitMaskSize(entry.num_dex_registers);
+ // Set the live bit mask.
+ dex_register_map.SetLiveBitMask(entry.num_dex_registers, *entry.live_dex_registers_mask);
+
+ // Set the dex register location mapping data.
for (size_t dex_register_number = 0, index_in_dex_register_locations = 0;
dex_register_number < entry.num_dex_registers;
++dex_register_number) {
if (entry.live_dex_registers_mask->IsBitSet(dex_register_number)) {
- DexRegisterLocation dex_register_location = dex_register_locations_.Get(
- entry.dex_register_locations_start_index + index_in_dex_register_locations);
- dex_register_map.SetRegisterInfo(offset, dex_register_location);
- offset += DexRegisterMap::EntrySize(dex_register_location);
+ size_t location_catalog_entry_index =
+ dex_register_locations_.Get(entry.dex_register_locations_start_index
+ + index_in_dex_register_locations);
+ dex_register_map.SetLocationCatalogEntryIndex(
+ index_in_dex_register_locations,
+ location_catalog_entry_index,
+ entry.num_dex_registers,
+ location_catalog_entries_.Size());
++index_in_dex_register_locations;
}
}
- // Ensure we reached the end of the Dex registers region.
- DCHECK_EQ(offset, register_region.size());
}
}
@@ -282,12 +360,31 @@ class StackMapStream : public ValueObject {
}
void AddDexRegisterEntry(uint16_t dex_register, DexRegisterLocation::Kind kind, int32_t value) {
+ StackMapEntry entry = stack_maps_.Get(stack_maps_.Size() - 1);
+ DCHECK_LT(dex_register, entry.num_dex_registers);
+
if (kind != DexRegisterLocation::Kind::kNone) {
// Ensure we only use non-compressed location kind at this stage.
DCHECK(DexRegisterLocation::IsShortLocationKind(kind))
<< DexRegisterLocation::PrettyDescriptor(kind);
- dex_register_locations_.Add(DexRegisterLocation(kind, value));
- StackMapEntry entry = stack_maps_.Get(stack_maps_.Size() - 1);
+ DexRegisterLocation location(kind, value);
+
+ // Look for Dex register `location` in the location catalog (using the
+ // companion hash map of locations to indices). Use its index if it
+ // is already in the location catalog. If not, insert it (in the
+ // location catalog and the hash map) and use the newly created index.
+ auto it = location_catalog_entries_indices_.Find(location);
+ if (it != location_catalog_entries_indices_.end()) {
+ // Retrieve the index from the hash map.
+ dex_register_locations_.Add(it->second);
+ } else {
+ // Create a new entry in the location catalog and the hash map.
+ size_t index = location_catalog_entries_.Size();
+ location_catalog_entries_.Add(location);
+ dex_register_locations_.Add(index);
+ location_catalog_entries_indices_.Insert(std::make_pair(location, index));
+ }
+
entry.live_dex_registers_mask->SetBit(dex_register);
entry.dex_register_map_hash += (1 << dex_register);
entry.dex_register_map_hash += static_cast<uint32_t>(value);
@@ -354,9 +451,9 @@ class StackMapStream : public ValueObject {
return false;
}
if (a.live_dex_registers_mask->IsBitSet(i)) {
- DexRegisterLocation a_loc = dex_register_locations_.Get(
+ size_t a_loc = dex_register_locations_.Get(
a.dex_register_locations_start_index + index_in_dex_register_locations);
- DexRegisterLocation b_loc = dex_register_locations_.Get(
+ size_t b_loc = dex_register_locations_.Get(
b.dex_register_locations_start_index + index_in_dex_register_locations);
if (a_loc != b_loc) {
return false;
@@ -369,21 +466,29 @@ class StackMapStream : public ValueObject {
ArenaAllocator* allocator_;
GrowableArray<StackMapEntry> stack_maps_;
- GrowableArray<DexRegisterLocation> dex_register_locations_;
+
+ // A catalog of unique [location_kind, register_value] pairs (per method).
+ GrowableArray<DexRegisterLocation> location_catalog_entries_;
+ // Map from Dex register location catalog entries to their indices in the
+ // location catalog.
+ typedef HashMap<DexRegisterLocation, size_t, LocationCatalogEntriesIndicesEmptyFn,
+ DexRegisterLocationHashFn> LocationCatalogEntriesIndices;
+ LocationCatalogEntriesIndices location_catalog_entries_indices_;
+
+ // A set of concatenated maps of Dex register location indices into
+ // `location_catalog_entries_`.
+ GrowableArray<size_t> dex_register_locations_;
GrowableArray<InlineInfoEntry> inline_infos_;
int stack_mask_max_;
uint32_t dex_pc_max_;
uint32_t native_pc_offset_max_;
+ uint32_t register_mask_max_;
size_t number_of_stack_maps_with_inline_info_;
ArenaSafeMap<uint32_t, GrowableArray<uint32_t>> dex_map_hash_to_stack_map_indices_;
static constexpr uint32_t kNoSameDexMapFound = -1;
- ART_FRIEND_TEST(StackMapTest, Test1);
- ART_FRIEND_TEST(StackMapTest, Test2);
- ART_FRIEND_TEST(StackMapTest, TestNonLiveDexRegisters);
-
DISALLOW_COPY_AND_ASSIGN(StackMapStream);
};
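
The net effect of the StackMapStream changes: each unique DexRegisterLocation is stored once per method in the catalog, and every Dex register map stores only a small index into it. The lookup-or-insert step in AddDexRegisterEntry boils down to:

    DexRegisterLocation location(kind, value);
    auto it = location_catalog_entries_indices_.Find(location);
    size_t index;
    if (it != location_catalog_entries_indices_.end()) {
      index = it->second;                        // Reuse an existing catalog entry.
    } else {
      index = location_catalog_entries_.Size();  // Append a new entry and remember its index.
      location_catalog_entries_.Add(location);
      location_catalog_entries_indices_.Insert(std::make_pair(location, index));
    }
    dex_register_locations_.Add(index);          // Maps now store indices, not locations.

This also makes the map-equality check above cheaper: two maps are equal when their index sequences are equal.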
diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc
index e5a9790254..8d160bc81e 100644
--- a/compiler/optimizing/stack_map_test.cc
+++ b/compiler/optimizing/stack_map_test.cc
@@ -31,6 +31,8 @@ static bool SameBits(MemoryRegion region, const BitVector& bit_vector) {
return true;
}
+using Kind = DexRegisterLocation::Kind;
+
TEST(StackMapTest, Test1) {
ArenaPool pool;
ArenaAllocator arena(&pool);
@@ -39,8 +41,8 @@ TEST(StackMapTest, Test1) {
ArenaBitVector sp_mask(&arena, 0, false);
size_t number_of_dex_registers = 2;
stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
- stream.AddDexRegisterEntry(0, DexRegisterLocation::Kind::kInStack, 0);
- stream.AddDexRegisterEntry(1, DexRegisterLocation::Kind::kConstant, -2);
+ stream.AddDexRegisterEntry(0, Kind::kInStack, 0); // Short location.
+ stream.AddDexRegisterEntry(1, Kind::kConstant, -2); // Short location.
size_t size = stream.ComputeNeededSize();
void* memory = arena.Alloc(size, kArenaAllocMisc);
@@ -51,6 +53,16 @@ TEST(StackMapTest, Test1) {
ASSERT_EQ(0u, code_info.GetStackMaskSize());
ASSERT_EQ(1u, code_info.GetNumberOfStackMaps());
+ uint32_t number_of_location_catalog_entries =
+ code_info.GetNumberOfDexRegisterLocationCatalogEntries();
+ ASSERT_EQ(2u, number_of_location_catalog_entries);
+ DexRegisterLocationCatalog location_catalog = code_info.GetDexRegisterLocationCatalog();
+ // The Dex register location catalog contains:
+ // - one 1-byte short Dex register location, and
+ // - one 5-byte large Dex register location.
+ size_t expected_location_catalog_size = 1u + 5u;
+ ASSERT_EQ(expected_location_catalog_size, location_catalog.Size());
+
StackMap stack_map = code_info.GetStackMapAt(0);
ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0)));
ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64)));
@@ -62,14 +74,40 @@ TEST(StackMapTest, Test1) {
ASSERT_TRUE(SameBits(stack_mask, sp_mask));
ASSERT_TRUE(stack_map.HasDexRegisterMap(code_info));
- DexRegisterMap dex_registers = code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers);
- ASSERT_EQ(7u, dex_registers.Size());
- DexRegisterLocation location0 = dex_registers.GetLocationKindAndValue(0, number_of_dex_registers);
- DexRegisterLocation location1 = dex_registers.GetLocationKindAndValue(1, number_of_dex_registers);
- ASSERT_EQ(DexRegisterLocation::Kind::kInStack, location0.GetKind());
- ASSERT_EQ(DexRegisterLocation::Kind::kConstant, location1.GetKind());
- ASSERT_EQ(DexRegisterLocation::Kind::kInStack, location0.GetInternalKind());
- ASSERT_EQ(DexRegisterLocation::Kind::kConstantLargeValue, location1.GetInternalKind());
+ DexRegisterMap dex_register_map =
+ code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers);
+ ASSERT_TRUE(dex_register_map.IsDexRegisterLive(0));
+ ASSERT_TRUE(dex_register_map.IsDexRegisterLive(1));
+ ASSERT_EQ(2u, dex_register_map.GetNumberOfLiveDexRegisters(number_of_dex_registers));
+ // The Dex register map contains:
+ // - one 1-byte live bit mask, and
+  // - one 1-byte set of location catalog entry indices composed of two 1-bit values.
+ size_t expected_dex_register_map_size = 1u + 1u;
+ ASSERT_EQ(expected_dex_register_map_size, dex_register_map.Size());
+
+ ASSERT_EQ(Kind::kInStack,
+ dex_register_map.GetLocationKind(0, number_of_dex_registers, code_info));
+ ASSERT_EQ(Kind::kConstant,
+ dex_register_map.GetLocationKind(1, number_of_dex_registers, code_info));
+ ASSERT_EQ(Kind::kInStack,
+ dex_register_map.GetLocationInternalKind(0, number_of_dex_registers, code_info));
+ ASSERT_EQ(Kind::kConstantLargeValue,
+ dex_register_map.GetLocationInternalKind(1, number_of_dex_registers, code_info));
+ ASSERT_EQ(0, dex_register_map.GetStackOffsetInBytes(0, number_of_dex_registers, code_info));
+ ASSERT_EQ(-2, dex_register_map.GetConstant(1, number_of_dex_registers, code_info));
+
+ size_t index0 = dex_register_map.GetLocationCatalogEntryIndex(
+ 0, number_of_dex_registers, number_of_location_catalog_entries);
+ size_t index1 = dex_register_map.GetLocationCatalogEntryIndex(
+ 1, number_of_dex_registers, number_of_location_catalog_entries);
+ ASSERT_EQ(0u, index0);
+ ASSERT_EQ(1u, index1);
+ DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0);
+ DexRegisterLocation location1 = location_catalog.GetDexRegisterLocation(index1);
+ ASSERT_EQ(Kind::kInStack, location0.GetKind());
+ ASSERT_EQ(Kind::kConstant, location1.GetKind());
+ ASSERT_EQ(Kind::kInStack, location0.GetInternalKind());
+ ASSERT_EQ(Kind::kConstantLargeValue, location1.GetInternalKind());
ASSERT_EQ(0, location0.GetValue());
ASSERT_EQ(-2, location1.GetValue());
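The 1u + 5u expectation above follows from the catalog's two encodings: a short location packs the kind and a small value into one byte, while a large location spends one byte on the kind and four on the 32-bit value. A sketch under the assumption of a 3-bit kind / 5-bit value split for the short form (the actual constants live in DexRegisterLocationCatalog):

    #include <cstdint>
    #include <vector>

    constexpr uint32_t kKindBits = 3;              // Assumed split.
    constexpr uint32_t kValueMax = (1u << 5) - 1;  // 31.

    // Appends either a 1-byte short encoding or a 5-byte large encoding.
    void EncodeLocation(std::vector<uint8_t>* out, uint8_t kind, int32_t value) {
      if (value >= 0 && static_cast<uint32_t>(value) <= kValueMax) {
        // Short: small value packed alongside the kind.
        out->push_back(static_cast<uint8_t>((value << kKindBits) | kind));
      } else {
        // Large: a kind byte followed by the full little-endian 32-bit value.
        out->push_back(kind);
        for (int shift = 0; shift < 32; shift += 8) {
          out->push_back(static_cast<uint8_t>(value >> shift));
        }
      }
    }
    // kInStack/0 encodes in 1 byte; kConstant/-2 needs 5; total 6 == 1u + 5u.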
@@ -86,8 +124,8 @@ TEST(StackMapTest, Test2) {
sp_mask1.SetBit(4);
size_t number_of_dex_registers = 2;
stream.AddStackMapEntry(0, 64, 0x3, &sp_mask1, number_of_dex_registers, 2);
- stream.AddDexRegisterEntry(0, DexRegisterLocation::Kind::kInStack, 0);
- stream.AddDexRegisterEntry(1, DexRegisterLocation::Kind::kConstant, -2);
+ stream.AddDexRegisterEntry(0, Kind::kInStack, 0); // Short location.
+ stream.AddDexRegisterEntry(1, Kind::kConstant, -2); // Large location.
stream.AddInlineInfoEntry(42);
stream.AddInlineInfoEntry(82);
@@ -95,8 +133,8 @@ TEST(StackMapTest, Test2) {
sp_mask2.SetBit(3);
sp_mask1.SetBit(8);
stream.AddStackMapEntry(1, 128, 0xFF, &sp_mask2, number_of_dex_registers, 0);
- stream.AddDexRegisterEntry(0, DexRegisterLocation::Kind::kInRegister, 18);
- stream.AddDexRegisterEntry(1, DexRegisterLocation::Kind::kInFpuRegister, 3);
+ stream.AddDexRegisterEntry(0, Kind::kInRegister, 18); // Short location.
+ stream.AddDexRegisterEntry(1, Kind::kInFpuRegister, 3); // Short location.
size_t size = stream.ComputeNeededSize();
void* memory = arena.Alloc(size, kArenaAllocMisc);
@@ -107,6 +145,16 @@ TEST(StackMapTest, Test2) {
ASSERT_EQ(1u, code_info.GetStackMaskSize());
ASSERT_EQ(2u, code_info.GetNumberOfStackMaps());
+ uint32_t number_of_location_catalog_entries =
+ code_info.GetNumberOfDexRegisterLocationCatalogEntries();
+ ASSERT_EQ(4u, number_of_location_catalog_entries);
+ DexRegisterLocationCatalog location_catalog = code_info.GetDexRegisterLocationCatalog();
+ // The Dex register location catalog contains:
+ // - three 1-byte short Dex register locations, and
+ // - one 5-byte large Dex register location.
+ size_t expected_location_catalog_size = 3u * 1u + 5u;
+ ASSERT_EQ(expected_location_catalog_size, location_catalog.Size());
+
// First stack map.
{
StackMap stack_map = code_info.GetStackMapAt(0);
@@ -120,17 +168,40 @@ TEST(StackMapTest, Test2) {
ASSERT_TRUE(SameBits(stack_mask, sp_mask1));
ASSERT_TRUE(stack_map.HasDexRegisterMap(code_info));
- DexRegisterMap dex_registers =
+ DexRegisterMap dex_register_map =
code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers);
- ASSERT_EQ(7u, dex_registers.Size());
- DexRegisterLocation location0 =
- dex_registers.GetLocationKindAndValue(0, number_of_dex_registers);
- DexRegisterLocation location1 =
- dex_registers.GetLocationKindAndValue(1, number_of_dex_registers);
- ASSERT_EQ(DexRegisterLocation::Kind::kInStack, location0.GetKind());
- ASSERT_EQ(DexRegisterLocation::Kind::kConstant, location1.GetKind());
- ASSERT_EQ(DexRegisterLocation::Kind::kInStack, location0.GetInternalKind());
- ASSERT_EQ(DexRegisterLocation::Kind::kConstantLargeValue, location1.GetInternalKind());
+ ASSERT_TRUE(dex_register_map.IsDexRegisterLive(0));
+ ASSERT_TRUE(dex_register_map.IsDexRegisterLive(1));
+ ASSERT_EQ(2u, dex_register_map.GetNumberOfLiveDexRegisters(number_of_dex_registers));
+ // The Dex register map contains:
+ // - one 1-byte live bit mask, and
+ // - one 1-byte set of location catalog entry indices composed of two 2-bit values.
+ size_t expected_dex_register_map_size = 1u + 1u;
+ ASSERT_EQ(expected_dex_register_map_size, dex_register_map.Size());
+
+ ASSERT_EQ(Kind::kInStack,
+ dex_register_map.GetLocationKind(0, number_of_dex_registers, code_info));
+ ASSERT_EQ(Kind::kConstant,
+ dex_register_map.GetLocationKind(1, number_of_dex_registers, code_info));
+ ASSERT_EQ(Kind::kInStack,
+ dex_register_map.GetLocationInternalKind(0, number_of_dex_registers, code_info));
+ ASSERT_EQ(Kind::kConstantLargeValue,
+ dex_register_map.GetLocationInternalKind(1, number_of_dex_registers, code_info));
+ ASSERT_EQ(0, dex_register_map.GetStackOffsetInBytes(0, number_of_dex_registers, code_info));
+ ASSERT_EQ(-2, dex_register_map.GetConstant(1, number_of_dex_registers, code_info));
+
+ size_t index0 = dex_register_map.GetLocationCatalogEntryIndex(
+ 0, number_of_dex_registers, number_of_location_catalog_entries);
+ size_t index1 = dex_register_map.GetLocationCatalogEntryIndex(
+ 1, number_of_dex_registers, number_of_location_catalog_entries);
+ ASSERT_EQ(0u, index0);
+ ASSERT_EQ(1u, index1);
+ DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0);
+ DexRegisterLocation location1 = location_catalog.GetDexRegisterLocation(index1);
+ ASSERT_EQ(Kind::kInStack, location0.GetKind());
+ ASSERT_EQ(Kind::kConstant, location1.GetKind());
+ ASSERT_EQ(Kind::kInStack, location0.GetInternalKind());
+ ASSERT_EQ(Kind::kConstantLargeValue, location1.GetInternalKind());
ASSERT_EQ(0, location0.GetValue());
ASSERT_EQ(-2, location1.GetValue());
@@ -154,17 +225,40 @@ TEST(StackMapTest, Test2) {
ASSERT_TRUE(SameBits(stack_mask, sp_mask2));
ASSERT_TRUE(stack_map.HasDexRegisterMap(code_info));
- DexRegisterMap dex_registers =
+ DexRegisterMap dex_register_map =
code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers);
- ASSERT_EQ(3u, dex_registers.Size());
- DexRegisterLocation location0 =
- dex_registers.GetLocationKindAndValue(0, number_of_dex_registers);
- DexRegisterLocation location1 =
- dex_registers.GetLocationKindAndValue(1, number_of_dex_registers);
- ASSERT_EQ(DexRegisterLocation::Kind::kInRegister, location0.GetKind());
- ASSERT_EQ(DexRegisterLocation::Kind::kInFpuRegister, location1.GetKind());
- ASSERT_EQ(DexRegisterLocation::Kind::kInRegister, location0.GetInternalKind());
- ASSERT_EQ(DexRegisterLocation::Kind::kInFpuRegister, location1.GetInternalKind());
+ ASSERT_TRUE(dex_register_map.IsDexRegisterLive(0));
+ ASSERT_TRUE(dex_register_map.IsDexRegisterLive(1));
+ ASSERT_EQ(2u, dex_register_map.GetNumberOfLiveDexRegisters(number_of_dex_registers));
+ // The Dex register map contains:
+ // - one 1-byte live bit mask, and
+ // - one 1-byte set of location catalog entry indices composed of two 2-bit values.
+ size_t expected_dex_register_map_size = 1u + 1u;
+ ASSERT_EQ(expected_dex_register_map_size, dex_register_map.Size());
+
+ ASSERT_EQ(Kind::kInRegister,
+ dex_register_map.GetLocationKind(0, number_of_dex_registers, code_info));
+ ASSERT_EQ(Kind::kInFpuRegister,
+ dex_register_map.GetLocationKind(1, number_of_dex_registers, code_info));
+ ASSERT_EQ(Kind::kInRegister,
+ dex_register_map.GetLocationInternalKind(0, number_of_dex_registers, code_info));
+ ASSERT_EQ(Kind::kInFpuRegister,
+ dex_register_map.GetLocationInternalKind(1, number_of_dex_registers, code_info));
+ ASSERT_EQ(18, dex_register_map.GetMachineRegister(0, number_of_dex_registers, code_info));
+ ASSERT_EQ(3, dex_register_map.GetMachineRegister(1, number_of_dex_registers, code_info));
+
+ size_t index0 = dex_register_map.GetLocationCatalogEntryIndex(
+ 0, number_of_dex_registers, number_of_location_catalog_entries);
+ size_t index1 = dex_register_map.GetLocationCatalogEntryIndex(
+ 1, number_of_dex_registers, number_of_location_catalog_entries);
+ ASSERT_EQ(2u, index0);
+ ASSERT_EQ(3u, index1);
+ DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0);
+ DexRegisterLocation location1 = location_catalog.GetDexRegisterLocation(index1);
+ ASSERT_EQ(Kind::kInRegister, location0.GetKind());
+ ASSERT_EQ(Kind::kInFpuRegister, location1.GetKind());
+ ASSERT_EQ(Kind::kInRegister, location0.GetInternalKind());
+ ASSERT_EQ(Kind::kInFpuRegister, location1.GetInternalKind());
ASSERT_EQ(18, location0.GetValue());
ASSERT_EQ(3, location1.GetValue());
@@ -180,8 +274,8 @@ TEST(StackMapTest, TestNonLiveDexRegisters) {
ArenaBitVector sp_mask(&arena, 0, false);
uint32_t number_of_dex_registers = 2;
stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
- stream.AddDexRegisterEntry(0, DexRegisterLocation::Kind::kNone, 0);
- stream.AddDexRegisterEntry(1, DexRegisterLocation::Kind::kConstant, -2);
+ stream.AddDexRegisterEntry(0, Kind::kNone, 0); // No location.
+ stream.AddDexRegisterEntry(1, Kind::kConstant, -2); // Large location.
size_t size = stream.ComputeNeededSize();
void* memory = arena.Alloc(size, kArenaAllocMisc);
@@ -189,14 +283,62 @@ TEST(StackMapTest, TestNonLiveDexRegisters) {
stream.FillIn(region);
CodeInfo code_info(region);
+ ASSERT_EQ(0u, code_info.GetStackMaskSize());
+ ASSERT_EQ(1u, code_info.GetNumberOfStackMaps());
+
+ uint32_t number_of_location_catalog_entries =
+ code_info.GetNumberOfDexRegisterLocationCatalogEntries();
+ ASSERT_EQ(1u, number_of_location_catalog_entries);
+ DexRegisterLocationCatalog location_catalog = code_info.GetDexRegisterLocationCatalog();
+ // The Dex register location catalog contains:
+ // - one 5-byte large Dex register location.
+ size_t expected_location_catalog_size = 5u;
+ ASSERT_EQ(expected_location_catalog_size, location_catalog.Size());
+
StackMap stack_map = code_info.GetStackMapAt(0);
+ ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0)));
+ ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64)));
+ ASSERT_EQ(0u, stack_map.GetDexPc(code_info));
+ ASSERT_EQ(64u, stack_map.GetNativePcOffset(code_info));
+ ASSERT_EQ(0x3u, stack_map.GetRegisterMask(code_info));
+
ASSERT_TRUE(stack_map.HasDexRegisterMap(code_info));
- DexRegisterMap dex_registers = code_info.GetDexRegisterMapOf(stack_map, 2);
- ASSERT_EQ(DexRegisterLocation::Kind::kNone,
- dex_registers.GetLocationKind(0, number_of_dex_registers));
- ASSERT_EQ(DexRegisterLocation::Kind::kConstant,
- dex_registers.GetLocationKind(1, number_of_dex_registers));
- ASSERT_EQ(-2, dex_registers.GetConstant(1, number_of_dex_registers));
+ DexRegisterMap dex_register_map =
+ code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers);
+ ASSERT_FALSE(dex_register_map.IsDexRegisterLive(0));
+ ASSERT_TRUE(dex_register_map.IsDexRegisterLive(1));
+ ASSERT_EQ(1u, dex_register_map.GetNumberOfLiveDexRegisters(number_of_dex_registers));
+ // The Dex register map contains:
+ // - one 1-byte live bit mask.
+  // No space is allocated for the location catalog entry indices, as a
+  // single-entry catalog needs zero bits per index.
+ size_t expected_dex_register_map_size = 1u + 0u;
+ ASSERT_EQ(expected_dex_register_map_size, dex_register_map.Size());
+
+ ASSERT_EQ(Kind::kNone,
+ dex_register_map.GetLocationKind(0, number_of_dex_registers, code_info));
+ ASSERT_EQ(Kind::kConstant,
+ dex_register_map.GetLocationKind(1, number_of_dex_registers, code_info));
+ ASSERT_EQ(Kind::kNone,
+ dex_register_map.GetLocationInternalKind(0, number_of_dex_registers, code_info));
+ ASSERT_EQ(Kind::kConstantLargeValue,
+ dex_register_map.GetLocationInternalKind(1, number_of_dex_registers, code_info));
+ ASSERT_EQ(-2, dex_register_map.GetConstant(1, number_of_dex_registers, code_info));
+
+ size_t index0 = dex_register_map.GetLocationCatalogEntryIndex(
+ 0, number_of_dex_registers, number_of_location_catalog_entries);
+ size_t index1 = dex_register_map.GetLocationCatalogEntryIndex(
+ 1, number_of_dex_registers, number_of_location_catalog_entries);
+ ASSERT_EQ(DexRegisterLocationCatalog::kNoLocationEntryIndex, index0);
+ ASSERT_EQ(0u, index1);
+ DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0);
+ DexRegisterLocation location1 = location_catalog.GetDexRegisterLocation(index1);
+ ASSERT_EQ(Kind::kNone, location0.GetKind());
+ ASSERT_EQ(Kind::kConstant, location1.GetKind());
+ ASSERT_EQ(Kind::kNone, location0.GetInternalKind());
+ ASSERT_EQ(Kind::kConstantLargeValue, location1.GetInternalKind());
+ ASSERT_EQ(0, location0.GetValue());
+ ASSERT_EQ(-2, location1.GetValue());
+
ASSERT_FALSE(stack_map.HasInlineInfo(code_info));
}
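To make the two-part layout exercised above concrete, here is an illustrative reader: a live-register bit mask comes first, followed by packed catalog indices for live registers only. It assumes 1-bit indices for brevity (this particular test uses zero index bits, since its catalog holds a single entry); the real DexRegisterMap generalizes the index width.

    #include <cstdint>
    #include <vector>

    bool IsLive(const std::vector<uint8_t>& map, uint32_t reg) {
      return (map[reg / 8] >> (reg % 8)) & 1u;  // Bit `reg` of the live mask.
    }

    // Catalog index of *live* register `reg`, assuming 1-bit packed indices.
    uint32_t CatalogIndexOf(const std::vector<uint8_t>& map, uint32_t reg,
                            uint32_t num_regs) {
      uint32_t live_before = 0;
      for (uint32_t r = 0; r < reg; ++r) live_before += IsLive(map, r);
      uint32_t mask_bytes = (num_regs + 7) / 8;  // Indices start after the mask.
      return (map[mask_bytes + live_before / 8] >> (live_before % 8)) & 1u;
    }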
@@ -209,14 +351,21 @@ TEST(StackMapTest, DexRegisterMapOffsetOverflow) {
StackMapStream stream(&arena);
ArenaBitVector sp_mask(&arena, 0, false);
- uint32_t number_of_dex_registers = 0xEA;
+ uint32_t number_of_dex_registers = 1024;
+ // Create the first stack map (and its Dex register map).
stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
- for (uint32_t i = 0; i < number_of_dex_registers - 9; ++i) {
- stream.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kConstant, 0);
+ uint32_t number_of_dex_live_registers_in_dex_register_map_0 = number_of_dex_registers - 8;
+ for (uint32_t i = 0; i < number_of_dex_live_registers_in_dex_register_map_0; ++i) {
+    // Populate this map with two different Dex register locations: if a
+    // single location were used throughout the CodeInfo object, the catalog
+    // would hold one entry, each index would need zero bits, and the Dex
+    // register mapping data would be empty (see
+    // art::DexRegisterMap::SingleEntrySizeInBits).
+ stream.AddDexRegisterEntry(i, Kind::kConstant, i % 2); // Short location.
}
+ // Create the second stack map (and its Dex register map).
stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
for (uint32_t i = 0; i < number_of_dex_registers; ++i) {
- stream.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kConstant, 0);
+ stream.AddDexRegisterEntry(i, Kind::kConstant, 0); // Short location.
}
size_t size = stream.ComputeNeededSize();
@@ -225,10 +374,35 @@ TEST(StackMapTest, DexRegisterMapOffsetOverflow) {
stream.FillIn(region);
CodeInfo code_info(region);
- StackMap stack_map = code_info.GetStackMapAt(1);
- ASSERT_TRUE(stack_map.HasDexRegisterMap(code_info));
- ASSERT_NE(stack_map.GetDexRegisterMapOffset(code_info), StackMap::kNoDexRegisterMap);
- ASSERT_EQ(stack_map.GetDexRegisterMapOffset(code_info), StackMap::kNoDexRegisterMapSmallEncoding);
+ // The location catalog contains two entries (DexRegisterLocation(kConstant, 0)
+ // and DexRegisterLocation(kConstant, 1)), therefore the location catalog index
+ // has a size of 1 bit.
+ uint32_t number_of_location_catalog_entries =
+ code_info.GetNumberOfDexRegisterLocationCatalogEntries();
+ ASSERT_EQ(2u, number_of_location_catalog_entries);
+ ASSERT_EQ(1u, DexRegisterMap::SingleEntrySizeInBits(number_of_location_catalog_entries));
+
+ // The first Dex register map contains:
+ // - a live register bit mask for 1024 registers (that is, 128 bytes of
+ // data); and
+  // - mapping information for the 1016 live Dex registers, at 1 bit per
+  //   location (that is, 127 bytes of data).
+ // Hence it has a size of 255 bytes, and therefore...
+ ASSERT_EQ(128u, DexRegisterMap::GetLiveBitMaskSize(number_of_dex_registers));
+ StackMap stack_map0 = code_info.GetStackMapAt(0);
+ DexRegisterMap dex_register_map0 =
+ code_info.GetDexRegisterMapOf(stack_map0, number_of_dex_registers);
+ ASSERT_EQ(127u, dex_register_map0.GetLocationMappingDataSize(number_of_dex_registers,
+ number_of_location_catalog_entries));
+ ASSERT_EQ(255u, dex_register_map0.Size());
+
+ StackMap stack_map1 = code_info.GetStackMapAt(1);
+ ASSERT_TRUE(stack_map1.HasDexRegisterMap(code_info));
+ // ...the offset of the second Dex register map (relative to the
+ // beginning of the Dex register maps region) is 255 (i.e.,
+ // kNoDexRegisterMapSmallEncoding).
+ ASSERT_NE(stack_map1.GetDexRegisterMapOffset(code_info), StackMap::kNoDexRegisterMap);
+ ASSERT_EQ(stack_map1.GetDexRegisterMapOffset(code_info), 0xFFu);
}
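The 255-byte total asserted above is plain arithmetic; a compile-time sketch:

    #include <cstddef>

    constexpr size_t kRegisters = 1024;
    constexpr size_t kLiveRegisters = kRegisters - 8;  // 1016 in the first map.
    constexpr size_t kBitsPerIndex = 1;                // Two catalog entries.

    constexpr size_t live_mask_bytes = kRegisters / 8;                          // 128.
    constexpr size_t mapping_bytes = (kLiveRegisters * kBitsPerIndex + 7) / 8;  // 127.
    static_assert(live_mask_bytes + mapping_bytes == 255, "0xFF, the overflow marker");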
TEST(StackMapTest, TestShareDexRegisterMap) {
@@ -240,16 +414,16 @@ TEST(StackMapTest, TestShareDexRegisterMap) {
uint32_t number_of_dex_registers = 2;
// First stack map.
stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
- stream.AddDexRegisterEntry(0, DexRegisterLocation::Kind::kInRegister, 0);
- stream.AddDexRegisterEntry(1, DexRegisterLocation::Kind::kConstant, -2);
+ stream.AddDexRegisterEntry(0, Kind::kInRegister, 0); // Short location.
+ stream.AddDexRegisterEntry(1, Kind::kConstant, -2); // Large location.
// Second stack map, which should share the same dex register map.
stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
- stream.AddDexRegisterEntry(0, DexRegisterLocation::Kind::kInRegister, 0);
- stream.AddDexRegisterEntry(1, DexRegisterLocation::Kind::kConstant, -2);
+ stream.AddDexRegisterEntry(0, Kind::kInRegister, 0); // Short location.
+ stream.AddDexRegisterEntry(1, Kind::kConstant, -2); // Large location.
// Third stack map (doesn't share the dex register map).
stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
- stream.AddDexRegisterEntry(0, DexRegisterLocation::Kind::kInRegister, 2);
- stream.AddDexRegisterEntry(1, DexRegisterLocation::Kind::kConstant, -2);
+ stream.AddDexRegisterEntry(0, Kind::kInRegister, 2); // Short location.
+ stream.AddDexRegisterEntry(1, Kind::kConstant, -2); // Large location.
size_t size = stream.ComputeNeededSize();
void* memory = arena.Alloc(size, kArenaAllocMisc);
@@ -260,20 +434,20 @@ TEST(StackMapTest, TestShareDexRegisterMap) {
// Verify first stack map.
StackMap sm0 = ci.GetStackMapAt(0);
DexRegisterMap dex_registers0 = ci.GetDexRegisterMapOf(sm0, number_of_dex_registers);
- ASSERT_EQ(0, dex_registers0.GetMachineRegister(0, number_of_dex_registers));
- ASSERT_EQ(-2, dex_registers0.GetConstant(1, number_of_dex_registers));
+ ASSERT_EQ(0, dex_registers0.GetMachineRegister(0, number_of_dex_registers, ci));
+ ASSERT_EQ(-2, dex_registers0.GetConstant(1, number_of_dex_registers, ci));
// Verify second stack map.
StackMap sm1 = ci.GetStackMapAt(1);
DexRegisterMap dex_registers1 = ci.GetDexRegisterMapOf(sm1, number_of_dex_registers);
- ASSERT_EQ(0, dex_registers1.GetMachineRegister(0, number_of_dex_registers));
- ASSERT_EQ(-2, dex_registers1.GetConstant(1, number_of_dex_registers));
+ ASSERT_EQ(0, dex_registers1.GetMachineRegister(0, number_of_dex_registers, ci));
+ ASSERT_EQ(-2, dex_registers1.GetConstant(1, number_of_dex_registers, ci));
// Verify third stack map.
StackMap sm2 = ci.GetStackMapAt(2);
DexRegisterMap dex_registers2 = ci.GetDexRegisterMapOf(sm2, number_of_dex_registers);
- ASSERT_EQ(2, dex_registers2.GetMachineRegister(0, number_of_dex_registers));
- ASSERT_EQ(-2, dex_registers2.GetConstant(1, number_of_dex_registers));
+ ASSERT_EQ(2, dex_registers2.GetMachineRegister(0, number_of_dex_registers, ci));
+ ASSERT_EQ(-2, dex_registers2.GetConstant(1, number_of_dex_registers, ci));
// Verify dex register map offsets.
ASSERT_EQ(sm0.GetDexRegisterMapOffset(ci), sm1.GetDexRegisterMapOffset(ci));
@@ -281,4 +455,39 @@ TEST(StackMapTest, TestShareDexRegisterMap) {
ASSERT_NE(sm1.GetDexRegisterMapOffset(ci), sm2.GetDexRegisterMapOffset(ci));
}
+TEST(StackMapTest, TestNoDexRegisterMap) {
+ ArenaPool pool;
+ ArenaAllocator arena(&pool);
+ StackMapStream stream(&arena);
+
+ ArenaBitVector sp_mask(&arena, 0, false);
+ uint32_t number_of_dex_registers = 0;
+ stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
+
+ size_t size = stream.ComputeNeededSize();
+ void* memory = arena.Alloc(size, kArenaAllocMisc);
+ MemoryRegion region(memory, size);
+ stream.FillIn(region);
+
+ CodeInfo code_info(region);
+ ASSERT_EQ(0u, code_info.GetStackMaskSize());
+ ASSERT_EQ(1u, code_info.GetNumberOfStackMaps());
+
+ uint32_t number_of_location_catalog_entries =
+ code_info.GetNumberOfDexRegisterLocationCatalogEntries();
+ ASSERT_EQ(0u, number_of_location_catalog_entries);
+ DexRegisterLocationCatalog location_catalog = code_info.GetDexRegisterLocationCatalog();
+ ASSERT_EQ(0u, location_catalog.Size());
+
+ StackMap stack_map = code_info.GetStackMapAt(0);
+ ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0)));
+ ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64)));
+ ASSERT_EQ(0u, stack_map.GetDexPc(code_info));
+ ASSERT_EQ(64u, stack_map.GetNativePcOffset(code_info));
+ ASSERT_EQ(0x3u, stack_map.GetRegisterMask(code_info));
+
+ ASSERT_FALSE(stack_map.HasDexRegisterMap(code_info));
+ ASSERT_FALSE(stack_map.HasInlineInfo(code_info));
+}
+
} // namespace art
diff --git a/compiler/utils/arm/assembler_arm.cc b/compiler/utils/arm/assembler_arm.cc
index a02191bc13..c41066027d 100644
--- a/compiler/utils/arm/assembler_arm.cc
+++ b/compiler/utils/arm/assembler_arm.cc
@@ -89,7 +89,6 @@ uint32_t ShifterOperand::encodingArm() const {
} else {
return immed_;
}
- break;
case kRegister:
if (is_shift_) {
uint32_t shift_type;
@@ -121,7 +120,6 @@ uint32_t ShifterOperand::encodingArm() const {
// Simple register
return static_cast<uint32_t>(rm_);
}
- break;
default:
// Can't get here.
LOG(FATAL) << "Invalid shifter operand for ARM";
@@ -156,13 +154,11 @@ uint32_t ShifterOperand::encodingThumb() const {
// Simple register
return static_cast<uint32_t>(rm_);
}
- break;
default:
// Can't get here.
LOG(FATAL) << "Invalid shifter operand for thumb";
- return 0;
+ UNREACHABLE();
}
- return 0;
}
uint32_t Address::encodingArm() const {
@@ -374,40 +370,46 @@ void ArmAssembler::Pad(uint32_t bytes) {
}
}
+static dwarf::Reg DWARFReg(Register reg) {
+ return dwarf::Reg::ArmCore(static_cast<int>(reg));
+}
+
+static dwarf::Reg DWARFReg(SRegister reg) {
+ return dwarf::Reg::ArmFp(static_cast<int>(reg));
+}
+
constexpr size_t kFramePointerSize = 4;
void ArmAssembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
const std::vector<ManagedRegister>& callee_save_regs,
const ManagedRegisterEntrySpills& entry_spills) {
+ CHECK_EQ(buffer_.Size(), 0U); // Nothing emitted yet
CHECK_ALIGNED(frame_size, kStackAlignment);
CHECK_EQ(R0, method_reg.AsArm().AsCoreRegister());
// Push callee saves and link register.
- RegList push_list = 1 << LR;
- size_t pushed_values = 1;
- int32_t min_s = kNumberOfSRegisters;
- int32_t max_s = -1;
- for (size_t i = 0; i < callee_save_regs.size(); i++) {
- if (callee_save_regs.at(i).AsArm().IsCoreRegister()) {
- Register reg = callee_save_regs.at(i).AsArm().AsCoreRegister();
- push_list |= 1 << reg;
- pushed_values++;
+ RegList core_spill_mask = 1 << LR;
+ uint32_t fp_spill_mask = 0;
+ for (const ManagedRegister& reg : callee_save_regs) {
+ if (reg.AsArm().IsCoreRegister()) {
+ core_spill_mask |= 1 << reg.AsArm().AsCoreRegister();
} else {
- CHECK(callee_save_regs.at(i).AsArm().IsSRegister());
- min_s = std::min(static_cast<int>(callee_save_regs.at(i).AsArm().AsSRegister()), min_s);
- max_s = std::max(static_cast<int>(callee_save_regs.at(i).AsArm().AsSRegister()), max_s);
+ fp_spill_mask |= 1 << reg.AsArm().AsSRegister();
}
}
- PushList(push_list);
- if (max_s != -1) {
- pushed_values += 1 + max_s - min_s;
- vpushs(static_cast<SRegister>(min_s), 1 + max_s - min_s);
+ PushList(core_spill_mask);
+ cfi_.AdjustCFAOffset(POPCOUNT(core_spill_mask) * kFramePointerSize);
+ cfi_.RelOffsetForMany(DWARFReg(Register(0)), 0, core_spill_mask, kFramePointerSize);
+ if (fp_spill_mask != 0) {
+ vpushs(SRegister(CTZ(fp_spill_mask)), POPCOUNT(fp_spill_mask));
+ cfi_.AdjustCFAOffset(POPCOUNT(fp_spill_mask) * kFramePointerSize);
+ cfi_.RelOffsetForMany(DWARFReg(SRegister(0)), 0, fp_spill_mask, kFramePointerSize);
}
// Increase frame to required size.
+ int pushed_values = POPCOUNT(core_spill_mask) + POPCOUNT(fp_spill_mask);
CHECK_GT(frame_size, pushed_values * kFramePointerSize); // Must at least have space for Method*.
- size_t adjust = frame_size - (pushed_values * kFramePointerSize);
- IncreaseFrameSize(adjust);
+ IncreaseFrameSize(frame_size - pushed_values * kFramePointerSize); // handles CFI as well.
// Write out Method*.
StoreToOffset(kStoreWord, R0, SP, 0);
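The mask-based form above replaces the old min/max S-register bookkeeping. A small stand-alone sketch of the arithmetic, with compiler builtins standing in for ART's POPCOUNT/CTZ (callers must check the mask is non-zero before taking CTZ, as BuildFrame does):

    #include <cstdint>

    // For a contiguous S-register spill mask such as 0xF0 (S4..S7), vpushs
    // needs the first register index and the register count:
    int FirstSpilledSReg(uint32_t fp_spill_mask) {
      return __builtin_ctz(fp_spill_mask);       // Index of the lowest set bit.
    }
    int NumSpilledSRegs(uint32_t fp_spill_mask) {
      return __builtin_popcount(fp_spill_mask);  // Number of set bits.
    }
    // The CFA moves by one 4-byte slot per pushed register:
    int CfaAdjustment(uint32_t core_mask, uint32_t fp_mask) {
      return (__builtin_popcount(core_mask) + __builtin_popcount(fp_mask)) * 4;
    }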
@@ -436,46 +438,46 @@ void ArmAssembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
void ArmAssembler::RemoveFrame(size_t frame_size,
const std::vector<ManagedRegister>& callee_save_regs) {
CHECK_ALIGNED(frame_size, kStackAlignment);
+ cfi_.RememberState();
+
// Compute callee saves to pop and PC.
- RegList pop_list = 1 << PC;
- size_t pop_values = 1;
- int32_t min_s = kNumberOfSRegisters;
- int32_t max_s = -1;
- for (size_t i = 0; i < callee_save_regs.size(); i++) {
- if (callee_save_regs.at(i).AsArm().IsCoreRegister()) {
- Register reg = callee_save_regs.at(i).AsArm().AsCoreRegister();
- pop_list |= 1 << reg;
- pop_values++;
+ RegList core_spill_mask = 1 << PC;
+ uint32_t fp_spill_mask = 0;
+ for (const ManagedRegister& reg : callee_save_regs) {
+ if (reg.AsArm().IsCoreRegister()) {
+ core_spill_mask |= 1 << reg.AsArm().AsCoreRegister();
} else {
- CHECK(callee_save_regs.at(i).AsArm().IsSRegister());
- min_s = std::min(static_cast<int>(callee_save_regs.at(i).AsArm().AsSRegister()), min_s);
- max_s = std::max(static_cast<int>(callee_save_regs.at(i).AsArm().AsSRegister()), max_s);
+ fp_spill_mask |= 1 << reg.AsArm().AsSRegister();
}
}
- if (max_s != -1) {
- pop_values += 1 + max_s - min_s;
- }
-
// Decrease frame to start of callee saves.
+ int pop_values = POPCOUNT(core_spill_mask) + POPCOUNT(fp_spill_mask);
CHECK_GT(frame_size, pop_values * kFramePointerSize);
- size_t adjust = frame_size - (pop_values * kFramePointerSize);
- DecreaseFrameSize(adjust);
+ DecreaseFrameSize(frame_size - (pop_values * kFramePointerSize)); // handles CFI as well.
- if (max_s != -1) {
- vpops(static_cast<SRegister>(min_s), 1 + max_s - min_s);
+ if (fp_spill_mask != 0) {
+ vpops(SRegister(CTZ(fp_spill_mask)), POPCOUNT(fp_spill_mask));
+ cfi_.AdjustCFAOffset(-kFramePointerSize * POPCOUNT(fp_spill_mask));
+ cfi_.RestoreMany(DWARFReg(SRegister(0)), fp_spill_mask);
}
// Pop callee saves and PC.
- PopList(pop_list);
+ PopList(core_spill_mask);
+
+ // The CFI should be restored for any code that follows the exit block.
+ cfi_.RestoreState();
+ cfi_.DefCFAOffset(frame_size);
}
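The RememberState/RestoreState pair exists because an epilogue need not end the method: instructions emitted after it must still unwind as if the frame were live. A self-contained model of the bookkeeping (illustrative only; the real writer also emits the corresponding DWARF opcodes):

    #include <cassert>
    #include <stack>

    struct CfaTracker {
      int offset = 0;
      std::stack<int> saved;
      void RememberState() { saved.push(offset); }
      void RestoreState() { offset = saved.top(); saved.pop(); }
      void AdjustCFAOffset(int delta) { offset += delta; }
      void DefCFAOffset(int value) { offset = value; }
    };

    int main() {
      CfaTracker cfi;
      cfi.AdjustCFAOffset(64);   // Prologue: a 64-byte frame.
      cfi.RememberState();       // About to emit a mid-method epilogue.
      cfi.AdjustCFAOffset(-64);  // The epilogue pops the frame...
      cfi.RestoreState();        // ...but later code is still in-frame.
      cfi.DefCFAOffset(64);      // Mirrors what RemoveFrame re-asserts.
      assert(cfi.offset == 64);
    }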
void ArmAssembler::IncreaseFrameSize(size_t adjust) {
AddConstant(SP, -adjust);
+ cfi_.AdjustCFAOffset(adjust);
}
void ArmAssembler::DecreaseFrameSize(size_t adjust) {
AddConstant(SP, adjust);
+ cfi_.AdjustCFAOffset(-adjust);
}
void ArmAssembler::Store(FrameOffset dest, ManagedRegister msrc, size_t size) {
diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc
index a894319c99..3b42f63509 100644
--- a/compiler/utils/arm/assembler_thumb2.cc
+++ b/compiler/utils/arm/assembler_thumb2.cc
@@ -373,24 +373,34 @@ void Thumb2Assembler::ldrsh(Register rd, const Address& ad, Condition cond) {
void Thumb2Assembler::ldrd(Register rd, const Address& ad, Condition cond) {
+ ldrd(rd, Register(rd + 1), ad, cond);
+}
+
+
+void Thumb2Assembler::ldrd(Register rd, Register rd2, const Address& ad, Condition cond) {
CheckCondition(cond);
- CHECK_EQ(rd % 2, 0);
+ // Encoding T1.
// This is different from other loads. The encoding is like ARM.
int32_t encoding = B31 | B30 | B29 | B27 | B22 | B20 |
static_cast<int32_t>(rd) << 12 |
- (static_cast<int32_t>(rd) + 1) << 8 |
+ static_cast<int32_t>(rd2) << 8 |
ad.encodingThumbLdrdStrd();
Emit32(encoding);
}
void Thumb2Assembler::strd(Register rd, const Address& ad, Condition cond) {
+ strd(rd, Register(rd + 1), ad, cond);
+}
+
+
+void Thumb2Assembler::strd(Register rd, Register rd2, const Address& ad, Condition cond) {
CheckCondition(cond);
- CHECK_EQ(rd % 2, 0);
+ // Encoding T1.
  // This is different from other stores. The encoding is like ARM.
int32_t encoding = B31 | B30 | B29 | B27 | B22 |
static_cast<int32_t>(rd) << 12 |
- (static_cast<int32_t>(rd) + 1) << 8 |
+ static_cast<int32_t>(rd2) << 8 |
ad.encodingThumbLdrdStrd();
Emit32(encoding);
}
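A hedged usage sketch of the new two-register overloads (it assumes a Thumb2Assembler* named `assembler`; note these overloads take an explicit condition, unlike the rd/rd+1 forms):

    // (R11, IP) is legal under the Thumb-2 T1 encoding even though the pair
    // is neither even-first nor consecutive, unlike ARM's A1 encoding.
    assembler->strd(arm::R11, arm::IP, arm::Address(arm::R5, 1020), arm::AL);
    assembler->ldrd(arm::R0, arm::R2, arm::Address(arm::R5, 1020), arm::AL);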
@@ -683,7 +693,7 @@ void Thumb2Assembler::Emit16(int16_t value) {
bool Thumb2Assembler::Is32BitDataProcessing(Condition cond ATTRIBUTE_UNUSED,
Opcode opcode,
- bool set_cc ATTRIBUTE_UNUSED,
+ bool set_cc,
Register rn,
Register rd,
const ShifterOperand& so) {
@@ -749,7 +759,6 @@ bool Thumb2Assembler::Is32BitDataProcessing(Condition cond ATTRIBUTE_UNUSED,
break;
case TEQ:
return true;
- break;
case ADD:
case SUB:
break;
@@ -2614,14 +2623,16 @@ void Thumb2Assembler::StoreToOffset(StoreOperandType type,
Register tmp_reg = kNoRegister;
if (!Address::CanHoldStoreOffsetThumb(type, offset)) {
CHECK_NE(base, IP);
- if (reg != IP) {
+ if (reg != IP &&
+ (type != kStoreWordPair || reg + 1 != IP)) {
tmp_reg = IP;
} else {
- // Be careful not to use IP twice (for `reg` and to build the
- // Address object used by the store instruction(s) below).
- // Instead, save R5 on the stack (or R6 if R5 is not available),
- // use it as secondary temporary register, and restore it after
- // the store instruction has been emitted.
+      // Be careful not to use IP twice (for `reg` (or `reg` + 1 in
+      // the case of a word-pair store) and to build the Address
+      // object used by the store instruction(s) below).  Instead,
+ // save R5 on the stack (or R6 if R5 is not available), use it
+ // as secondary temporary register, and restore it after the
+ // store instruction has been emitted.
tmp_reg = base != R5 ? R5 : R6;
Push(tmp_reg);
if (base == SP) {
diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h
index 81dd13894f..e33c240dbf 100644
--- a/compiler/utils/arm/assembler_thumb2.h
+++ b/compiler/utils/arm/assembler_thumb2.h
@@ -135,9 +135,17 @@ class Thumb2Assembler FINAL : public ArmAssembler {
void ldrsb(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
void ldrsh(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
+ // Load/store register dual instructions using registers `rd` and `rd` + 1.
void ldrd(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
void strd(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
+ // Load/store register dual instructions using registers `rd` and `rd2`.
+ // Note that contrary to the ARM A1 encoding, the Thumb-2 T1 encoding
+  // does not require `rd` to be even, nor `rd2` to be equal to `rd` + 1.
+ void ldrd(Register rd, Register rd2, const Address& ad, Condition cond);
+ void strd(Register rd, Register rd2, const Address& ad, Condition cond);
+
void ldm(BlockAddressMode am, Register base,
RegList regs, Condition cond = AL) OVERRIDE;
void stm(BlockAddressMode am, Register base,
diff --git a/compiler/utils/arm/assembler_thumb2_test.cc b/compiler/utils/arm/assembler_thumb2_test.cc
index 813996b0db..5f5561a499 100644
--- a/compiler/utils/arm/assembler_thumb2_test.cc
+++ b/compiler/utils/arm/assembler_thumb2_test.cc
@@ -247,4 +247,103 @@ TEST_F(AssemblerThumb2Test, add) {
DriverStr(expected, "add");
}
+TEST_F(AssemblerThumb2Test, StoreWordToThumbOffset) {
+ arm::StoreOperandType type = arm::kStoreWord;
+ int32_t offset = 4092;
+ ASSERT_TRUE(arm::Address::CanHoldStoreOffsetThumb(type, offset));
+
+ __ StoreToOffset(type, arm::R0, arm::SP, offset);
+ __ StoreToOffset(type, arm::IP, arm::SP, offset);
+ __ StoreToOffset(type, arm::IP, arm::R5, offset);
+
+ const char* expected =
+ "str r0, [sp, #4092]\n"
+ "str ip, [sp, #4092]\n"
+ "str ip, [r5, #4092]\n";
+ DriverStr(expected, "StoreWordToThumbOffset");
+}
+
+TEST_F(AssemblerThumb2Test, StoreWordToNonThumbOffset) {
+ arm::StoreOperandType type = arm::kStoreWord;
+ int32_t offset = 4096;
+ ASSERT_FALSE(arm::Address::CanHoldStoreOffsetThumb(type, offset));
+
+ __ StoreToOffset(type, arm::R0, arm::SP, offset);
+ __ StoreToOffset(type, arm::IP, arm::SP, offset);
+ __ StoreToOffset(type, arm::IP, arm::R5, offset);
+
+ const char* expected =
+ "mov ip, #4096\n" // LoadImmediate(ip, 4096)
+ "add ip, ip, sp\n"
+ "str r0, [ip, #0]\n"
+
+ "str r5, [sp, #-4]!\n" // Push(r5)
+ "movw r5, #4100\n" // LoadImmediate(r5, 4096 + kRegisterSize)
+ "add r5, r5, sp\n"
+ "str ip, [r5, #0]\n"
+ "ldr r5, [sp], #4\n" // Pop(r5)
+
+ "str r6, [sp, #-4]!\n" // Push(r6)
+ "mov r6, #4096\n" // LoadImmediate(r6, 4096)
+ "add r6, r6, r5\n"
+ "str ip, [r6, #0]\n"
+ "ldr r6, [sp], #4\n"; // Pop(r6)
+ DriverStr(expected, "StoreWordToNonThumbOffset");
+}
+
+TEST_F(AssemblerThumb2Test, StoreWordPairToThumbOffset) {
+ arm::StoreOperandType type = arm::kStoreWordPair;
+ int32_t offset = 1020;
+ ASSERT_TRUE(arm::Address::CanHoldStoreOffsetThumb(type, offset));
+
+ __ StoreToOffset(type, arm::R0, arm::SP, offset);
+ // We cannot use IP (i.e. R12) as first source register, as it would
+ // force us to use SP (i.e. R13) as second source register, which
+ // would have an "unpredictable" effect according to the ARMv7
+ // specification (the T1 encoding describes the result as
+  // UNPREDICTABLE when one of the source registers is R13).
+ //
+  // So we use (R11, IP) (i.e. (R11, R12)) as source registers in the
+ // following instructions.
+ __ StoreToOffset(type, arm::R11, arm::SP, offset);
+ __ StoreToOffset(type, arm::R11, arm::R5, offset);
+
+ const char* expected =
+ "strd r0, r1, [sp, #1020]\n"
+ "strd r11, ip, [sp, #1020]\n"
+ "strd r11, ip, [r5, #1020]\n";
+ DriverStr(expected, "StoreWordPairToThumbOffset");
+}
+
+TEST_F(AssemblerThumb2Test, StoreWordPairToNonThumbOffset) {
+ arm::StoreOperandType type = arm::kStoreWordPair;
+ int32_t offset = 1024;
+ ASSERT_FALSE(arm::Address::CanHoldStoreOffsetThumb(type, offset));
+
+ __ StoreToOffset(type, arm::R0, arm::SP, offset);
+ // Same comment as in AssemblerThumb2Test.StoreWordPairToThumbOffset
+  // regarding the use of (R11, IP) (i.e. (R11, R12)) as source
+ // registers in the following instructions.
+ __ StoreToOffset(type, arm::R11, arm::SP, offset);
+ __ StoreToOffset(type, arm::R11, arm::R5, offset);
+
+ const char* expected =
+ "mov ip, #1024\n" // LoadImmediate(ip, 1024)
+ "add ip, ip, sp\n"
+ "strd r0, r1, [ip, #0]\n"
+
+ "str r5, [sp, #-4]!\n" // Push(r5)
+ "movw r5, #1028\n" // LoadImmediate(r5, 1024 + kRegisterSize)
+ "add r5, r5, sp\n"
+ "strd r11, ip, [r5, #0]\n"
+ "ldr r5, [sp], #4\n" // Pop(r5)
+
+ "str r6, [sp, #-4]!\n" // Push(r6)
+ "mov r6, #1024\n" // LoadImmediate(r6, 1024)
+ "add r6, r6, r5\n"
+ "strd r11, ip, [r6, #0]\n"
+ "ldr r6, [sp], #4\n"; // Pop(r6)
+ DriverStr(expected, "StoreWordPairToNonThumbOffset");
+}
+
} // namespace art
diff --git a/compiler/utils/arm/managed_register_arm.h b/compiler/utils/arm/managed_register_arm.h
index a496c87150..5fde9e8856 100644
--- a/compiler/utils/arm/managed_register_arm.h
+++ b/compiler/utils/arm/managed_register_arm.h
@@ -19,6 +19,7 @@
#include "base/logging.h"
#include "constants_arm.h"
+#include "dwarf/register.h"
#include "utils/managed_register.h"
namespace art {
diff --git a/compiler/utils/arm64/assembler_arm64.cc b/compiler/utils/arm64/assembler_arm64.cc
index 58c73674da..fbd04114e4 100644
--- a/compiler/utils/arm64/assembler_arm64.cc
+++ b/compiler/utils/arm64/assembler_arm64.cc
@@ -63,12 +63,14 @@ void Arm64Assembler::GetCurrentThread(FrameOffset offset, ManagedRegister /* scr
void Arm64Assembler::IncreaseFrameSize(size_t adjust) {
CHECK_ALIGNED(adjust, kStackAlignment);
AddConstant(SP, -adjust);
+ cfi().AdjustCFAOffset(adjust);
}
// See Arm64 PCS Section 5.2.2.1.
void Arm64Assembler::DecreaseFrameSize(size_t adjust) {
CHECK_ALIGNED(adjust, kStackAlignment);
AddConstant(SP, adjust);
+ cfi().AdjustCFAOffset(-adjust);
}
void Arm64Assembler::AddConstant(XRegister rd, int32_t value, Condition cond) {
@@ -638,6 +640,14 @@ void Arm64Assembler::EmitExceptionPoll(Arm64Exception *exception) {
___ Brk();
}
+static dwarf::Reg DWARFReg(XRegister reg) {
+ return dwarf::Reg::Arm64Core(static_cast<int>(reg));
+}
+
+static dwarf::Reg DWARFReg(DRegister reg) {
+ return dwarf::Reg::Arm64Fp(static_cast<int>(reg));
+}
+
constexpr size_t kFramePointerSize = 8;
constexpr unsigned int kJniRefSpillRegsSize = 11 + 8;
@@ -660,45 +670,20 @@ void Arm64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
// TUNING: Use stp.
// Note: Must match Arm64JniCallingConvention::CoreSpillMask().
size_t reg_offset = frame_size;
- reg_offset -= 8;
- StoreToOffset(LR, SP, reg_offset);
- reg_offset -= 8;
- StoreToOffset(X29, SP, reg_offset);
- reg_offset -= 8;
- StoreToOffset(X28, SP, reg_offset);
- reg_offset -= 8;
- StoreToOffset(X27, SP, reg_offset);
- reg_offset -= 8;
- StoreToOffset(X26, SP, reg_offset);
- reg_offset -= 8;
- StoreToOffset(X25, SP, reg_offset);
- reg_offset -= 8;
- StoreToOffset(X24, SP, reg_offset);
- reg_offset -= 8;
- StoreToOffset(X23, SP, reg_offset);
- reg_offset -= 8;
- StoreToOffset(X22, SP, reg_offset);
- reg_offset -= 8;
- StoreToOffset(X21, SP, reg_offset);
- reg_offset -= 8;
- StoreToOffset(X20, SP, reg_offset);
-
- reg_offset -= 8;
- StoreDToOffset(D15, SP, reg_offset);
- reg_offset -= 8;
- StoreDToOffset(D14, SP, reg_offset);
- reg_offset -= 8;
- StoreDToOffset(D13, SP, reg_offset);
- reg_offset -= 8;
- StoreDToOffset(D12, SP, reg_offset);
- reg_offset -= 8;
- StoreDToOffset(D11, SP, reg_offset);
- reg_offset -= 8;
- StoreDToOffset(D10, SP, reg_offset);
- reg_offset -= 8;
- StoreDToOffset(D9, SP, reg_offset);
- reg_offset -= 8;
- StoreDToOffset(D8, SP, reg_offset);
+ static constexpr XRegister x_spills[] = {
+ LR, X29, X28, X27, X26, X25, X24, X23, X22, X21, X20 };
+ for (size_t i = 0; i < arraysize(x_spills); i++) {
+ XRegister reg = x_spills[i];
+ reg_offset -= 8;
+ StoreToOffset(reg, SP, reg_offset);
+ cfi_.RelOffset(DWARFReg(reg), reg_offset);
+ }
+ for (int d = 15; d >= 8; d--) {
+ DRegister reg = static_cast<DRegister>(d);
+ reg_offset -= 8;
+ StoreDToOffset(reg, SP, reg_offset);
+ cfi_.RelOffset(DWARFReg(reg), reg_offset);
+ }
// Move TR(Caller saved) to ETR(Callee saved). The original (ETR)X21 has been saved on stack.
// This way we make sure that TR is not trashed by native code.
@@ -734,6 +719,7 @@ void Arm64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
void Arm64Assembler::RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& callee_save_regs) {
CHECK_ALIGNED(frame_size, kStackAlignment);
+ cfi_.RememberState();
// For now we only check that the size of the frame is greater than the spill size.
CHECK_EQ(callee_save_regs.size(), kJniRefSpillRegsSize);
@@ -748,51 +734,30 @@ void Arm64Assembler::RemoveFrame(size_t frame_size, const std::vector<ManagedReg
// TUNING: Use ldp.
// Note: Must match Arm64JniCallingConvention::CoreSpillMask().
size_t reg_offset = frame_size;
- reg_offset -= 8;
- LoadFromOffset(LR, SP, reg_offset);
- reg_offset -= 8;
- LoadFromOffset(X29, SP, reg_offset);
- reg_offset -= 8;
- LoadFromOffset(X28, SP, reg_offset);
- reg_offset -= 8;
- LoadFromOffset(X27, SP, reg_offset);
- reg_offset -= 8;
- LoadFromOffset(X26, SP, reg_offset);
- reg_offset -= 8;
- LoadFromOffset(X25, SP, reg_offset);
- reg_offset -= 8;
- LoadFromOffset(X24, SP, reg_offset);
- reg_offset -= 8;
- LoadFromOffset(X23, SP, reg_offset);
- reg_offset -= 8;
- LoadFromOffset(X22, SP, reg_offset);
- reg_offset -= 8;
- LoadFromOffset(X21, SP, reg_offset);
- reg_offset -= 8;
- LoadFromOffset(X20, SP, reg_offset);
-
- reg_offset -= 8;
- LoadDFromOffset(D15, SP, reg_offset);
- reg_offset -= 8;
- LoadDFromOffset(D14, SP, reg_offset);
- reg_offset -= 8;
- LoadDFromOffset(D13, SP, reg_offset);
- reg_offset -= 8;
- LoadDFromOffset(D12, SP, reg_offset);
- reg_offset -= 8;
- LoadDFromOffset(D11, SP, reg_offset);
- reg_offset -= 8;
- LoadDFromOffset(D10, SP, reg_offset);
- reg_offset -= 8;
- LoadDFromOffset(D9, SP, reg_offset);
- reg_offset -= 8;
- LoadDFromOffset(D8, SP, reg_offset);
+ static constexpr XRegister x_spills[] = {
+ LR, X29, X28, X27, X26, X25, X24, X23, X22, X21, X20 };
+ for (size_t i = 0; i < arraysize(x_spills); i++) {
+ XRegister reg = x_spills[i];
+ reg_offset -= 8;
+ LoadFromOffset(reg, SP, reg_offset);
+ cfi_.Restore(DWARFReg(reg));
+ }
+ for (int d = 15; d >= 8; d--) {
+ DRegister reg = static_cast<DRegister>(d);
+ reg_offset -= 8;
+ LoadDFromOffset(reg, SP, reg_offset);
+ cfi_.Restore(DWARFReg(reg));
+ }
// Decrease frame size to start of callee saved regs.
DecreaseFrameSize(frame_size);
// Pop callee saved and return to LR.
___ Ret();
+
+ // The CFI should be restored for any code that follows the exit block.
+ cfi_.RestoreState();
+ cfi_.DefCFAOffset(frame_size);
}
} // namespace arm64
diff --git a/compiler/utils/arm64/assembler_arm64.h b/compiler/utils/arm64/assembler_arm64.h
index a69be2599e..8973b9ca8a 100644
--- a/compiler/utils/arm64/assembler_arm64.h
+++ b/compiler/utils/arm64/assembler_arm64.h
@@ -30,9 +30,11 @@
// TODO: make vixl clean wrt -Wshadow.
#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunknown-pragmas"
#pragma GCC diagnostic ignored "-Wshadow"
-#include "a64/macro-assembler-a64.h"
-#include "a64/disasm-a64.h"
+#pragma GCC diagnostic ignored "-Wmissing-noreturn"
+#include "vixl/a64/macro-assembler-a64.h"
+#include "vixl/a64/disasm-a64.h"
#pragma GCC diagnostic pop
namespace art {
diff --git a/compiler/utils/arm64/managed_register_arm64.h b/compiler/utils/arm64/managed_register_arm64.h
index e1d6f3179d..62c1d4dbee 100644
--- a/compiler/utils/arm64/managed_register_arm64.h
+++ b/compiler/utils/arm64/managed_register_arm64.h
@@ -19,6 +19,7 @@
#include "base/logging.h"
#include "constants_arm64.h"
+#include "dwarf/register.h"
#include "utils/managed_register.h"
namespace art {
diff --git a/compiler/utils/array_ref.h b/compiler/utils/array_ref.h
index b1b0ee5e53..ff5a77c97a 100644
--- a/compiler/utils/array_ref.h
+++ b/compiler/utils/array_ref.h
@@ -89,6 +89,8 @@ class ArrayRef {
: array_(v.data()), size_(v.size()) {
}
+ ArrayRef(const ArrayRef&) = default;
+
// Assignment operators.
ArrayRef& operator=(const ArrayRef& other) {
diff --git a/compiler/utils/assembler.cc b/compiler/utils/assembler.cc
index 5340dd3a25..36342c61c5 100644
--- a/compiler/utils/assembler.cc
+++ b/compiler/utils/assembler.cc
@@ -105,6 +105,9 @@ void AssemblerBuffer::ExtendCapacity() {
CHECK_EQ(Size(), old_size);
}
+void DebugFrameOpCodeWriterForAssembler::ImplicitlyAdvancePC() {
+ this->AdvancePC(assembler_->CodeSize());
+}
Assembler* Assembler::Create(InstructionSet instruction_set) {
switch (instruction_set) {
diff --git a/compiler/utils/assembler.h b/compiler/utils/assembler.h
index 923ecdbd9d..ebafd3dd1e 100644
--- a/compiler/utils/assembler.h
+++ b/compiler/utils/assembler.h
@@ -29,6 +29,7 @@
#include "offsets.h"
#include "x86/constants_x86.h"
#include "x86_64/constants_x86_64.h"
+#include "dwarf/debug_frame_opcode_writer.h"
namespace art {
@@ -354,6 +355,23 @@ class AssemblerBuffer {
friend class AssemblerFixup;
};
+// The purpose of this class is to ensure that we do not have to explicitly
+// call the AdvancePC method (which is good for convenience and correctness).
+class DebugFrameOpCodeWriterForAssembler FINAL
+ : public dwarf::DebugFrameOpCodeWriter<> {
+ public:
+  // This method is called by the opcode writers.
+ virtual void ImplicitlyAdvancePC() FINAL;
+
+ explicit DebugFrameOpCodeWriterForAssembler(Assembler* buffer)
+ : dwarf::DebugFrameOpCodeWriter<>(),
+ assembler_(buffer) {
+ }
+
+ private:
+ Assembler* assembler_;
+};
+
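The subclass exists so that every CFI opcode is stamped with the current code offset automatically. A self-contained model of that hook (all names illustrative):

    #include <cstddef>
    #include <cstdio>

    struct MockAssembler;
    struct CfiWriter {
      explicit CfiWriter(MockAssembler* a) : assembler_(a) {}
      void AdjustCFAOffset(int delta);  // Defined after MockAssembler.
      MockAssembler* assembler_;
    };
    struct MockAssembler {
      size_t CodeSize() const { return code_size_; }
      void EmitBytes(size_t n) { code_size_ += n; }
      size_t code_size_ = 0;
    };
    void CfiWriter::AdjustCFAOffset(int delta) {
      // The real writer first advances the PC to the assembler's current
      // offset, so the opcode applies at the instruction just emitted.
      std::printf("advance_loc to %zu, cfa_offset %+d\n",
                  assembler_->CodeSize(), delta);
    }

    int main() {
      MockAssembler assembler;
      CfiWriter cfi(&assembler);
      assembler.EmitBytes(4);   // E.g. "sub sp, sp, #16".
      cfi.AdjustCFAOffset(16);  // Automatically tagged with PC == 4.
    }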
class Assembler {
public:
static Assembler* Create(InstructionSet instruction_set);
@@ -504,18 +522,20 @@ class Assembler {
// and branch to a ExceptionSlowPath if it is.
virtual void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) = 0;
- virtual void InitializeFrameDescriptionEntry() {}
- virtual void FinalizeFrameDescriptionEntry() {}
- // Give a vector containing FDE data, or null if not used. Note: the assembler must take care
- // of handling the lifecycle.
- virtual std::vector<uint8_t>* GetFrameDescriptionEntry() { return nullptr; }
-
virtual ~Assembler() {}
+ /**
+ * @brief Buffer of DWARF's Call Frame Information opcodes.
+ * @details It is used by debuggers and other tools to unwind the call stack.
+ */
+ DebugFrameOpCodeWriterForAssembler& cfi() { return cfi_; }
+
protected:
- Assembler() : buffer_() {}
+ Assembler() : buffer_(), cfi_(this) {}
AssemblerBuffer buffer_;
+
+ DebugFrameOpCodeWriterForAssembler cfi_;
};
} // namespace art
diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h
index 6f8b3012a4..3fe1a31d70 100644
--- a/compiler/utils/assembler_test.h
+++ b/compiler/utils/assembler_test.h
@@ -44,7 +44,9 @@ static std::string tmpnam_;
enum class RegisterView { // private
kUsePrimaryName,
- kUseSecondaryName
+ kUseSecondaryName,
+ kUseTertiaryName,
+ kUseQuaternaryName,
};
template<typename Ass, typename Reg, typename FPReg, typename Imm>
@@ -97,6 +99,15 @@ class AssemblerTest : public testing::Test {
fmt);
}
+ std::string Repeatrb(void (Ass::*f)(Reg, Reg), std::string fmt) {
+ return RepeatTemplatedRegisters<Reg, Reg>(f,
+ GetRegisters(),
+ GetRegisters(),
+ &AssemblerTest::GetRegName<RegisterView::kUseSecondaryName>,
+ &AssemblerTest::GetRegName<RegisterView::kUseQuaternaryName>,
+ fmt);
+ }
+
std::string RepeatRr(void (Ass::*f)(Reg, Reg), std::string fmt) {
return RepeatTemplatedRegisters<Reg, Reg>(f,
GetRegisters(),
@@ -123,6 +134,16 @@ class AssemblerTest : public testing::Test {
fmt);
}
+ std::string RepeatFFI(void (Ass::*f)(FPReg, FPReg, const Imm&), size_t imm_bytes, std::string fmt) {
+ return RepeatTemplatedRegistersImm<FPReg, FPReg>(f,
+ GetFPRegisters(),
+ GetFPRegisters(),
+ &AssemblerTest::GetFPRegName,
+ &AssemblerTest::GetFPRegName,
+ imm_bytes,
+ fmt);
+ }
+
std::string RepeatFR(void (Ass::*f)(FPReg, Reg), std::string fmt) {
return RepeatTemplatedRegisters<FPReg, Reg>(f,
GetFPRegisters(),
@@ -230,6 +251,18 @@ class AssemblerTest : public testing::Test {
UNREACHABLE();
}
+ // Tertiary register names are the tertiary view on registers, e.g., 16b on 64b systems.
+ virtual std::string GetTertiaryRegisterName(const Reg& reg ATTRIBUTE_UNUSED) {
+ UNIMPLEMENTED(FATAL) << "Architecture does not support tertiary registers";
+ UNREACHABLE();
+ }
+
+ // Quaternary register names are the quaternary view on registers, e.g., 8b on 64b systems.
+ virtual std::string GetQuaternaryRegisterName(const Reg& reg ATTRIBUTE_UNUSED) {
+ UNIMPLEMENTED(FATAL) << "Architecture does not support quaternary registers";
+ UNREACHABLE();
+ }
+
std::string GetRegisterName(const Reg& reg) {
return GetRegName<RegisterView::kUsePrimaryName>(reg);
}
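On x86-64 the four views line up with the architectural sub-register aliases; an illustrative table (the names are the standard assembler ones):

    #include <string>

    struct RegisterViews {
      std::string primary;     // 64-bit name, e.g. "rax".
      std::string secondary;   // 32-bit alias, "eax".
      std::string tertiary;    // 16-bit alias, "ax".
      std::string quaternary;  // 8-bit alias, "al".
    };

    const RegisterViews kRaxViews{"rax", "eax", "ax", "al"};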
@@ -448,6 +481,57 @@ class AssemblerTest : public testing::Test {
return str;
}
+ template <typename Reg1, typename Reg2>
+ std::string RepeatTemplatedRegistersImm(void (Ass::*f)(Reg1, Reg2, const Imm&),
+ const std::vector<Reg1*> reg1_registers,
+ const std::vector<Reg2*> reg2_registers,
+ std::string (AssemblerTest::*GetName1)(const Reg1&),
+ std::string (AssemblerTest::*GetName2)(const Reg2&),
+ size_t imm_bytes,
+ std::string fmt) {
+ std::vector<int64_t> imms = CreateImmediateValues(imm_bytes);
+ WarnOnCombinations(reg1_registers.size() * reg2_registers.size() * imms.size());
+
+ std::string str;
+ for (auto reg1 : reg1_registers) {
+ for (auto reg2 : reg2_registers) {
+ for (int64_t imm : imms) {
+ Imm new_imm = CreateImmediate(imm);
+ (assembler_.get()->*f)(*reg1, *reg2, new_imm);
+ std::string base = fmt;
+
+ std::string reg1_string = (this->*GetName1)(*reg1);
+ size_t reg1_index;
+ while ((reg1_index = base.find(REG1_TOKEN)) != std::string::npos) {
+ base.replace(reg1_index, ConstexprStrLen(REG1_TOKEN), reg1_string);
+ }
+
+ std::string reg2_string = (this->*GetName2)(*reg2);
+ size_t reg2_index;
+ while ((reg2_index = base.find(REG2_TOKEN)) != std::string::npos) {
+ base.replace(reg2_index, ConstexprStrLen(REG2_TOKEN), reg2_string);
+ }
+
+ size_t imm_index = base.find(IMM_TOKEN);
+ if (imm_index != std::string::npos) {
+ std::ostringstream sreg;
+ sreg << imm;
+ std::string imm_string = sreg.str();
+ base.replace(imm_index, ConstexprStrLen(IMM_TOKEN), imm_string);
+ }
+
+ if (str.size() > 0) {
+ str += "\n";
+ }
+ str += base;
+ }
+ }
+ }
+ // Add a newline at the end.
+ str += "\n";
+ return str;
+ }
+
template <RegisterView kRegView>
std::string GetRegName(const Reg& reg) {
std::ostringstream sreg;
@@ -459,6 +543,14 @@ class AssemblerTest : public testing::Test {
case RegisterView::kUseSecondaryName:
sreg << GetSecondaryRegisterName(reg);
break;
+
+ case RegisterView::kUseTertiaryName:
+ sreg << GetTertiaryRegisterName(reg);
+ break;
+
+ case RegisterView::kUseQuaternaryName:
+ sreg << GetQuaternaryRegisterName(reg);
+ break;
}
return sreg.str();
}
diff --git a/compiler/utils/dex_cache_arrays_layout-inl.h b/compiler/utils/dex_cache_arrays_layout-inl.h
new file mode 100644
index 0000000000..7d02ce35d8
--- /dev/null
+++ b/compiler/utils/dex_cache_arrays_layout-inl.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_DEX_CACHE_ARRAYS_LAYOUT_INL_H_
+#define ART_COMPILER_UTILS_DEX_CACHE_ARRAYS_LAYOUT_INL_H_
+
+#include "dex_cache_arrays_layout.h"
+
+#include "base/logging.h"
+#include "globals.h"
+#include "mirror/array-inl.h"
+#include "primitive.h"
+#include "utils.h"
+
+namespace mirror {
+class ArtField;
+class ArtMethod;
+class Class;
+class String;
+} // namespace mirror
+
+namespace art {
+
+inline DexCacheArraysLayout::DexCacheArraysLayout(const DexFile* dex_file)
+ : /* types_offset_ is always 0u */
+ methods_offset_(types_offset_ + ArraySize<mirror::Class>(dex_file->NumTypeIds())),
+ strings_offset_(methods_offset_ + ArraySize<mirror::ArtMethod>(dex_file->NumMethodIds())),
+ fields_offset_(strings_offset_ + ArraySize<mirror::String>(dex_file->NumStringIds())),
+ size_(fields_offset_ + ArraySize<mirror::ArtField>(dex_file->NumFieldIds())) {
+}
+
+inline size_t DexCacheArraysLayout::TypeOffset(uint32_t type_idx) const {
+ return types_offset_ + ElementOffset<mirror::Class>(type_idx);
+}
+
+inline size_t DexCacheArraysLayout::MethodOffset(uint32_t method_idx) const {
+ return methods_offset_ + ElementOffset<mirror::ArtMethod>(method_idx);
+}
+
+inline size_t DexCacheArraysLayout::StringOffset(uint32_t string_idx) const {
+ return strings_offset_ + ElementOffset<mirror::String>(string_idx);
+}
+
+inline size_t DexCacheArraysLayout::FieldOffset(uint32_t field_idx) const {
+ return fields_offset_ + ElementOffset<mirror::ArtField>(field_idx);
+}
+
+template <typename MirrorType>
+inline size_t DexCacheArraysLayout::ElementOffset(uint32_t idx) {
+ return mirror::Array::DataOffset(sizeof(mirror::HeapReference<MirrorType>)).Uint32Value() +
+ sizeof(mirror::HeapReference<MirrorType>) * idx;
+}
+
+template <typename MirrorType>
+inline size_t DexCacheArraysLayout::ArraySize(uint32_t num_elements) {
+ size_t array_size = mirror::ComputeArraySize(
+ num_elements, ComponentSizeShiftWidth<sizeof(mirror::HeapReference<MirrorType>)>());
+ DCHECK_NE(array_size, 0u); // No overflow expected for dex cache arrays.
+ return RoundUp(array_size, kObjectAlignment);
+}
+
+} // namespace art
+
+#endif // ART_COMPILER_UTILS_DEX_CACHE_ARRAYS_LAYOUT_INL_H_
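A short usage sketch for the new helper (hedged: it assumes this -inl.h header is included and that `dex_file` points to a loaded dex file):

    // Offsets are fixed per dex file; entries can then be addressed relative
    // to whatever base the image writer picks for the cache arrays.
    size_t MethodSlotOffset(const art::DexFile* dex_file, uint32_t method_idx) {
      art::DexCacheArraysLayout layout(dex_file);
      return layout.Valid() ? layout.MethodOffset(method_idx) : 0u;
    }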
diff --git a/compiler/utils/dex_cache_arrays_layout.h b/compiler/utils/dex_cache_arrays_layout.h
new file mode 100644
index 0000000000..b461256f63
--- /dev/null
+++ b/compiler/utils/dex_cache_arrays_layout.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_DEX_CACHE_ARRAYS_LAYOUT_H_
+#define ART_COMPILER_UTILS_DEX_CACHE_ARRAYS_LAYOUT_H_
+
+namespace art {
+
+/**
+ * @class DexCacheArraysLayout
+ * @details This class provides the layout information for the type, method, field and
+ * @details This class provides the layout information for the type, method, field and
+ * string arrays for a DexCache with a fixed array layout (such as in the boot image).
+class DexCacheArraysLayout {
+ public:
+ // Construct an invalid layout.
+ DexCacheArraysLayout()
+ : /* types_offset_ is always 0u */
+ methods_offset_(0u),
+ strings_offset_(0u),
+ fields_offset_(0u),
+ size_(0u) {
+ }
+
+ // Construct a layout for a particular dex file.
+ explicit DexCacheArraysLayout(const DexFile* dex_file);
+
+ bool Valid() const {
+ return Size() != 0u;
+ }
+
+ size_t Size() const {
+ return size_;
+ }
+
+ size_t TypesOffset() const {
+ return types_offset_;
+ }
+
+ size_t TypeOffset(uint32_t type_idx) const;
+
+ size_t MethodsOffset() const {
+ return methods_offset_;
+ }
+
+ size_t MethodOffset(uint32_t method_idx) const;
+
+ size_t StringsOffset() const {
+ return strings_offset_;
+ }
+
+ size_t StringOffset(uint32_t string_idx) const;
+
+ size_t FieldsOffset() const {
+ return fields_offset_;
+ }
+
+ size_t FieldOffset(uint32_t field_idx) const;
+
+ private:
+ static constexpr size_t types_offset_ = 0u;
+ const size_t methods_offset_;
+ const size_t strings_offset_;
+ const size_t fields_offset_;
+ const size_t size_;
+
+ template <typename MirrorType>
+ static size_t ElementOffset(uint32_t idx);
+
+ template <typename MirrorType>
+ static size_t ArraySize(uint32_t num_elements);
+};
+
+} // namespace art
+
+#endif // ART_COMPILER_UTILS_DEX_CACHE_ARRAYS_LAYOUT_H_
diff --git a/compiler/utils/dwarf_cfi.cc b/compiler/utils/dwarf_cfi.cc
deleted file mode 100644
index a7e09c6517..0000000000
--- a/compiler/utils/dwarf_cfi.cc
+++ /dev/null
@@ -1,156 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "leb128.h"
-#include "utils.h"
-
-#include "dwarf_cfi.h"
-
-namespace art {
-
-void DW_CFA_advance_loc(std::vector<uint8_t>* buf, uint32_t increment) {
- if (increment < 64) {
- // Encoding in opcode.
- buf->push_back(0x1 << 6 | increment);
- } else if (increment < 256) {
- // Single byte delta.
- buf->push_back(0x02);
- buf->push_back(increment);
- } else if (increment < 256 * 256) {
- // Two byte delta.
- buf->push_back(0x03);
- buf->push_back(increment & 0xff);
- buf->push_back((increment >> 8) & 0xff);
- } else {
- // Four byte delta.
- buf->push_back(0x04);
- Push32(buf, increment);
- }
-}
-
-void DW_CFA_offset_extended_sf(std::vector<uint8_t>* buf, int reg, int32_t offset) {
- buf->push_back(0x11);
- EncodeUnsignedLeb128(reg, buf);
- EncodeSignedLeb128(offset, buf);
-}
-
-void DW_CFA_offset(std::vector<uint8_t>* buf, int reg, uint32_t offset) {
- buf->push_back((0x2 << 6) | reg);
- EncodeUnsignedLeb128(offset, buf);
-}
-
-void DW_CFA_def_cfa_offset(std::vector<uint8_t>* buf, int32_t offset) {
- buf->push_back(0x0e);
- EncodeUnsignedLeb128(offset, buf);
-}
-
-void DW_CFA_remember_state(std::vector<uint8_t>* buf) {
- buf->push_back(0x0a);
-}
-
-void DW_CFA_restore_state(std::vector<uint8_t>* buf) {
- buf->push_back(0x0b);
-}
-
-void WriteFDEHeader(std::vector<uint8_t>* buf, bool is_64bit) {
- // 'length' (filled in by other functions).
- if (is_64bit) {
- Push32(buf, 0xffffffff); // Indicates 64bit
- Push32(buf, 0);
- Push32(buf, 0);
- } else {
- Push32(buf, 0);
- }
-
- // 'CIE_pointer' (filled in by linker).
- if (is_64bit) {
- Push32(buf, 0);
- Push32(buf, 0);
- } else {
- Push32(buf, 0);
- }
-
- // 'initial_location' (filled in by linker).
- if (is_64bit) {
- Push32(buf, 0);
- Push32(buf, 0);
- } else {
- Push32(buf, 0);
- }
-
- // 'address_range' (filled in by other functions).
- if (is_64bit) {
- Push32(buf, 0);
- Push32(buf, 0);
- } else {
- Push32(buf, 0);
- }
-
- // Augmentation length: 0
- buf->push_back(0);
-}
-
-void WriteFDEAddressRange(std::vector<uint8_t>* buf, uint64_t data, bool is_64bit) {
- const size_t kOffsetOfAddressRange = is_64bit? 28 : 12;
- CHECK(buf->size() >= kOffsetOfAddressRange + (is_64bit? 8 : 4));
-
- uint8_t *p = buf->data() + kOffsetOfAddressRange;
- if (is_64bit) {
- p[0] = data;
- p[1] = data >> 8;
- p[2] = data >> 16;
- p[3] = data >> 24;
- p[4] = data >> 32;
- p[5] = data >> 40;
- p[6] = data >> 48;
- p[7] = data >> 56;
- } else {
- p[0] = data;
- p[1] = data >> 8;
- p[2] = data >> 16;
- p[3] = data >> 24;
- }
-}
-
-void WriteCFILength(std::vector<uint8_t>* buf, bool is_64bit) {
- uint64_t length = is_64bit ? buf->size() - 12 : buf->size() - 4;
- DCHECK_EQ((length & 0x3), 0U);
-
- uint8_t *p = is_64bit? buf->data() + 4 : buf->data();
- if (is_64bit) {
- p[0] = length;
- p[1] = length >> 8;
- p[2] = length >> 16;
- p[3] = length >> 24;
- p[4] = length >> 32;
- p[5] = length >> 40;
- p[6] = length >> 48;
- p[7] = length >> 56;
- } else {
- p[0] = length;
- p[1] = length >> 8;
- p[2] = length >> 16;
- p[3] = length >> 24;
- }
-}
-
-void PadCFI(std::vector<uint8_t>* buf) {
- while (buf->size() & 0x3) {
- buf->push_back(0);
- }
-}
-
-} // namespace art
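
These hand-rolled helpers are superseded by the dwarf opcode writers that the
assemblers below now drive through cfi_. For reference, the compact
advance_loc encoding they implemented packs small PC deltas into the opcode
byte itself; a self-contained sketch of the same DWARF rule:

    // DWARF CFA advance_loc encoding: deltas < 64 fit in the opcode byte
    // (0x40 | delta); larger deltas use DW_CFA_advance_loc1/2/4 (opcodes
    // 0x02/0x03/0x04) with a little-endian operand.
    #include <cstdint>
    #include <vector>

    void AdvanceLoc(std::vector<uint8_t>* buf, uint32_t delta) {
      if (delta < 64u) {
        buf->push_back(0x40 | delta);
      } else if (delta < 256u) {
        buf->push_back(0x02);
        buf->push_back(delta & 0xff);
      } else if (delta < 65536u) {
        buf->push_back(0x03);
        buf->push_back(delta & 0xff);
        buf->push_back((delta >> 8) & 0xff);
      } else {
        buf->push_back(0x04);
        for (int i = 0; i < 4; ++i) {
          buf->push_back((delta >> (8 * i)) & 0xff);
        }
      }
    }
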
diff --git a/compiler/utils/dwarf_cfi.h b/compiler/utils/dwarf_cfi.h
deleted file mode 100644
index 0c8b1516dd..0000000000
--- a/compiler/utils/dwarf_cfi.h
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_UTILS_DWARF_CFI_H_
-#define ART_COMPILER_UTILS_DWARF_CFI_H_
-
-#include <vector>
-
-namespace art {
-
-/**
- * @brief Enter a 'DW_CFA_advance_loc' into an FDE buffer
- * @param buf FDE buffer.
- * @param increment Amount by which to increase the current location.
- */
-void DW_CFA_advance_loc(std::vector<uint8_t>* buf, uint32_t increment);
-
-/**
- * @brief Enter a 'DW_CFA_offset_extended_sf' into an FDE buffer
- * @param buf FDE buffer.
- * @param reg Register number.
- * @param offset Offset of register address from CFA.
- */
-void DW_CFA_offset_extended_sf(std::vector<uint8_t>* buf, int reg, int32_t offset);
-
-/**
- * @brief Enter a 'DW_CFA_offset' into an FDE buffer
- * @param buf FDE buffer.
- * @param reg Register number.
- * @param offset Offset of register address from CFA.
- */
-void DW_CFA_offset(std::vector<uint8_t>* buf, int reg, uint32_t offset);
-
-/**
- * @brief Enter a 'DW_CFA_def_cfa_offset' into an FDE buffer
- * @param buf FDE buffer.
- * @param offset New offset of CFA.
- */
-void DW_CFA_def_cfa_offset(std::vector<uint8_t>* buf, int32_t offset);
-
-/**
- * @brief Enter a 'DW_CFA_remember_state' into an FDE buffer
- * @param buf FDE buffer.
- */
-void DW_CFA_remember_state(std::vector<uint8_t>* buf);
-
-/**
- * @brief Enter a 'DW_CFA_restore_state' into an FDE buffer
- * @param buf FDE buffer.
- */
-void DW_CFA_restore_state(std::vector<uint8_t>* buf);
-
-/**
- * @brief Write FDE header into an FDE buffer
- * @param buf FDE buffer.
- * @param is_64bit If FDE is for 64bit application.
- */
-void WriteFDEHeader(std::vector<uint8_t>* buf, bool is_64bit);
-
-/**
- * @brief Set 'address_range' field of an FDE buffer
- * @param buf FDE buffer.
- * @param data Data value.
- * @param is_64bit If FDE is for 64bit application.
- */
-void WriteFDEAddressRange(std::vector<uint8_t>* buf, uint64_t data, bool is_64bit);
-
-/**
- * @brief Set 'length' field of an FDE buffer
- * @param buf FDE buffer.
- * @param is_64bit If FDE is for 64bit application.
- */
-void WriteCFILength(std::vector<uint8_t>* buf, bool is_64bit);
-
-/**
- * @brief Pad an FDE buffer with 0 until its size is a multiple of 4
- * @param buf FDE buffer.
- */
-void PadCFI(std::vector<uint8_t>* buf);
-} // namespace art
-
-#endif // ART_COMPILER_UTILS_DWARF_CFI_H_
diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc
index b5437b0eda..709a911f6a 100644
--- a/compiler/utils/mips/assembler_mips.cc
+++ b/compiler/utils/mips/assembler_mips.cc
@@ -536,6 +536,10 @@ void MipsAssembler::StoreDToOffset(DRegister reg, Register base, int32_t offset)
Sdc1(reg, base, offset);
}
+static dwarf::Reg DWARFReg(Register reg) {
+ return dwarf::Reg::MipsCore(static_cast<int>(reg));
+}
+
constexpr size_t kFramePointerSize = 4;
void MipsAssembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
@@ -549,10 +553,12 @@ void MipsAssembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
// Push callee saves and return address
int stack_offset = frame_size - kFramePointerSize;
StoreToOffset(kStoreWord, RA, SP, stack_offset);
+ cfi_.RelOffset(DWARFReg(RA), stack_offset);
for (int i = callee_save_regs.size() - 1; i >= 0; --i) {
stack_offset -= kFramePointerSize;
Register reg = callee_save_regs.at(i).AsMips().AsCoreRegister();
StoreToOffset(kStoreWord, reg, SP, stack_offset);
+ cfi_.RelOffset(DWARFReg(reg), stack_offset);
}
// Write out Method*.
@@ -568,31 +574,40 @@ void MipsAssembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
void MipsAssembler::RemoveFrame(size_t frame_size,
const std::vector<ManagedRegister>& callee_save_regs) {
CHECK_ALIGNED(frame_size, kStackAlignment);
+ cfi_.RememberState();
// Pop callee saves and return address
int stack_offset = frame_size - (callee_save_regs.size() * kFramePointerSize) - kFramePointerSize;
for (size_t i = 0; i < callee_save_regs.size(); ++i) {
Register reg = callee_save_regs.at(i).AsMips().AsCoreRegister();
LoadFromOffset(kLoadWord, reg, SP, stack_offset);
+ cfi_.Restore(DWARFReg(reg));
stack_offset += kFramePointerSize;
}
LoadFromOffset(kLoadWord, RA, SP, stack_offset);
+ cfi_.Restore(DWARFReg(RA));
// Decrease frame to required size.
DecreaseFrameSize(frame_size);
// Then jump to the return address.
Jr(RA);
+
+ // The CFI should be restored for any code that follows the exit block.
+ cfi_.RestoreState();
+ cfi_.DefCFAOffset(frame_size);
}
void MipsAssembler::IncreaseFrameSize(size_t adjust) {
CHECK_ALIGNED(adjust, kStackAlignment);
AddConstant(SP, SP, -adjust);
+ cfi_.AdjustCFAOffset(adjust);
}
void MipsAssembler::DecreaseFrameSize(size_t adjust) {
CHECK_ALIGNED(adjust, kStackAlignment);
AddConstant(SP, SP, adjust);
+ cfi_.AdjustCFAOffset(-adjust);
}
void MipsAssembler::Store(FrameOffset dest, ManagedRegister msrc, size_t size) {
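
The RememberState()/RestoreState() pair that now brackets RemoveFrame is the
standard pattern for keeping unwind info valid when an epilogue appears in the
middle of a method: the state is snapshotted before the frame is torn down and
reinstated after the jump, so code emitted past the exit block still unwinds
with the full frame. A minimal stand-in showing the discipline (this
OpcodeWriter is a simplification for illustration, not ART's actual class):

    #include <cstddef>

    // Simplified CFI writer: tracks only the CFA offset, which is enough
    // to show why the epilogue must snapshot and restore state.
    struct OpcodeWriter {
      int cfa_offset = 0;
      int remembered = 0;
      void RememberState() { remembered = cfa_offset; }  // DW_CFA_remember_state
      void RestoreState() { cfa_offset = remembered; }   // DW_CFA_restore_state
      void AdjustCFAOffset(int d) { cfa_offset += d; }
      void DefCFAOffset(int o) { cfa_offset = o; }
    };

    void EmitEpilogue(OpcodeWriter& cfi, size_t frame_size) {
      cfi.RememberState();                       // Snapshot the in-frame state.
      // ...pop callee saves, calling cfi.Restore(reg) after each reload...
      cfi.AdjustCFAOffset(-static_cast<int>(frame_size));  // Frame released.
      // ...emit the return/jump here...
      cfi.RestoreState();                        // Valid again past the return.
      cfi.DefCFAOffset(frame_size);              // CFA as it was in the body.
    }
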
diff --git a/compiler/utils/mips/managed_register_mips.h b/compiler/utils/mips/managed_register_mips.h
index dd55cc4e6a..40d39e3386 100644
--- a/compiler/utils/mips/managed_register_mips.h
+++ b/compiler/utils/mips/managed_register_mips.h
@@ -18,6 +18,7 @@
#define ART_COMPILER_UTILS_MIPS_MANAGED_REGISTER_MIPS_H_
#include "constants_mips.h"
+#include "dwarf/register.h"
#include "utils/managed_register.h"
namespace art {
diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc
index 233ae7db3c..282ab96ce4 100644
--- a/compiler/utils/mips64/assembler_mips64.cc
+++ b/compiler/utils/mips64/assembler_mips64.cc
@@ -568,6 +568,10 @@ void Mips64Assembler::StoreFpuToOffset(StoreOperandType type, FpuRegister reg, G
}
}
+static dwarf::Reg DWARFReg(GpuRegister reg) {
+ return dwarf::Reg::Mips64Core(static_cast<int>(reg));
+}
+
constexpr size_t kFramePointerSize = 8;
void Mips64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
@@ -581,10 +585,12 @@ void Mips64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
// Push callee saves and return address
int stack_offset = frame_size - kFramePointerSize;
StoreToOffset(kStoreDoubleword, RA, SP, stack_offset);
+ cfi_.RelOffset(DWARFReg(RA), stack_offset);
for (int i = callee_save_regs.size() - 1; i >= 0; --i) {
stack_offset -= kFramePointerSize;
GpuRegister reg = callee_save_regs.at(i).AsMips64().AsGpuRegister();
StoreToOffset(kStoreDoubleword, reg, SP, stack_offset);
+ cfi_.RelOffset(DWARFReg(reg), stack_offset);
}
// Write out Method*.
@@ -612,31 +618,40 @@ void Mips64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
void Mips64Assembler::RemoveFrame(size_t frame_size,
const std::vector<ManagedRegister>& callee_save_regs) {
CHECK_ALIGNED(frame_size, kStackAlignment);
+ cfi_.RememberState();
// Pop callee saves and return address
int stack_offset = frame_size - (callee_save_regs.size() * kFramePointerSize) - kFramePointerSize;
for (size_t i = 0; i < callee_save_regs.size(); ++i) {
GpuRegister reg = callee_save_regs.at(i).AsMips64().AsGpuRegister();
LoadFromOffset(kLoadDoubleword, reg, SP, stack_offset);
+ cfi_.Restore(DWARFReg(reg));
stack_offset += kFramePointerSize;
}
LoadFromOffset(kLoadDoubleword, RA, SP, stack_offset);
+ cfi_.Restore(DWARFReg(RA));
// Decrease frame to required size.
DecreaseFrameSize(frame_size);
// Then jump to the return address.
Jr(RA);
+
+ // The CFI should be restored for any code that follows the exit block.
+ cfi_.RestoreState();
+ cfi_.DefCFAOffset(frame_size);
}
void Mips64Assembler::IncreaseFrameSize(size_t adjust) {
CHECK_ALIGNED(adjust, kStackAlignment);
AddConstant64(SP, SP, -adjust);
+ cfi_.AdjustCFAOffset(adjust);
}
void Mips64Assembler::DecreaseFrameSize(size_t adjust) {
CHECK_ALIGNED(adjust, kStackAlignment);
AddConstant64(SP, SP, adjust);
+ cfi_.AdjustCFAOffset(-adjust);
}
void Mips64Assembler::Store(FrameOffset dest, ManagedRegister msrc, size_t size) {
@@ -1025,7 +1040,7 @@ void Mips64ExceptionSlowPath::Emit(Assembler* sasm) {
__ Move(A0, scratch_.AsGpuRegister());
// Set up call to Thread::Current()->pDeliverException
__ LoadFromOffset(kLoadDoubleword, T9, S1,
- QUICK_ENTRYPOINT_OFFSET(4, pDeliverException).Int32Value());
+ QUICK_ENTRYPOINT_OFFSET(8, pDeliverException).Int32Value());
__ Jr(T9);
// Call never returns
__ Break();
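
The 4 -> 8 change above fixes the entrypoint offset computation for a 64-bit
target: the first argument of QUICK_ENTRYPOINT_OFFSET is the target pointer
size, and the quick entrypoints are laid out one pointer per slot in the
Thread object, so a worked example with an assumed slot index N looks like:

    // Illustrative only (N is assumed):
    //   offset = quick_entrypoints_base + N * pointer_size
    // With pointer_size = 4 on mips64, every slot past the first would be
    // computed too low, loading the wrong entrypoint for pDeliverException.
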
diff --git a/compiler/utils/mips64/managed_register_mips64.h b/compiler/utils/mips64/managed_register_mips64.h
index 924a928389..4c4705bbfb 100644
--- a/compiler/utils/mips64/managed_register_mips64.h
+++ b/compiler/utils/mips64/managed_register_mips64.h
@@ -18,6 +18,7 @@
#define ART_COMPILER_UTILS_MIPS64_MANAGED_REGISTER_MIPS64_H_
#include "constants_mips64.h"
+#include "dwarf/register.h"
#include "utils/managed_register.h"
namespace art {
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 5773459ff5..f8bba07f84 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -20,7 +20,6 @@
#include "entrypoints/quick/quick_entrypoints.h"
#include "memory_region.h"
#include "thread.h"
-#include "utils/dwarf_cfi.h"
namespace art {
namespace x86 {
@@ -695,6 +694,28 @@ void X86Assembler::ucomisd(XmmRegister a, XmmRegister b) {
}
+void X86Assembler::roundsd(XmmRegister dst, XmmRegister src, const Immediate& imm) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x3A);
+ EmitUint8(0x0B);
+ EmitXmmRegisterOperand(dst, src);
+ EmitUint8(imm.value());
+}
+
+
+void X86Assembler::roundss(XmmRegister dst, XmmRegister src, const Immediate& imm) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x3A);
+ EmitUint8(0x0A);
+ EmitXmmRegisterOperand(dst, src);
+ EmitUint8(imm.value());
+}
+
+
void X86Assembler::sqrtsd(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0xF2);
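
The new roundss/roundsd emitters encode the SSE4.1 ROUNDSS/ROUNDSD
instructions (66 0F 3A 0A/0B /r ib); the immediate selects the rounding
behaviour. The constants below come from the instruction set reference, not
from this change:

    // SSE4.1 rounding-control immediate (imm8[1:0], honoured when imm8[2]==0;
    // imm8[2]==1 means use MXCSR.RC, imm8[3] suppresses precision exceptions).
    constexpr int32_t kRoundToNearest  = 0x0;  // round half to even
    constexpr int32_t kRoundDown       = 0x1;  // toward -inf (floor)
    constexpr int32_t kRoundUp         = 0x2;  // toward +inf (ceil)
    constexpr int32_t kRoundTowardZero = 0x3;  // truncate

    // e.g. assembler->roundsd(XMM0, XMM1, Immediate(kRoundDown));  // floor
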
@@ -1264,32 +1285,62 @@ void X86Assembler::decl(const Address& address) {
void X86Assembler::shll(Register reg, const Immediate& imm) {
- EmitGenericShift(4, reg, imm);
+ EmitGenericShift(4, Operand(reg), imm);
}
void X86Assembler::shll(Register operand, Register shifter) {
- EmitGenericShift(4, operand, shifter);
+ EmitGenericShift(4, Operand(operand), shifter);
+}
+
+
+void X86Assembler::shll(const Address& address, const Immediate& imm) {
+ EmitGenericShift(4, address, imm);
+}
+
+
+void X86Assembler::shll(const Address& address, Register shifter) {
+ EmitGenericShift(4, address, shifter);
}
void X86Assembler::shrl(Register reg, const Immediate& imm) {
- EmitGenericShift(5, reg, imm);
+ EmitGenericShift(5, Operand(reg), imm);
}
void X86Assembler::shrl(Register operand, Register shifter) {
- EmitGenericShift(5, operand, shifter);
+ EmitGenericShift(5, Operand(operand), shifter);
+}
+
+
+void X86Assembler::shrl(const Address& address, const Immediate& imm) {
+ EmitGenericShift(5, address, imm);
+}
+
+
+void X86Assembler::shrl(const Address& address, Register shifter) {
+ EmitGenericShift(5, address, shifter);
}
void X86Assembler::sarl(Register reg, const Immediate& imm) {
- EmitGenericShift(7, reg, imm);
+ EmitGenericShift(7, Operand(reg), imm);
}
void X86Assembler::sarl(Register operand, Register shifter) {
- EmitGenericShift(7, operand, shifter);
+ EmitGenericShift(7, Operand(operand), shifter);
+}
+
+
+void X86Assembler::sarl(const Address& address, const Immediate& imm) {
+ EmitGenericShift(7, address, imm);
+}
+
+
+void X86Assembler::sarl(const Address& address, Register shifter) {
+ EmitGenericShift(7, address, shifter);
}
@@ -1302,6 +1353,15 @@ void X86Assembler::shld(Register dst, Register src, Register shifter) {
}
+void X86Assembler::shld(Register dst, Register src, const Immediate& imm) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x0F);
+ EmitUint8(0xA4);
+ EmitRegisterOperand(src, dst);
+ EmitUint8(imm.value() & 0xFF);
+}
+
+
void X86Assembler::shrd(Register dst, Register src, Register shifter) {
DCHECK_EQ(ECX, shifter);
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
@@ -1311,6 +1371,15 @@ void X86Assembler::shrd(Register dst, Register src, Register shifter) {
}
+void X86Assembler::shrd(Register dst, Register src, const Immediate& imm) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x0F);
+ EmitUint8(0xAC);
+ EmitRegisterOperand(src, dst);
+ EmitUint8(imm.value() & 0xFF);
+}
+
+
void X86Assembler::negl(Register reg) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0xF7);
@@ -1445,6 +1514,15 @@ void X86Assembler::cmpxchgl(const Address& address, Register reg) {
EmitOperand(reg, address);
}
+
+void X86Assembler::cmpxchg8b(const Address& address) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x0F);
+ EmitUint8(0xC7);
+ EmitOperand(1, address);
+}
+
+
void X86Assembler::mfence() {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x0F);
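
cmpxchg8b (0F C7 /1) gives 32-bit x86 an 8-byte compare-and-swap, exposed with
a lock prefix via LockCmpxchg8b below. The register protocol is fixed by the
ISA; the loop shape here is an assumed usage pattern, not code from this
change:

    // LOCK CMPXCHG8B: expected value in EDX:EAX, replacement in ECX:EBX;
    // on success ZF=1 and [mem] is updated, on failure EDX:EAX receives
    // the current memory value, so a CAS loop can simply retry:
    //   Label retry;
    //   assembler->Bind(&retry);
    //   ...recompute ECX:EBX from the observed EDX:EAX...
    //   assembler->LockCmpxchg8b(Address(EDI, 0));
    //   assembler->j(kNotZero, &retry);
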
@@ -1585,38 +1663,32 @@ void X86Assembler::EmitLabelLink(Label* label) {
void X86Assembler::EmitGenericShift(int reg_or_opcode,
- Register reg,
+ const Operand& operand,
const Immediate& imm) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
CHECK(imm.is_int8());
if (imm.value() == 1) {
EmitUint8(0xD1);
- EmitOperand(reg_or_opcode, Operand(reg));
+ EmitOperand(reg_or_opcode, operand);
} else {
EmitUint8(0xC1);
- EmitOperand(reg_or_opcode, Operand(reg));
+ EmitOperand(reg_or_opcode, operand);
EmitUint8(imm.value() & 0xFF);
}
}
void X86Assembler::EmitGenericShift(int reg_or_opcode,
- Register operand,
+ const Operand& operand,
Register shifter) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
CHECK_EQ(shifter, ECX);
EmitUint8(0xD3);
- EmitOperand(reg_or_opcode, Operand(operand));
-}
-
-void X86Assembler::InitializeFrameDescriptionEntry() {
- WriteFDEHeader(&cfi_info_, false /* is_64bit */);
+ EmitOperand(reg_or_opcode, operand);
}
-void X86Assembler::FinalizeFrameDescriptionEntry() {
- WriteFDEAddressRange(&cfi_info_, buffer_.Size(), false /* is_64bit */);
- PadCFI(&cfi_info_);
- WriteCFILength(&cfi_info_, false /* is_64bit */);
+static dwarf::Reg DWARFReg(Register reg) {
+ return dwarf::Reg::X86Core(static_cast<int>(reg));
}
constexpr size_t kFramePointerSize = 4;
@@ -1624,54 +1696,33 @@ constexpr size_t kFramePointerSize = 4;
void X86Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
const std::vector<ManagedRegister>& spill_regs,
const ManagedRegisterEntrySpills& entry_spills) {
- cfi_cfa_offset_ = kFramePointerSize; // Only return address on stack
- cfi_pc_ = buffer_.Size(); // Nothing emitted yet
- DCHECK_EQ(cfi_pc_, 0U);
-
- uint32_t reg_offset = 1;
+ DCHECK_EQ(buffer_.Size(), 0U); // Nothing emitted yet.
+ cfi_.SetCurrentCFAOffset(4); // Return address on stack.
CHECK_ALIGNED(frame_size, kStackAlignment);
int gpr_count = 0;
for (int i = spill_regs.size() - 1; i >= 0; --i) {
- x86::X86ManagedRegister spill = spill_regs.at(i).AsX86();
- DCHECK(spill.IsCpuRegister());
- pushl(spill.AsCpuRegister());
+ Register spill = spill_regs.at(i).AsX86().AsCpuRegister();
+ pushl(spill);
gpr_count++;
-
- // DW_CFA_advance_loc
- DW_CFA_advance_loc(&cfi_info_, buffer_.Size() - cfi_pc_);
- cfi_pc_ = buffer_.Size();
- // DW_CFA_def_cfa_offset
- cfi_cfa_offset_ += kFramePointerSize;
- DW_CFA_def_cfa_offset(&cfi_info_, cfi_cfa_offset_);
- // DW_CFA_offset reg offset
- reg_offset++;
- DW_CFA_offset(&cfi_info_, spill_regs.at(i).AsX86().DWARFRegId(), reg_offset);
+ cfi_.AdjustCFAOffset(kFramePointerSize);
+ cfi_.RelOffset(DWARFReg(spill), 0);
}
- // return address then method on stack
+ // return address then method on stack.
int32_t adjust = frame_size - (gpr_count * kFramePointerSize) -
sizeof(StackReference<mirror::ArtMethod>) /*method*/ -
kFramePointerSize /*return address*/;
addl(ESP, Immediate(-adjust));
- // DW_CFA_advance_loc
- DW_CFA_advance_loc(&cfi_info_, buffer_.Size() - cfi_pc_);
- cfi_pc_ = buffer_.Size();
- // DW_CFA_def_cfa_offset
- cfi_cfa_offset_ += adjust;
- DW_CFA_def_cfa_offset(&cfi_info_, cfi_cfa_offset_);
-
+ cfi_.AdjustCFAOffset(adjust);
pushl(method_reg.AsX86().AsCpuRegister());
- // DW_CFA_advance_loc
- DW_CFA_advance_loc(&cfi_info_, buffer_.Size() - cfi_pc_);
- cfi_pc_ = buffer_.Size();
- // DW_CFA_def_cfa_offset
- cfi_cfa_offset_ += kFramePointerSize;
- DW_CFA_def_cfa_offset(&cfi_info_, cfi_cfa_offset_);
+ cfi_.AdjustCFAOffset(kFramePointerSize);
+ DCHECK_EQ(static_cast<size_t>(cfi_.GetCurrentCFAOffset()), frame_size);
for (size_t i = 0; i < entry_spills.size(); ++i) {
ManagedRegisterSpill spill = entry_spills.at(i);
if (spill.AsX86().IsCpuRegister()) {
- movl(Address(ESP, frame_size + spill.getSpillOffset()), spill.AsX86().AsCpuRegister());
+ int offset = frame_size + spill.getSpillOffset();
+ movl(Address(ESP, offset), spill.AsX86().AsCpuRegister());
} else {
DCHECK(spill.AsX86().IsXmmRegister());
if (spill.getSize() == 8) {
@@ -1687,30 +1738,33 @@ void X86Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
void X86Assembler::RemoveFrame(size_t frame_size,
const std::vector<ManagedRegister>& spill_regs) {
CHECK_ALIGNED(frame_size, kStackAlignment);
- addl(ESP, Immediate(frame_size - (spill_regs.size() * kFramePointerSize) -
- sizeof(StackReference<mirror::ArtMethod>)));
+ cfi_.RememberState();
+ int adjust = frame_size - (spill_regs.size() * kFramePointerSize) -
+ sizeof(StackReference<mirror::ArtMethod>);
+ addl(ESP, Immediate(adjust));
+ cfi_.AdjustCFAOffset(-adjust);
for (size_t i = 0; i < spill_regs.size(); ++i) {
- x86::X86ManagedRegister spill = spill_regs.at(i).AsX86();
- DCHECK(spill.IsCpuRegister());
- popl(spill.AsCpuRegister());
+ Register spill = spill_regs.at(i).AsX86().AsCpuRegister();
+ popl(spill);
+ cfi_.AdjustCFAOffset(-static_cast<int>(kFramePointerSize));
+ cfi_.Restore(DWARFReg(spill));
}
ret();
+ // The CFI should be restored for any code that follows the exit block.
+ cfi_.RestoreState();
+ cfi_.DefCFAOffset(frame_size);
}
void X86Assembler::IncreaseFrameSize(size_t adjust) {
CHECK_ALIGNED(adjust, kStackAlignment);
addl(ESP, Immediate(-adjust));
- // DW_CFA_advance_loc
- DW_CFA_advance_loc(&cfi_info_, buffer_.Size() - cfi_pc_);
- cfi_pc_ = buffer_.Size();
- // DW_CFA_def_cfa_offset
- cfi_cfa_offset_ += adjust;
- DW_CFA_def_cfa_offset(&cfi_info_, cfi_cfa_offset_);
+ cfi_.AdjustCFAOffset(adjust);
}
void X86Assembler::DecreaseFrameSize(size_t adjust) {
CHECK_ALIGNED(adjust, kStackAlignment);
addl(ESP, Immediate(adjust));
+ cfi_.AdjustCFAOffset(-adjust);
}
void X86Assembler::Store(FrameOffset offs, ManagedRegister msrc, size_t size) {
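
The CFA bookkeeping in the rewritten BuildFrame is easiest to check with
concrete numbers; the values below are assumed for illustration:

    // Assumed: frame_size = 32, two spilled GPRs (EBP, ESI).
    //   entry:             CFA = ESP + 4   (return address only)
    //   pushl %ebp:        CFA = ESP + 8,  RelOffset(EBP, 0): saved at CFA-8
    //   pushl %esi:        CFA = ESP + 12, RelOffset(ESI, 0): saved at CFA-12
    //   adjust = 32 - 2*4 - 4 (method) - 4 (return address) = 16
    //   addl $-16, %esp:   CFA = ESP + 28
    //   pushl method_reg:  CFA = ESP + 32 == frame_size, matching the DCHECK.
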
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index 6ccf2e365d..37acb6ef16 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -205,7 +205,7 @@ class Address : public Operand {
class X86Assembler FINAL : public Assembler {
public:
- explicit X86Assembler() : cfi_cfa_offset_(0), cfi_pc_(0) {}
+ explicit X86Assembler() {}
virtual ~X86Assembler() {}
/*
@@ -312,6 +312,9 @@ class X86Assembler FINAL : public Assembler {
void ucomiss(XmmRegister a, XmmRegister b);
void ucomisd(XmmRegister a, XmmRegister b);
+ void roundsd(XmmRegister dst, XmmRegister src, const Immediate& imm);
+ void roundss(XmmRegister dst, XmmRegister src, const Immediate& imm);
+
void sqrtsd(XmmRegister dst, XmmRegister src);
void sqrtss(XmmRegister dst, XmmRegister src);
@@ -426,12 +429,20 @@ class X86Assembler FINAL : public Assembler {
void shll(Register reg, const Immediate& imm);
void shll(Register operand, Register shifter);
+ void shll(const Address& address, const Immediate& imm);
+ void shll(const Address& address, Register shifter);
void shrl(Register reg, const Immediate& imm);
void shrl(Register operand, Register shifter);
+ void shrl(const Address& address, const Immediate& imm);
+ void shrl(const Address& address, Register shifter);
void sarl(Register reg, const Immediate& imm);
void sarl(Register operand, Register shifter);
+ void sarl(const Address& address, const Immediate& imm);
+ void sarl(const Address& address, Register shifter);
void shld(Register dst, Register src, Register shifter);
+ void shld(Register dst, Register src, const Immediate& imm);
void shrd(Register dst, Register src, Register shifter);
+ void shrd(Register dst, Register src, const Immediate& imm);
void negl(Register reg);
void notl(Register reg);
@@ -454,6 +465,7 @@ class X86Assembler FINAL : public Assembler {
X86Assembler* lock();
void cmpxchgl(const Address& address, Register reg);
+ void cmpxchg8b(const Address& address);
void mfence();
@@ -473,6 +485,10 @@ class X86Assembler FINAL : public Assembler {
lock()->cmpxchgl(address, reg);
}
+ void LockCmpxchg8b(const Address& address) {
+ lock()->cmpxchg8b(address);
+ }
+
//
// Misc. functionality
//
@@ -596,12 +612,6 @@ class X86Assembler FINAL : public Assembler {
// and branch to a ExceptionSlowPath if it is.
void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) OVERRIDE;
- void InitializeFrameDescriptionEntry() OVERRIDE;
- void FinalizeFrameDescriptionEntry() OVERRIDE;
- std::vector<uint8_t>* GetFrameDescriptionEntry() OVERRIDE {
- return &cfi_info_;
- }
-
private:
inline void EmitUint8(uint8_t value);
inline void EmitInt32(int32_t value);
@@ -617,11 +627,8 @@ class X86Assembler FINAL : public Assembler {
void EmitLabelLink(Label* label);
void EmitNearLabelLink(Label* label);
- void EmitGenericShift(int rm, Register reg, const Immediate& imm);
- void EmitGenericShift(int rm, Register operand, Register shifter);
-
- std::vector<uint8_t> cfi_info_;
- uint32_t cfi_cfa_offset_, cfi_pc_;
+ void EmitGenericShift(int rm, const Operand& operand, const Immediate& imm);
+ void EmitGenericShift(int rm, const Operand& operand, Register shifter);
DISALLOW_COPY_AND_ASSIGN(X86Assembler);
};
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index fccb510afb..dba3b6ba67 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -127,4 +127,49 @@ TEST_F(AssemblerX86Test, LoadLongConstant) {
DriverStr(expected, "LoadLongConstant");
}
+TEST_F(AssemblerX86Test, LockCmpxchgl) {
+ GetAssembler()->LockCmpxchgl(x86::Address(
+ x86::Register(x86::EDI), x86::Register(x86::EBX), x86::TIMES_4, 12),
+ x86::Register(x86::ESI));
+ GetAssembler()->LockCmpxchgl(x86::Address(
+ x86::Register(x86::EDI), x86::Register(x86::ESI), x86::TIMES_4, 12),
+ x86::Register(x86::ESI));
+ GetAssembler()->LockCmpxchgl(x86::Address(
+ x86::Register(x86::EDI), x86::Register(x86::ESI), x86::TIMES_4, 12),
+ x86::Register(x86::EDI));
+ GetAssembler()->LockCmpxchgl(x86::Address(
+ x86::Register(x86::EBP), 0), x86::Register(x86::ESI));
+ GetAssembler()->LockCmpxchgl(x86::Address(
+ x86::Register(x86::EBP), x86::Register(x86::ESI), x86::TIMES_1, 0),
+ x86::Register(x86::ESI));
+ const char* expected =
+ "lock cmpxchgl %ESI, 0xc(%EDI,%EBX,4)\n"
+ "lock cmpxchgl %ESI, 0xc(%EDI,%ESI,4)\n"
+ "lock cmpxchgl %EDI, 0xc(%EDI,%ESI,4)\n"
+ "lock cmpxchgl %ESI, (%EBP)\n"
+ "lock cmpxchgl %ESI, (%EBP,%ESI,1)\n";
+
+ DriverStr(expected, "lock_cmpxchgl");
+}
+
+TEST_F(AssemblerX86Test, LockCmpxchg8b) {
+ GetAssembler()->LockCmpxchg8b(x86::Address(
+ x86::Register(x86::EDI), x86::Register(x86::EBX), x86::TIMES_4, 12));
+ GetAssembler()->LockCmpxchg8b(x86::Address(
+ x86::Register(x86::EDI), x86::Register(x86::ESI), x86::TIMES_4, 12));
+ GetAssembler()->LockCmpxchg8b(x86::Address(
+ x86::Register(x86::EDI), x86::Register(x86::ESI), x86::TIMES_4, 12));
+ GetAssembler()->LockCmpxchg8b(x86::Address(x86::Register(x86::EBP), 0));
+ GetAssembler()->LockCmpxchg8b(x86::Address(
+ x86::Register(x86::EBP), x86::Register(x86::ESI), x86::TIMES_1, 0));
+ const char* expected =
+ "lock cmpxchg8b 0xc(%EDI,%EBX,4)\n"
+ "lock cmpxchg8b 0xc(%EDI,%ESI,4)\n"
+ "lock cmpxchg8b 0xc(%EDI,%ESI,4)\n"
+ "lock cmpxchg8b (%EBP)\n"
+ "lock cmpxchg8b (%EBP,%ESI,1)\n";
+
+ DriverStr(expected, "lock_cmpxchg8b");
+}
+
} // namespace art
diff --git a/compiler/utils/x86/managed_register_x86.h b/compiler/utils/x86/managed_register_x86.h
index 5d46ee25cd..4e8c41e217 100644
--- a/compiler/utils/x86/managed_register_x86.h
+++ b/compiler/utils/x86/managed_register_x86.h
@@ -18,6 +18,7 @@
#define ART_COMPILER_UTILS_X86_MANAGED_REGISTER_X86_H_
#include "constants_x86.h"
+#include "dwarf/register.h"
#include "utils/managed_register.h"
namespace art {
@@ -88,14 +89,6 @@ const int kNumberOfAllocIds = kNumberOfCpuAllocIds + kNumberOfXmmAllocIds +
// There is a one-to-one mapping between ManagedRegister and register id.
class X86ManagedRegister : public ManagedRegister {
public:
- int DWARFRegId() const {
- CHECK(IsCpuRegister());
- // For all the X86 registers we care about:
- // EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI,
- // DWARF register id is the same as id_.
- return static_cast<int>(id_);
- }
-
ByteRegister AsByteRegister() const {
CHECK(IsCpuRegister());
CHECK_LT(AsCpuRegister(), ESP); // ESP, EBP, ESI and EDI cannot be encoded as byte registers.
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index b8c757c05d..638659d635 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -20,7 +20,6 @@
#include "entrypoints/quick/quick_entrypoints.h"
#include "memory_region.h"
#include "thread.h"
-#include "utils/dwarf_cfi.h"
namespace art {
namespace x86_64 {
@@ -210,7 +209,9 @@ void X86_64Assembler::movzxb(CpuRegister dst, CpuRegister src) {
void X86_64Assembler::movzxb(CpuRegister dst, const Address& src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
- EmitOptionalByteRegNormalizingRex32(dst, src);
+  // With a memory source there is no byte register to normalize, so
+  // EmitOptionalByteRegNormalizingRex32(dst, src) is not needed here.
+ EmitOptionalRex32(dst, src);
EmitUint8(0x0F);
EmitUint8(0xB6);
EmitOperand(dst.LowBits(), src);
@@ -228,7 +229,9 @@ void X86_64Assembler::movsxb(CpuRegister dst, CpuRegister src) {
void X86_64Assembler::movsxb(CpuRegister dst, const Address& src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
- EmitOptionalByteRegNormalizingRex32(dst, src);
+  // With a memory source there is no byte register to normalize, so
+  // EmitOptionalByteRegNormalizingRex32(dst, src) is not needed here.
+ EmitOptionalRex32(dst, src);
EmitUint8(0x0F);
EmitUint8(0xBE);
EmitOperand(dst.LowBits(), src);
@@ -796,6 +799,30 @@ void X86_64Assembler::ucomisd(XmmRegister a, XmmRegister b) {
}
+void X86_64Assembler::roundsd(XmmRegister dst, XmmRegister src, const Immediate& imm) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0x3A);
+ EmitUint8(0x0B);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+ EmitUint8(imm.value());
+}
+
+
+void X86_64Assembler::roundss(XmmRegister dst, XmmRegister src, const Immediate& imm) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0x3A);
+ EmitUint8(0x0A);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+ EmitUint8(imm.value());
+}
+
+
void X86_64Assembler::sqrtsd(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0xF2);
@@ -1838,11 +1865,22 @@ X86_64Assembler* X86_64Assembler::lock() {
void X86_64Assembler::cmpxchgl(const Address& address, CpuRegister reg) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitOptionalRex32(reg, address);
+ EmitUint8(0x0F);
+ EmitUint8(0xB1);
+ EmitOperand(reg.LowBits(), address);
+}
+
+
+void X86_64Assembler::cmpxchgq(const Address& address, CpuRegister reg) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitRex64(reg, address);
EmitUint8(0x0F);
EmitUint8(0xB1);
EmitOperand(reg.LowBits(), address);
}
+
void X86_64Assembler::mfence() {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x0F);
@@ -1941,6 +1979,10 @@ void X86_64Assembler::EmitOperand(uint8_t reg_or_opcode, const Operand& operand)
for (int i = 1; i < length; i++) {
EmitUint8(operand.encoding_[i]);
}
+ AssemblerFixup* fixup = operand.GetFixup();
+ if (fixup != nullptr) {
+ EmitFixup(fixup);
+ }
}
@@ -2139,11 +2181,18 @@ void X86_64Assembler::EmitRex64(CpuRegister dst, const Operand& operand) {
}
void X86_64Assembler::EmitOptionalByteRegNormalizingRex32(CpuRegister dst, CpuRegister src) {
- EmitOptionalRex(true, false, dst.NeedsRex(), false, src.NeedsRex());
+  // For src, the low byte registers SPL, BPL, SIL and DIL need a REX prefix.
+ bool force = src.AsRegister() > 3;
+ EmitOptionalRex(force, false, dst.NeedsRex(), false, src.NeedsRex());
}
void X86_64Assembler::EmitOptionalByteRegNormalizingRex32(CpuRegister dst, const Operand& operand) {
- uint8_t rex = 0x40 | operand.rex(); // REX.0000
+ uint8_t rex = operand.rex();
+  // For dst, the low byte registers SPL, BPL, SIL and DIL need a REX prefix.
+ bool force = dst.AsRegister() > 3;
+ if (force) {
+ rex |= 0x40; // REX.0000
+ }
if (dst.NeedsRex()) {
rex |= 0x44; // REX.0R00
}
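
The 'force' flag reflects an x86-64 encoding quirk: without any REX prefix,
byte-register codes 4-7 name AH/CH/DH/BH, while an empty REX prefix (0x40)
reinterprets the same codes as SPL/BPL/SIL/DIL. Codes 0-3 (AL/CL/DL/BL) mean
the same thing either way, hence the > 3 test. Illustrative encodings from
the instruction set reference:

    // movzbl %bl,  %eax  ->  0F B6 C3     (code 3 = BL, no REX needed)
    // movzbl %sil, %eax  ->  40 0F B6 C6  (empty REX selects SIL, not DH)
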
@@ -2152,14 +2201,11 @@ void X86_64Assembler::EmitOptionalByteRegNormalizingRex32(CpuRegister dst, const
}
}
-void X86_64Assembler::InitializeFrameDescriptionEntry() {
- WriteFDEHeader(&cfi_info_, true /* is_64bit */);
+static dwarf::Reg DWARFReg(Register reg) {
+ return dwarf::Reg::X86_64Core(static_cast<int>(reg));
}
-
-void X86_64Assembler::FinalizeFrameDescriptionEntry() {
- WriteFDEAddressRange(&cfi_info_, buffer_.Size(), true /* is_64bit */);
- PadCFI(&cfi_info_);
- WriteCFILength(&cfi_info_, true /* is_64bit */);
+static dwarf::Reg DWARFReg(FloatRegister reg) {
+ return dwarf::Reg::X86_64Fp(static_cast<int>(reg));
}
constexpr size_t kFramePointerSize = 8;
@@ -2167,11 +2213,8 @@ constexpr size_t kFramePointerSize = 8;
void X86_64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
const std::vector<ManagedRegister>& spill_regs,
const ManagedRegisterEntrySpills& entry_spills) {
- cfi_cfa_offset_ = kFramePointerSize; // Only return address on stack
- cfi_pc_ = buffer_.Size(); // Nothing emitted yet
- DCHECK_EQ(cfi_pc_, 0U);
-
- uint32_t reg_offset = 1;
+ DCHECK_EQ(buffer_.Size(), 0U); // Nothing emitted yet.
+ cfi_.SetCurrentCFAOffset(8); // Return address on stack.
CHECK_ALIGNED(frame_size, kStackAlignment);
int gpr_count = 0;
for (int i = spill_regs.size() - 1; i >= 0; --i) {
@@ -2179,29 +2222,16 @@ void X86_64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
if (spill.IsCpuRegister()) {
pushq(spill.AsCpuRegister());
gpr_count++;
-
- // DW_CFA_advance_loc
- DW_CFA_advance_loc(&cfi_info_, buffer_.Size() - cfi_pc_);
- cfi_pc_ = buffer_.Size();
- // DW_CFA_def_cfa_offset
- cfi_cfa_offset_ += kFramePointerSize;
- DW_CFA_def_cfa_offset(&cfi_info_, cfi_cfa_offset_);
- // DW_CFA_offset reg offset
- reg_offset++;
- DW_CFA_offset(&cfi_info_, spill.DWARFRegId(), reg_offset);
+ cfi_.AdjustCFAOffset(kFramePointerSize);
+ cfi_.RelOffset(DWARFReg(spill.AsCpuRegister().AsRegister()), 0);
}
}
- // return address then method on stack
+ // return address then method on stack.
int64_t rest_of_frame = static_cast<int64_t>(frame_size)
- (gpr_count * kFramePointerSize)
- kFramePointerSize /*return address*/;
subq(CpuRegister(RSP), Immediate(rest_of_frame));
- // DW_CFA_advance_loc
- DW_CFA_advance_loc(&cfi_info_, buffer_.Size() - cfi_pc_);
- cfi_pc_ = buffer_.Size();
- // DW_CFA_def_cfa_offset
- cfi_cfa_offset_ += rest_of_frame;
- DW_CFA_def_cfa_offset(&cfi_info_, cfi_cfa_offset_);
+ cfi_.AdjustCFAOffset(rest_of_frame);
// spill xmms
int64_t offset = rest_of_frame;
@@ -2210,6 +2240,7 @@ void X86_64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
if (spill.IsXmmRegister()) {
offset -= sizeof(double);
movsd(Address(CpuRegister(RSP), offset), spill.AsXmmRegister());
+ cfi_.RelOffset(DWARFReg(spill.AsXmmRegister().AsFloatRegister()), offset);
}
}
@@ -2241,6 +2272,7 @@ void X86_64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
void X86_64Assembler::RemoveFrame(size_t frame_size,
const std::vector<ManagedRegister>& spill_regs) {
CHECK_ALIGNED(frame_size, kStackAlignment);
+ cfi_.RememberState();
int gpr_count = 0;
// unspill xmms
int64_t offset = static_cast<int64_t>(frame_size) - (spill_regs.size() * kFramePointerSize) - 2 * kFramePointerSize;
@@ -2249,34 +2281,38 @@ void X86_64Assembler::RemoveFrame(size_t frame_size,
if (spill.IsXmmRegister()) {
offset += sizeof(double);
movsd(spill.AsXmmRegister(), Address(CpuRegister(RSP), offset));
+ cfi_.Restore(DWARFReg(spill.AsXmmRegister().AsFloatRegister()));
} else {
gpr_count++;
}
}
- addq(CpuRegister(RSP), Immediate(static_cast<int64_t>(frame_size) - (gpr_count * kFramePointerSize) - kFramePointerSize));
+ int adjust = static_cast<int>(frame_size) - (gpr_count * kFramePointerSize) - kFramePointerSize;
+ addq(CpuRegister(RSP), Immediate(adjust));
+ cfi_.AdjustCFAOffset(-adjust);
for (size_t i = 0; i < spill_regs.size(); ++i) {
x86_64::X86_64ManagedRegister spill = spill_regs.at(i).AsX86_64();
if (spill.IsCpuRegister()) {
popq(spill.AsCpuRegister());
+ cfi_.AdjustCFAOffset(-static_cast<int>(kFramePointerSize));
+ cfi_.Restore(DWARFReg(spill.AsCpuRegister().AsRegister()));
}
}
ret();
+ // The CFI should be restored for any code that follows the exit block.
+ cfi_.RestoreState();
+ cfi_.DefCFAOffset(frame_size);
}
void X86_64Assembler::IncreaseFrameSize(size_t adjust) {
CHECK_ALIGNED(adjust, kStackAlignment);
addq(CpuRegister(RSP), Immediate(-static_cast<int64_t>(adjust)));
- // DW_CFA_advance_loc
- DW_CFA_advance_loc(&cfi_info_, buffer_.Size() - cfi_pc_);
- cfi_pc_ = buffer_.Size();
- // DW_CFA_def_cfa_offset
- cfi_cfa_offset_ += adjust;
- DW_CFA_def_cfa_offset(&cfi_info_, cfi_cfa_offset_);
+ cfi_.AdjustCFAOffset(adjust);
}
void X86_64Assembler::DecreaseFrameSize(size_t adjust) {
CHECK_ALIGNED(adjust, kStackAlignment);
addq(CpuRegister(RSP), Immediate(adjust));
+ cfi_.AdjustCFAOffset(-adjust);
}
void X86_64Assembler::Store(FrameOffset offs, ManagedRegister msrc, size_t size) {
@@ -2704,5 +2740,55 @@ void X86_64ExceptionSlowPath::Emit(Assembler *sasm) {
#undef __
}
+void X86_64Assembler::AddConstantArea() {
+ const std::vector<int32_t>& area = constant_area_.GetBuffer();
+ for (size_t i = 0, e = area.size(); i < e; i++) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitInt32(area[i]);
+ }
+}
+
+int ConstantArea::AddInt32(int32_t v) {
+ for (size_t i = 0, e = buffer_.size(); i < e; i++) {
+ if (v == buffer_[i]) {
+ return i * elem_size_;
+ }
+ }
+
+ // Didn't match anything.
+ int result = buffer_.size() * elem_size_;
+ buffer_.push_back(v);
+ return result;
+}
+
+int ConstantArea::AddInt64(int64_t v) {
+ int32_t v_low = v;
+ int32_t v_high = v >> 32;
+ if (buffer_.size() > 1) {
+    // Ensure we don't read past the end of the buffer.
+ for (size_t i = 0, e = buffer_.size() - 1; i < e; i++) {
+ if (v_low == buffer_[i] && v_high == buffer_[i + 1]) {
+ return i * elem_size_;
+ }
+ }
+ }
+
+ // Didn't match anything.
+ int result = buffer_.size() * elem_size_;
+ buffer_.push_back(v_low);
+ buffer_.push_back(v_high);
+ return result;
+}
+
+int ConstantArea::AddDouble(double v) {
+ // Treat the value as a 64-bit integer value.
+ return AddInt64(bit_cast<int64_t, double>(v));
+}
+
+int ConstantArea::AddFloat(float v) {
+ // Treat the value as a 32-bit integer value.
+ return AddInt32(bit_cast<int32_t, float>(v));
+}
+
} // namespace x86_64
} // namespace art
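
The constant area deduplicates by linear scan, so repeated literals share one
slot; note that AddInt64 can also match a previously stored adjacent pair of
32-bit words. A sketch of the resulting offsets, assuming an initially empty
area:

    ConstantArea area;
    int off_pi  = area.AddDouble(3.14);  // 0: appends two int32 words
    int off_pi2 = area.AddDouble(3.14);  // 0: deduplicated against the pair
    int off_one = area.AddInt32(1);      // 8: no match, appended
    int size    = area.GetSize();        // 12 bytes (three int32 slots)
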
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index e2fd5fbb80..15b8b15c74 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -97,9 +97,13 @@ class Operand : public ValueObject {
&& (reg.NeedsRex() == ((rex_ & 1) != 0)); // REX.000B bits match.
}
+ AssemblerFixup* GetFixup() const {
+ return fixup_;
+ }
+
protected:
// Operand can be sub classed (e.g: Address).
- Operand() : rex_(0), length_(0) { }
+ Operand() : rex_(0), length_(0), fixup_(nullptr) { }
void SetModRM(uint8_t mod_in, CpuRegister rm_in) {
CHECK_EQ(mod_in & ~3, 0);
@@ -136,12 +140,17 @@ class Operand : public ValueObject {
length_ += disp_size;
}
+ void SetFixup(AssemblerFixup* fixup) {
+ fixup_ = fixup;
+ }
+
private:
uint8_t rex_;
uint8_t length_;
uint8_t encoding_[6];
+ AssemblerFixup* fixup_;
- explicit Operand(CpuRegister reg) : rex_(0), length_(0) { SetModRM(3, reg); }
+ explicit Operand(CpuRegister reg) : rex_(0), length_(0), fixup_(nullptr) { SetModRM(3, reg); }
// Get the operand encoding byte at the given index.
uint8_t encoding_at(int index_in) const {
@@ -226,12 +235,25 @@ class Address : public Operand {
result.SetSIB(TIMES_1, CpuRegister(RSP), CpuRegister(RBP));
result.SetDisp32(addr);
} else {
+ // RIP addressing is done using RBP as the base register.
+ // The value in RBP isn't used. Instead the offset is added to RIP.
result.SetModRM(0, CpuRegister(RBP));
result.SetDisp32(addr);
}
return result;
}
+ // An RIP relative address that will be fixed up later.
+ static Address RIP(AssemblerFixup* fixup) {
+ Address result;
+ // RIP addressing is done using RBP as the base register.
+ // The value in RBP isn't used. Instead the offset is added to RIP.
+ result.SetModRM(0, CpuRegister(RBP));
+ result.SetDisp32(0);
+ result.SetFixup(fixup);
+ return result;
+ }
+
// If no_rip is true then the Absolute address isn't RIP relative.
static Address Absolute(ThreadOffset<8> addr, bool no_rip = false) {
return Absolute(addr.Int32Value(), no_rip);
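
Address::RIP(fixup) pairs with the constant area: the disp32 is emitted as 0
and the recorded fixup is processed in EmitOperand, presumably rewriting the
displacement to the RIP-relative distance of the literal once final layout is
known. A hedged sketch of the intended call pattern (ConstantAreaFixup is an
assumed helper, not part of this change):

    int offset = assembler->AddDouble(1.5);                 // constant-area slot
    AssemblerFixup* fixup = new ConstantAreaFixup(offset);  // assumed helper
    assembler->movsd(XMM0, Address::RIP(fixup));            // disp32 patched later
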
@@ -242,9 +264,46 @@ class Address : public Operand {
};
+/**
+ * Class to handle constant area values.
+ */
+class ConstantArea {
+ public:
+ ConstantArea() {}
+
+ // Add a double to the constant area, returning the offset into
+ // the constant area where the literal resides.
+ int AddDouble(double v);
+
+ // Add a float to the constant area, returning the offset into
+ // the constant area where the literal resides.
+ int AddFloat(float v);
+
+ // Add an int32_t to the constant area, returning the offset into
+ // the constant area where the literal resides.
+ int AddInt32(int32_t v);
+
+ // Add an int64_t to the constant area, returning the offset into
+ // the constant area where the literal resides.
+ int AddInt64(int64_t v);
+
+ int GetSize() const {
+ return buffer_.size() * elem_size_;
+ }
+
+ const std::vector<int32_t>& GetBuffer() const {
+ return buffer_;
+ }
+
+ private:
+ static constexpr size_t elem_size_ = sizeof(int32_t);
+ std::vector<int32_t> buffer_;
+};
+
+
class X86_64Assembler FINAL : public Assembler {
public:
- X86_64Assembler() : cfi_cfa_offset_(0), cfi_pc_(0) {}
+ X86_64Assembler() {}
virtual ~X86_64Assembler() {}
/*
@@ -353,6 +412,9 @@ class X86_64Assembler FINAL : public Assembler {
void ucomiss(XmmRegister a, XmmRegister b);
void ucomisd(XmmRegister a, XmmRegister b);
+ void roundsd(XmmRegister dst, XmmRegister src, const Immediate& imm);
+ void roundss(XmmRegister dst, XmmRegister src, const Immediate& imm);
+
void sqrtsd(XmmRegister dst, XmmRegister src);
void sqrtss(XmmRegister dst, XmmRegister src);
@@ -515,6 +577,7 @@ class X86_64Assembler FINAL : public Assembler {
X86_64Assembler* lock();
void cmpxchgl(const Address& address, CpuRegister reg);
+ void cmpxchgq(const Address& address, CpuRegister reg);
void mfence();
@@ -537,6 +600,10 @@ class X86_64Assembler FINAL : public Assembler {
lock()->cmpxchgl(address, reg);
}
+ void LockCmpxchgq(const Address& address, CpuRegister reg) {
+ lock()->cmpxchgq(address, reg);
+ }
+
//
// Misc. functionality
//
@@ -661,11 +728,27 @@ class X86_64Assembler FINAL : public Assembler {
// and branch to a ExceptionSlowPath if it is.
void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) OVERRIDE;
- void InitializeFrameDescriptionEntry() OVERRIDE;
- void FinalizeFrameDescriptionEntry() OVERRIDE;
- std::vector<uint8_t>* GetFrameDescriptionEntry() OVERRIDE {
- return &cfi_info_;
- }
+ // Add a double to the constant area, returning the offset into
+ // the constant area where the literal resides.
+ int AddDouble(double v) { return constant_area_.AddDouble(v); }
+
+ // Add a float to the constant area, returning the offset into
+ // the constant area where the literal resides.
+ int AddFloat(float v) { return constant_area_.AddFloat(v); }
+
+ // Add an int32_t to the constant area, returning the offset into
+ // the constant area where the literal resides.
+ int AddInt32(int32_t v) { return constant_area_.AddInt32(v); }
+
+ // Add an int64_t to the constant area, returning the offset into
+ // the constant area where the literal resides.
+ int AddInt64(int64_t v) { return constant_area_.AddInt64(v); }
+
+ // Add the contents of the constant area to the assembler buffer.
+ void AddConstantArea();
+
+ // Is the constant area empty? Return true if there are no literals in the constant area.
+ bool IsConstantAreaEmpty() const { return constant_area_.GetSize() == 0; }
private:
void EmitUint8(uint8_t value);
@@ -712,8 +795,7 @@ class X86_64Assembler FINAL : public Assembler {
void EmitOptionalByteRegNormalizingRex32(CpuRegister dst, CpuRegister src);
void EmitOptionalByteRegNormalizingRex32(CpuRegister dst, const Operand& operand);
- std::vector<uint8_t> cfi_info_;
- uint32_t cfi_cfa_offset_, cfi_pc_;
+ ConstantArea constant_area_;
DISALLOW_COPY_AND_ASSIGN(X86_64Assembler);
};
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index c2052c7732..116190a832 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -174,6 +174,40 @@ class AssemblerX86_64Test : public AssemblerTest<x86_64::X86_64Assembler, x86_64
secondary_register_names_.emplace(x86_64::CpuRegister(x86_64::R14), "r14d");
secondary_register_names_.emplace(x86_64::CpuRegister(x86_64::R15), "r15d");
+ tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::RAX), "ax");
+ tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::RBX), "bx");
+ tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::RCX), "cx");
+ tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::RDX), "dx");
+ tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::RBP), "bp");
+ tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::RSP), "sp");
+ tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::RSI), "si");
+ tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::RDI), "di");
+ tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::R8), "r8w");
+ tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::R9), "r9w");
+ tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::R10), "r10w");
+ tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::R11), "r11w");
+ tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::R12), "r12w");
+ tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::R13), "r13w");
+ tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::R14), "r14w");
+ tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::R15), "r15w");
+
+ quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::RAX), "al");
+ quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::RBX), "bl");
+ quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::RCX), "cl");
+ quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::RDX), "dl");
+ quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::RBP), "bpl");
+ quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::RSP), "spl");
+ quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::RSI), "sil");
+ quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::RDI), "dil");
+ quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::R8), "r8b");
+ quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::R9), "r9b");
+ quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::R10), "r10b");
+ quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::R11), "r11b");
+ quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::R12), "r12b");
+ quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::R13), "r13b");
+ quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::R14), "r14b");
+ quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::R15), "r15b");
+
fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM0));
fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM1));
fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM2));
@@ -216,9 +250,21 @@ class AssemblerX86_64Test : public AssemblerTest<x86_64::X86_64Assembler, x86_64
return secondary_register_names_[reg];
}
+ std::string GetTertiaryRegisterName(const x86_64::CpuRegister& reg) OVERRIDE {
+ CHECK(tertiary_register_names_.find(reg) != tertiary_register_names_.end());
+ return tertiary_register_names_[reg];
+ }
+
+ std::string GetQuaternaryRegisterName(const x86_64::CpuRegister& reg) OVERRIDE {
+ CHECK(quaternary_register_names_.find(reg) != quaternary_register_names_.end());
+ return quaternary_register_names_[reg];
+ }
+
private:
std::vector<x86_64::CpuRegister*> registers_;
std::map<x86_64::CpuRegister, std::string, X86_64CpuRegisterCompare> secondary_register_names_;
+ std::map<x86_64::CpuRegister, std::string, X86_64CpuRegisterCompare> tertiary_register_names_;
+ std::map<x86_64::CpuRegister, std::string, X86_64CpuRegisterCompare> quaternary_register_names_;
std::vector<x86_64::XmmRegister*> fp_registers_;
};
@@ -543,6 +589,56 @@ TEST_F(AssemblerX86_64Test, Xchgl) {
// DriverStr(Repeatrr(&x86_64::X86_64Assembler::xchgl, "xchgl %{reg2}, %{reg1}"), "xchgl");
}
+TEST_F(AssemblerX86_64Test, LockCmpxchgl) {
+ GetAssembler()->LockCmpxchgl(x86_64::Address(
+ x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12),
+ x86_64::CpuRegister(x86_64::RSI));
+ GetAssembler()->LockCmpxchgl(x86_64::Address(
+ x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12),
+ x86_64::CpuRegister(x86_64::RSI));
+ GetAssembler()->LockCmpxchgl(x86_64::Address(
+ x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12),
+ x86_64::CpuRegister(x86_64::R8));
+ GetAssembler()->LockCmpxchgl(x86_64::Address(
+ x86_64::CpuRegister(x86_64::R13), 0), x86_64::CpuRegister(x86_64::RSI));
+ GetAssembler()->LockCmpxchgl(x86_64::Address(
+ x86_64::CpuRegister(x86_64::R13), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_1, 0),
+ x86_64::CpuRegister(x86_64::RSI));
+ const char* expected =
+ "lock cmpxchgl %ESI, 0xc(%RDI,%RBX,4)\n"
+ "lock cmpxchgl %ESI, 0xc(%RDI,%R9,4)\n"
+ "lock cmpxchgl %R8d, 0xc(%RDI,%R9,4)\n"
+ "lock cmpxchgl %ESI, (%R13)\n"
+ "lock cmpxchgl %ESI, (%R13,%R9,1)\n";
+
+ DriverStr(expected, "lock_cmpxchgl");
+}
+
+TEST_F(AssemblerX86_64Test, LockCmpxchgq) {
+ GetAssembler()->LockCmpxchgq(x86_64::Address(
+ x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12),
+ x86_64::CpuRegister(x86_64::RSI));
+ GetAssembler()->LockCmpxchgq(x86_64::Address(
+ x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12),
+ x86_64::CpuRegister(x86_64::RSI));
+ GetAssembler()->LockCmpxchgq(x86_64::Address(
+ x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12),
+ x86_64::CpuRegister(x86_64::R8));
+ GetAssembler()->LockCmpxchgq(x86_64::Address(
+ x86_64::CpuRegister(x86_64::R13), 0), x86_64::CpuRegister(x86_64::RSI));
+ GetAssembler()->LockCmpxchgq(x86_64::Address(
+ x86_64::CpuRegister(x86_64::R13), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_1, 0),
+ x86_64::CpuRegister(x86_64::RSI));
+ const char* expected =
+ "lock cmpxchg %RSI, 0xc(%RDI,%RBX,4)\n"
+ "lock cmpxchg %RSI, 0xc(%RDI,%R9,4)\n"
+ "lock cmpxchg %R8, 0xc(%RDI,%R9,4)\n"
+ "lock cmpxchg %RSI, (%R13)\n"
+ "lock cmpxchg %RSI, (%R13,%R9,1)\n";
+
+ DriverStr(expected, "lock_cmpxchg");
+}
+
TEST_F(AssemblerX86_64Test, Movl) {
GetAssembler()->movl(x86_64::CpuRegister(x86_64::RAX), x86_64::Address(
x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12));
@@ -696,6 +792,14 @@ TEST_F(AssemblerX86_64Test, Sqrtsd) {
DriverStr(RepeatFF(&x86_64::X86_64Assembler::sqrtsd, "sqrtsd %{reg2}, %{reg1}"), "sqrtsd");
}
+TEST_F(AssemblerX86_64Test, Roundss) {
+ DriverStr(RepeatFFI(&x86_64::X86_64Assembler::roundss, 1, "roundss ${imm}, %{reg2}, %{reg1}"), "roundss");
+}
+
+TEST_F(AssemblerX86_64Test, Roundsd) {
+ DriverStr(RepeatFFI(&x86_64::X86_64Assembler::roundsd, 1, "roundsd ${imm}, %{reg2}, %{reg1}"), "roundsd");
+}
+
TEST_F(AssemblerX86_64Test, Xorps) {
DriverStr(RepeatFF(&x86_64::X86_64Assembler::xorps, "xorps %{reg2}, %{reg1}"), "xorps");
}
@@ -820,31 +924,12 @@ std::string setcc_test_fn(AssemblerX86_64Test::Base* assembler_test,
"l", "ge", "le" };
std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters();
-
- std::string byte_regs[16];
- byte_regs[x86_64::RAX] = "al";
- byte_regs[x86_64::RBX] = "bl";
- byte_regs[x86_64::RCX] = "cl";
- byte_regs[x86_64::RDX] = "dl";
- byte_regs[x86_64::RBP] = "bpl";
- byte_regs[x86_64::RSP] = "spl";
- byte_regs[x86_64::RSI] = "sil";
- byte_regs[x86_64::RDI] = "dil";
- byte_regs[x86_64::R8] = "r8b";
- byte_regs[x86_64::R9] = "r9b";
- byte_regs[x86_64::R10] = "r10b";
- byte_regs[x86_64::R11] = "r11b";
- byte_regs[x86_64::R12] = "r12b";
- byte_regs[x86_64::R13] = "r13b";
- byte_regs[x86_64::R14] = "r14b";
- byte_regs[x86_64::R15] = "r15b";
-
std::ostringstream str;
for (auto reg : registers) {
for (size_t i = 0; i < 15; ++i) {
assembler->setcc(static_cast<x86_64::Condition>(i), *reg);
- str << "set" << suffixes[i] << " %" << byte_regs[reg->AsRegister()] << "\n";
+ str << "set" << suffixes[i] << " %" << assembler_test->GetQuaternaryRegisterName(*reg) << "\n";
}
}
@@ -975,4 +1060,12 @@ TEST_F(AssemblerX86_64Test, DecreaseFrame) {
DriverFn(&decreaseframe_test_fn, "DecreaseFrame");
}
+TEST_F(AssemblerX86_64Test, MovzxbRegs) {
+ DriverStr(Repeatrb(&x86_64::X86_64Assembler::movzxb, "movzbl %{reg2}, %{reg1}"), "movzxb");
+}
+
+TEST_F(AssemblerX86_64Test, MovsxbRegs) {
+ DriverStr(Repeatrb(&x86_64::X86_64Assembler::movsxb, "movsbl %{reg2}, %{reg1}"), "movsxb");
+}
+
} // namespace art
diff --git a/compiler/utils/x86_64/managed_register_x86_64.h b/compiler/utils/x86_64/managed_register_x86_64.h
index 3a96ad0b51..47bbb44fc8 100644
--- a/compiler/utils/x86_64/managed_register_x86_64.h
+++ b/compiler/utils/x86_64/managed_register_x86_64.h
@@ -18,6 +18,7 @@
#define ART_COMPILER_UTILS_X86_64_MANAGED_REGISTER_X86_64_H_
#include "constants_x86_64.h"
+#include "dwarf/register.h"
#include "utils/managed_register.h"
namespace art {
@@ -87,21 +88,6 @@ const int kNumberOfAllocIds = kNumberOfCpuAllocIds + kNumberOfXmmAllocIds +
// There is a one-to-one mapping between ManagedRegister and register id.
class X86_64ManagedRegister : public ManagedRegister {
public:
- int DWARFRegId() const {
- CHECK(IsCpuRegister());
- switch (id_) {
- case RAX: return 0;
- case RDX: return 1;
- case RCX: return 2;
- case RBX: return 3;
- case RSI: return 4;
- case RDI: return 5;
- case RBP: return 6;
- case RSP: return 7;
- default: return static_cast<int>(id_); // R8 ~ R15
- }
- }
-
CpuRegister AsCpuRegister() const {
CHECK(IsCpuRegister());
return CpuRegister(static_cast<Register>(id_));