summaryrefslogtreecommitdiff
path: root/compiler
diff options
context:
space:
mode:
Diffstat (limited to 'compiler')
-rw-r--r--compiler/Android.bp9
-rw-r--r--compiler/common_compiler_test.cc2
-rw-r--r--compiler/compiler.cc2
-rw-r--r--compiler/compiler.h2
-rw-r--r--compiler/debug/dwarf/dwarf_test.h2
-rw-r--r--compiler/debug/elf_symtab_writer.h2
-rw-r--r--compiler/driver/compiled_method_storage.cc2
-rw-r--r--compiler/driver/compiler_driver.h3
-rw-r--r--compiler/driver/compiler_options.h2
-rw-r--r--compiler/driver/dex_compilation_unit.cc2
-rw-r--r--compiler/jni/quick/jni_compiler.cc2
-rw-r--r--compiler/linker/arm64/relative_patcher_arm64.cc8
-rw-r--r--compiler/linker/elf_builder.h27
-rw-r--r--compiler/linker/file_output_stream.h4
-rw-r--r--compiler/linker/linker_patch.h58
-rw-r--r--compiler/optimizing/code_generator.cc43
-rw-r--r--compiler/optimizing/code_generator.h4
-rw-r--r--compiler/optimizing/code_generator_arm64.cc187
-rw-r--r--compiler/optimizing/code_generator_arm64.h22
-rw-r--r--compiler/optimizing/code_generator_arm_vixl.cc288
-rw-r--r--compiler/optimizing/code_generator_arm_vixl.h12
-rw-r--r--compiler/optimizing/code_generator_mips.cc449
-rw-r--r--compiler/optimizing/code_generator_mips.h10
-rw-r--r--compiler/optimizing/code_generator_mips64.cc233
-rw-r--r--compiler/optimizing/code_generator_mips64.h11
-rw-r--r--compiler/optimizing/code_generator_x86.cc265
-rw-r--r--compiler/optimizing/code_generator_x86.h14
-rw-r--r--compiler/optimizing/code_generator_x86_64.cc218
-rw-r--r--compiler/optimizing/code_generator_x86_64.h14
-rw-r--r--compiler/optimizing/codegen_test.cc2
-rw-r--r--compiler/optimizing/gvn.cc4
-rw-r--r--compiler/optimizing/induction_var_range.cc33
-rw-r--r--compiler/optimizing/instruction_simplifier.cc40
-rw-r--r--compiler/optimizing/intrinsics.cc2
-rw-r--r--compiler/optimizing/intrinsics.h8
-rw-r--r--compiler/optimizing/intrinsics_arm64.cc115
-rw-r--r--compiler/optimizing/intrinsics_arm_vixl.cc258
-rw-r--r--compiler/optimizing/intrinsics_mips.cc604
-rw-r--r--compiler/optimizing/intrinsics_mips.h1
-rw-r--r--compiler/optimizing/intrinsics_mips64.cc305
-rw-r--r--compiler/optimizing/intrinsics_mips64.h2
-rw-r--r--compiler/optimizing/intrinsics_x86.cc277
-rw-r--r--compiler/optimizing/intrinsics_x86_64.cc202
-rw-r--r--compiler/optimizing/loop_optimization.cc249
-rw-r--r--compiler/optimizing/loop_optimization_test.cc5
-rw-r--r--compiler/optimizing/nodes.cc14
-rw-r--r--compiler/optimizing/nodes.h113
-rw-r--r--compiler/optimizing/optimizing_compiler_stats.h4
-rw-r--r--compiler/optimizing/pc_relative_fixups_mips.cc4
-rw-r--r--compiler/optimizing/pc_relative_fixups_x86.cc15
-rw-r--r--compiler/optimizing/scheduler.cc2
-rw-r--r--compiler/optimizing/sharpening.cc10
-rw-r--r--compiler/optimizing/superblock_cloner.cc351
-rw-r--r--compiler/optimizing/superblock_cloner.h108
-rw-r--r--compiler/optimizing/superblock_cloner_test.cc557
-rw-r--r--compiler/utils/assembler_test_base.h2
-rw-r--r--compiler/utils/atomic_dex_ref_map.h2
-rw-r--r--compiler/utils/mips/assembler_mips.cc20
-rw-r--r--compiler/utils/mips/assembler_mips.h5
-rw-r--r--compiler/utils/mips/assembler_mips32r6_test.cc16
-rw-r--r--compiler/utils/mips64/assembler_mips64.cc20
-rw-r--r--compiler/utils/mips64/assembler_mips64.h5
-rw-r--r--compiler/utils/mips64/assembler_mips64_test.cc16
-rw-r--r--compiler/utils/swap_space_test.cc2
-rw-r--r--compiler/utils/test_dex_file_builder_test.cc2
-rw-r--r--compiler/utils/x86/assembler_x86.cc72
-rw-r--r--compiler/utils/x86/assembler_x86.h9
-rw-r--r--compiler/utils/x86/assembler_x86_test.cc32
-rw-r--r--compiler/utils/x86_64/assembler_x86_64.cc80
-rw-r--r--compiler/utils/x86_64/assembler_x86_64.h9
-rw-r--r--compiler/utils/x86_64/assembler_x86_64_test.cc32
71 files changed, 3404 insertions, 2102 deletions
diff --git a/compiler/Android.bp b/compiler/Android.bp
index 40c676c406..c4d538fc88 100644
--- a/compiler/Android.bp
+++ b/compiler/Android.bp
@@ -256,7 +256,14 @@ art_cc_library {
instrumentation: true,
profile_file: "art/dex2oat.profdata",
benchmarks: ["dex2oat"],
- }
+ },
+ target: {
+ android: {
+ lto: {
+ thin: true,
+ },
+ },
+ },
}
art_cc_library {
diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc
index a20313374c..d3e3a51f7a 100644
--- a/compiler/common_compiler_test.cc
+++ b/compiler/common_compiler_test.cc
@@ -21,6 +21,7 @@
#include "art_method-inl.h"
#include "base/callee_save_type.h"
#include "base/enums.h"
+#include "base/utils.h"
#include "class_linker.h"
#include "compiled_method-inl.h"
#include "dex/descriptors_names.h"
@@ -36,7 +37,6 @@
#include "oat_quick_method_header.h"
#include "scoped_thread_state_change-inl.h"
#include "thread-current-inl.h"
-#include "utils.h"
namespace art {
diff --git a/compiler/compiler.cc b/compiler/compiler.cc
index 7c7ae71d77..646040fd9d 100644
--- a/compiler/compiler.cc
+++ b/compiler/compiler.cc
@@ -19,10 +19,10 @@
#include <android-base/logging.h>
#include "base/macros.h"
+#include "base/utils.h"
#include "dex/code_item_accessors-inl.h"
#include "driver/compiler_driver.h"
#include "optimizing/optimizing_compiler.h"
-#include "utils.h"
namespace art {
diff --git a/compiler/compiler.h b/compiler/compiler.h
index a17e2b5875..f2ec3a9fa3 100644
--- a/compiler/compiler.h
+++ b/compiler/compiler.h
@@ -18,8 +18,8 @@
#define ART_COMPILER_COMPILER_H_
#include "base/mutex.h"
+#include "base/os.h"
#include "dex/dex_file.h"
-#include "os.h"
namespace art {
diff --git a/compiler/debug/dwarf/dwarf_test.h b/compiler/debug/dwarf/dwarf_test.h
index 5405759c1f..9a7c604ca1 100644
--- a/compiler/debug/dwarf/dwarf_test.h
+++ b/compiler/debug/dwarf/dwarf_test.h
@@ -26,12 +26,12 @@
#include <set>
#include <string>
+#include "base/os.h"
#include "base/unix_file/fd_file.h"
#include "common_runtime_test.h"
#include "gtest/gtest.h"
#include "linker/elf_builder.h"
#include "linker/file_output_stream.h"
-#include "os.h"
namespace art {
namespace dwarf {
diff --git a/compiler/debug/elf_symtab_writer.h b/compiler/debug/elf_symtab_writer.h
index 1310e8dabd..7a8e29191a 100644
--- a/compiler/debug/elf_symtab_writer.h
+++ b/compiler/debug/elf_symtab_writer.h
@@ -20,12 +20,12 @@
#include <map>
#include <unordered_set>
+#include "base/utils.h"
#include "debug/debug_info.h"
#include "debug/method_debug_info.h"
#include "dex/dex_file-inl.h"
#include "dex/code_item_accessors.h"
#include "linker/elf_builder.h"
-#include "utils.h"
namespace art {
namespace debug {
diff --git a/compiler/driver/compiled_method_storage.cc b/compiler/driver/compiled_method_storage.cc
index 48477abe5b..a26a985ff9 100644
--- a/compiler/driver/compiled_method_storage.cc
+++ b/compiler/driver/compiled_method_storage.cc
@@ -21,10 +21,10 @@
#include <android-base/logging.h>
+#include "base/utils.h"
#include "compiled_method.h"
#include "linker/linker_patch.h"
#include "thread-current-inl.h"
-#include "utils.h"
#include "utils/dedupe_set-inl.h"
#include "utils/swap_space.h"
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index a3bb4ec34d..8db892bf18 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -29,6 +29,8 @@
#include "base/array_ref.h"
#include "base/bit_utils.h"
#include "base/mutex.h"
+#include "base/os.h"
+#include "base/quasi_atomic.h"
#include "base/safe_map.h"
#include "base/timing_logger.h"
#include "class_reference.h"
@@ -39,7 +41,6 @@
#include "dex/dex_to_dex_compiler.h"
#include "driver/compiled_method_storage.h"
#include "method_reference.h"
-#include "os.h"
#include "thread_pool.h"
#include "utils/atomic_dex_ref_map.h"
#include "utils/dex_cache_arrays_layout.h"
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index 18b0913430..05d8805e81 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -22,10 +22,10 @@
#include <vector>
#include "base/macros.h"
+#include "base/utils.h"
#include "compiler_filter.h"
#include "globals.h"
#include "optimizing/register_allocator.h"
-#include "utils.h"
namespace art {
diff --git a/compiler/driver/dex_compilation_unit.cc b/compiler/driver/dex_compilation_unit.cc
index 2e315b5d12..c90c37d54a 100644
--- a/compiler/driver/dex_compilation_unit.cc
+++ b/compiler/driver/dex_compilation_unit.cc
@@ -16,10 +16,10 @@
#include "dex_compilation_unit.h"
+#include "base/utils.h"
#include "dex/code_item_accessors-inl.h"
#include "dex/descriptors_names.h"
#include "mirror/dex_cache.h"
-#include "utils.h"
namespace art {
diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc
index fc44927231..d001cfe4fc 100644
--- a/compiler/jni/quick/jni_compiler.cc
+++ b/compiler/jni/quick/jni_compiler.cc
@@ -27,6 +27,7 @@
#include "base/enums.h"
#include "base/logging.h" // For VLOG.
#include "base/macros.h"
+#include "base/utils.h"
#include "calling_convention.h"
#include "class_linker.h"
#include "debug/dwarf/debug_frame_opcode_writer.h"
@@ -37,7 +38,6 @@
#include "jni_env_ext.h"
#include "memory_region.h"
#include "thread.h"
-#include "utils.h"
#include "utils/arm/managed_register_arm.h"
#include "utils/arm64/managed_register_arm64.h"
#include "utils/assembler.h"
diff --git a/compiler/linker/arm64/relative_patcher_arm64.cc b/compiler/linker/arm64/relative_patcher_arm64.cc
index 52a07965b9..b268204b4a 100644
--- a/compiler/linker/arm64/relative_patcher_arm64.cc
+++ b/compiler/linker/arm64/relative_patcher_arm64.cc
@@ -60,13 +60,12 @@ inline bool IsAdrpPatch(const LinkerPatch& patch) {
case LinkerPatch::Type::kCallRelative:
case LinkerPatch::Type::kBakerReadBarrierBranch:
return false;
+ case LinkerPatch::Type::kDataBimgRelRo:
case LinkerPatch::Type::kMethodRelative:
case LinkerPatch::Type::kMethodBssEntry:
case LinkerPatch::Type::kTypeRelative:
- case LinkerPatch::Type::kTypeClassTable:
case LinkerPatch::Type::kTypeBssEntry:
case LinkerPatch::Type::kStringRelative:
- case LinkerPatch::Type::kStringInternTable:
case LinkerPatch::Type::kStringBssEntry:
return patch.LiteralOffset() == patch.PcInsnOffset();
}
@@ -271,10 +270,9 @@ void Arm64RelativePatcher::PatchPcRelativeReference(std::vector<uint8_t>* code,
shift = 0u; // No shift for ADD.
} else {
// LDR/STR 32-bit or 64-bit with imm12 == 0 (unset).
- DCHECK(patch.GetType() == LinkerPatch::Type::kMethodBssEntry ||
- patch.GetType() == LinkerPatch::Type::kTypeClassTable ||
+ DCHECK(patch.GetType() == LinkerPatch::Type::kDataBimgRelRo ||
+ patch.GetType() == LinkerPatch::Type::kMethodBssEntry ||
patch.GetType() == LinkerPatch::Type::kTypeBssEntry ||
- patch.GetType() == LinkerPatch::Type::kStringInternTable ||
patch.GetType() == LinkerPatch::Type::kStringBssEntry) << patch.GetType();
DCHECK_EQ(insn & 0xbfbffc00, 0xb9000000) << std::hex << insn;
}
diff --git a/compiler/linker/elf_builder.h b/compiler/linker/elf_builder.h
index a5f60992ca..3da7a43762 100644
--- a/compiler/linker/elf_builder.h
+++ b/compiler/linker/elf_builder.h
@@ -529,6 +529,8 @@ class ElfBuilder FINAL {
stream_(output),
rodata_(this, ".rodata", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0),
text_(this, ".text", SHT_PROGBITS, SHF_ALLOC | SHF_EXECINSTR, nullptr, 0, kPageSize, 0),
+ data_bimg_rel_ro_(
+ this, ".data.bimg.rel.ro", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0),
bss_(this, ".bss", SHT_NOBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0),
dex_(this, ".dex", SHT_NOBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0),
dynstr_(this, ".dynstr", SHF_ALLOC, kPageSize),
@@ -552,6 +554,7 @@ class ElfBuilder FINAL {
loaded_size_(0u),
virtual_address_(0) {
text_.phdr_flags_ = PF_R | PF_X;
+ data_bimg_rel_ro_.phdr_flags_ = PF_R | PF_W; // Shall be made read-only at run time.
bss_.phdr_flags_ = PF_R | PF_W;
dex_.phdr_flags_ = PF_R;
dynamic_.phdr_flags_ = PF_R | PF_W;
@@ -566,6 +569,7 @@ class ElfBuilder FINAL {
BuildIdSection* GetBuildId() { return &build_id_; }
Section* GetRoData() { return &rodata_; }
Section* GetText() { return &text_; }
+ Section* GetDataBimgRelRo() { return &data_bimg_rel_ro_; }
Section* GetBss() { return &bss_; }
Section* GetDex() { return &dex_; }
StringSection* GetStrTab() { return &strtab_; }
@@ -694,6 +698,7 @@ class ElfBuilder FINAL {
void PrepareDynamicSection(const std::string& elf_file_path,
Elf_Word rodata_size,
Elf_Word text_size,
+ Elf_Word data_bimg_rel_ro_size,
Elf_Word bss_size,
Elf_Word bss_methods_offset,
Elf_Word bss_roots_offset,
@@ -707,6 +712,9 @@ class ElfBuilder FINAL {
// Allocate all pre-dynamic sections.
rodata_.AllocateVirtualMemory(rodata_size);
text_.AllocateVirtualMemory(text_size);
+ if (data_bimg_rel_ro_size != 0) {
+ data_bimg_rel_ro_.AllocateVirtualMemory(data_bimg_rel_ro_size);
+ }
if (bss_size != 0) {
bss_.AllocateVirtualMemory(bss_size);
}
@@ -735,6 +743,24 @@ class ElfBuilder FINAL {
Elf_Word oatlastword_address = rodata_.GetAddress() + rodata_size - 4;
dynsym_.Add(oatlastword, &rodata_, oatlastword_address, 4, STB_GLOBAL, STT_OBJECT);
}
+ if (data_bimg_rel_ro_size != 0u) {
+ Elf_Word oatdatabimgrelro = dynstr_.Add("oatdatabimgrelro");
+ dynsym_.Add(oatdatabimgrelro,
+ &data_bimg_rel_ro_,
+ data_bimg_rel_ro_.GetAddress(),
+ data_bimg_rel_ro_size,
+ STB_GLOBAL,
+ STT_OBJECT);
+ Elf_Word oatdatabimgrelrolastword = dynstr_.Add("oatdatabimgrelrolastword");
+ Elf_Word oatdatabimgrelrolastword_address =
+ data_bimg_rel_ro_.GetAddress() + data_bimg_rel_ro_size - 4;
+ dynsym_.Add(oatdatabimgrelrolastword,
+ &data_bimg_rel_ro_,
+ oatdatabimgrelrolastword_address,
+ 4,
+ STB_GLOBAL,
+ STT_OBJECT);
+ }
DCHECK_LE(bss_roots_offset, bss_size);
if (bss_size != 0u) {
Elf_Word oatbss = dynstr_.Add("oatbss");
@@ -1010,6 +1036,7 @@ class ElfBuilder FINAL {
Section rodata_;
Section text_;
+ Section data_bimg_rel_ro_;
Section bss_;
Section dex_;
CachedStringSection dynstr_;
diff --git a/compiler/linker/file_output_stream.h b/compiler/linker/file_output_stream.h
index 28296a47fd..deb051fca4 100644
--- a/compiler/linker/file_output_stream.h
+++ b/compiler/linker/file_output_stream.h
@@ -17,9 +17,9 @@
#ifndef ART_COMPILER_LINKER_FILE_OUTPUT_STREAM_H_
#define ART_COMPILER_LINKER_FILE_OUTPUT_STREAM_H_
-#include "output_stream.h"
+#include "base/os.h"
-#include "os.h"
+#include "output_stream.h"
namespace art {
namespace linker {
diff --git a/compiler/linker/linker_patch.h b/compiler/linker/linker_patch.h
index 6f4e7746a6..36051d2726 100644
--- a/compiler/linker/linker_patch.h
+++ b/compiler/linker/linker_patch.h
@@ -41,19 +41,27 @@ class LinkerPatch {
// choose to squeeze the Type into fewer than 8 bits, we'll have to declare
// patch_type_ as an uintN_t and do explicit static_cast<>s.
enum class Type : uint8_t {
+ kDataBimgRelRo, // NOTE: Actual patching is instruction_set-dependent.
kMethodRelative, // NOTE: Actual patching is instruction_set-dependent.
kMethodBssEntry, // NOTE: Actual patching is instruction_set-dependent.
kCall,
kCallRelative, // NOTE: Actual patching is instruction_set-dependent.
kTypeRelative, // NOTE: Actual patching is instruction_set-dependent.
- kTypeClassTable, // NOTE: Actual patching is instruction_set-dependent.
kTypeBssEntry, // NOTE: Actual patching is instruction_set-dependent.
kStringRelative, // NOTE: Actual patching is instruction_set-dependent.
- kStringInternTable, // NOTE: Actual patching is instruction_set-dependent.
kStringBssEntry, // NOTE: Actual patching is instruction_set-dependent.
kBakerReadBarrierBranch, // NOTE: Actual patching is instruction_set-dependent.
};
+ static LinkerPatch DataBimgRelRoPatch(size_t literal_offset,
+ uint32_t pc_insn_offset,
+ uint32_t boot_image_offset) {
+ LinkerPatch patch(literal_offset, Type::kDataBimgRelRo, /* target_dex_file */ nullptr);
+ patch.boot_image_offset_ = boot_image_offset;
+ patch.pc_insn_offset_ = pc_insn_offset;
+ return patch;
+ }
+
static LinkerPatch RelativeMethodPatch(size_t literal_offset,
const DexFile* target_dex_file,
uint32_t pc_insn_offset,
@@ -100,16 +108,6 @@ class LinkerPatch {
return patch;
}
- static LinkerPatch TypeClassTablePatch(size_t literal_offset,
- const DexFile* target_dex_file,
- uint32_t pc_insn_offset,
- uint32_t target_type_idx) {
- LinkerPatch patch(literal_offset, Type::kTypeClassTable, target_dex_file);
- patch.type_idx_ = target_type_idx;
- patch.pc_insn_offset_ = pc_insn_offset;
- return patch;
- }
-
static LinkerPatch TypeBssEntryPatch(size_t literal_offset,
const DexFile* target_dex_file,
uint32_t pc_insn_offset,
@@ -130,16 +128,6 @@ class LinkerPatch {
return patch;
}
- static LinkerPatch StringInternTablePatch(size_t literal_offset,
- const DexFile* target_dex_file,
- uint32_t pc_insn_offset,
- uint32_t target_string_idx) {
- LinkerPatch patch(literal_offset, Type::kStringInternTable, target_dex_file);
- patch.string_idx_ = target_string_idx;
- patch.pc_insn_offset_ = pc_insn_offset;
- return patch;
- }
-
static LinkerPatch StringBssEntryPatch(size_t literal_offset,
const DexFile* target_dex_file,
uint32_t pc_insn_offset,
@@ -172,14 +160,13 @@ class LinkerPatch {
bool IsPcRelative() const {
switch (GetType()) {
+ case Type::kDataBimgRelRo:
case Type::kMethodRelative:
case Type::kMethodBssEntry:
case Type::kCallRelative:
case Type::kTypeRelative:
- case Type::kTypeClassTable:
case Type::kTypeBssEntry:
case Type::kStringRelative:
- case Type::kStringInternTable:
case Type::kStringBssEntry:
case Type::kBakerReadBarrierBranch:
return true;
@@ -188,6 +175,11 @@ class LinkerPatch {
}
}
+ uint32_t BootImageOffset() const {
+ DCHECK(patch_type_ == Type::kDataBimgRelRo);
+ return boot_image_offset_;
+ }
+
MethodReference TargetMethod() const {
DCHECK(patch_type_ == Type::kMethodRelative ||
patch_type_ == Type::kMethodBssEntry ||
@@ -198,40 +190,35 @@ class LinkerPatch {
const DexFile* TargetTypeDexFile() const {
DCHECK(patch_type_ == Type::kTypeRelative ||
- patch_type_ == Type::kTypeClassTable ||
patch_type_ == Type::kTypeBssEntry);
return target_dex_file_;
}
dex::TypeIndex TargetTypeIndex() const {
DCHECK(patch_type_ == Type::kTypeRelative ||
- patch_type_ == Type::kTypeClassTable ||
patch_type_ == Type::kTypeBssEntry);
return dex::TypeIndex(type_idx_);
}
const DexFile* TargetStringDexFile() const {
DCHECK(patch_type_ == Type::kStringRelative ||
- patch_type_ == Type::kStringInternTable ||
patch_type_ == Type::kStringBssEntry);
return target_dex_file_;
}
dex::StringIndex TargetStringIndex() const {
DCHECK(patch_type_ == Type::kStringRelative ||
- patch_type_ == Type::kStringInternTable ||
patch_type_ == Type::kStringBssEntry);
return dex::StringIndex(string_idx_);
}
uint32_t PcInsnOffset() const {
- DCHECK(patch_type_ == Type::kMethodRelative ||
+ DCHECK(patch_type_ == Type::kDataBimgRelRo ||
+ patch_type_ == Type::kMethodRelative ||
patch_type_ == Type::kMethodBssEntry ||
patch_type_ == Type::kTypeRelative ||
- patch_type_ == Type::kTypeClassTable ||
patch_type_ == Type::kTypeBssEntry ||
patch_type_ == Type::kStringRelative ||
- patch_type_ == Type::kStringInternTable ||
patch_type_ == Type::kStringBssEntry);
return pc_insn_offset_;
}
@@ -263,10 +250,11 @@ class LinkerPatch {
uint32_t literal_offset_ : 24; // Method code size up to 16MiB.
Type patch_type_ : 8;
union {
- uint32_t cmp1_; // Used for relational operators.
- uint32_t method_idx_; // Method index for Call/Method patches.
- uint32_t type_idx_; // Type index for Type patches.
- uint32_t string_idx_; // String index for String patches.
+ uint32_t cmp1_; // Used for relational operators.
+ uint32_t boot_image_offset_; // Data to write to the .data.bimg.rel.ro entry.
+ uint32_t method_idx_; // Method index for Call/Method patches.
+ uint32_t type_idx_; // Type index for Type patches.
+ uint32_t string_idx_; // String index for String patches.
uint32_t baker_custom_value1_;
static_assert(sizeof(method_idx_) == sizeof(cmp1_), "needed by relational operators");
static_assert(sizeof(type_idx_) == sizeof(cmp1_), "needed by relational operators");
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index ff59173c8b..0fcc9c6d05 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -51,6 +51,8 @@
#include "dex/verified_method.h"
#include "driver/compiler_driver.h"
#include "graph_visualizer.h"
+#include "image.h"
+#include "gc/space/image_space.h"
#include "intern_table.h"
#include "intrinsics.h"
#include "mirror/array-inl.h"
@@ -722,6 +724,47 @@ void CodeGenerator::GenerateLoadClassRuntimeCall(HLoadClass* cls) {
}
}
+static uint32_t GetBootImageOffsetImpl(const void* object, ImageHeader::ImageSections section) {
+ Runtime* runtime = Runtime::Current();
+ DCHECK(runtime->IsAotCompiler());
+ const std::vector<gc::space::ImageSpace*>& boot_image_spaces =
+ runtime->GetHeap()->GetBootImageSpaces();
+ // Check that the `object` is in the expected section of one of the boot image files.
+ DCHECK(std::any_of(boot_image_spaces.begin(),
+ boot_image_spaces.end(),
+ [object, section](gc::space::ImageSpace* space) {
+ uintptr_t begin = reinterpret_cast<uintptr_t>(space->Begin());
+ uintptr_t offset = reinterpret_cast<uintptr_t>(object) - begin;
+ return space->GetImageHeader().GetImageSection(section).Contains(offset);
+ }));
+ uintptr_t begin = reinterpret_cast<uintptr_t>(boot_image_spaces.front()->Begin());
+ uintptr_t offset = reinterpret_cast<uintptr_t>(object) - begin;
+ return dchecked_integral_cast<uint32_t>(offset);
+}
+
+// NO_THREAD_SAFETY_ANALYSIS: Avoid taking the mutator lock, boot image classes are non-moveable.
+uint32_t CodeGenerator::GetBootImageOffset(HLoadClass* load_class) NO_THREAD_SAFETY_ANALYSIS {
+ DCHECK_EQ(load_class->GetLoadKind(), HLoadClass::LoadKind::kBootImageRelRo);
+ ObjPtr<mirror::Class> klass = load_class->GetClass().Get();
+ DCHECK(klass != nullptr);
+ return GetBootImageOffsetImpl(klass.Ptr(), ImageHeader::kSectionObjects);
+}
+
+// NO_THREAD_SAFETY_ANALYSIS: Avoid taking the mutator lock, boot image strings are non-moveable.
+uint32_t CodeGenerator::GetBootImageOffset(HLoadString* load_string) NO_THREAD_SAFETY_ANALYSIS {
+ DCHECK_EQ(load_string->GetLoadKind(), HLoadString::LoadKind::kBootImageRelRo);
+ ObjPtr<mirror::String> string = load_string->GetString().Get();
+ DCHECK(string != nullptr);
+ return GetBootImageOffsetImpl(string.Ptr(), ImageHeader::kSectionObjects);
+}
+
+uint32_t CodeGenerator::GetBootImageOffset(HInvokeStaticOrDirect* invoke) {
+ DCHECK_EQ(invoke->GetMethodLoadKind(), HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo);
+ ArtMethod* method = invoke->GetResolvedMethod();
+ DCHECK(method != nullptr);
+ return GetBootImageOffsetImpl(method, ImageHeader::kSectionArtMethods);
+}
+
void CodeGenerator::BlockIfInRegister(Location location, bool is_out) const {
// The DCHECKS below check that a register is not specified twice in
// the summary. The out location can overlap with an input, so we need
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 60de722285..7031483a77 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -556,6 +556,10 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
Location runtime_return_location);
void GenerateLoadClassRuntimeCall(HLoadClass* cls);
+ uint32_t GetBootImageOffset(HLoadClass* load_class);
+ uint32_t GetBootImageOffset(HLoadString* load_string);
+ uint32_t GetBootImageOffset(HInvokeStaticOrDirect* invoke);
+
static void CreateSystemArrayCopyLocationSummary(HInvoke* invoke);
void SetDisassemblyInformation(DisassemblyInformation* info) { disasm_info_ = info; }
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 1f9c5546e2..a024df8537 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -78,6 +78,7 @@ using helpers::OutputFPRegister;
using helpers::OutputRegister;
using helpers::QRegisterFrom;
using helpers::RegisterFrom;
+using helpers::SRegisterFrom;
using helpers::StackOperandFrom;
using helpers::VIXLRegCodeFromART;
using helpers::WRegisterFrom;
@@ -4459,12 +4460,23 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall(
// Load method address from literal pool.
__ Ldr(XRegisterFrom(temp), DeduplicateUint64Literal(invoke->GetMethodAddress()));
break;
+ case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: {
+ // Add ADRP with its PC-relative .data.bimg.rel.ro patch.
+ uint32_t boot_image_offset = GetBootImageOffset(invoke);
+ vixl::aarch64::Label* adrp_label = NewBootImageRelRoPatch(boot_image_offset);
+ EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp));
+ // Add LDR with its PC-relative .data.bimg.rel.ro patch.
+ vixl::aarch64::Label* ldr_label = NewBootImageRelRoPatch(boot_image_offset, adrp_label);
+ // Note: Boot image is in the low 4GiB and the entry is 32-bit, so emit a 32-bit load.
+ EmitLdrOffsetPlaceholder(ldr_label, WRegisterFrom(temp), XRegisterFrom(temp));
+ break;
+ }
case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: {
- // Add ADRP with its PC-relative DexCache access patch.
+ // Add ADRP with its PC-relative .bss entry patch.
MethodReference target_method(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex());
vixl::aarch64::Label* adrp_label = NewMethodBssEntryPatch(target_method);
EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp));
- // Add LDR with its PC-relative DexCache access patch.
+ // Add LDR with its PC-relative .bss entry patch.
vixl::aarch64::Label* ldr_label =
NewMethodBssEntryPatch(target_method, adrp_label);
EmitLdrOffsetPlaceholder(ldr_label, XRegisterFrom(temp), XRegisterFrom(temp));
@@ -4559,6 +4571,13 @@ void InstructionCodeGeneratorARM64::VisitInvokePolymorphic(HInvokePolymorphic* i
codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
}
+vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageRelRoPatch(
+ uint32_t boot_image_offset,
+ vixl::aarch64::Label* adrp_label) {
+ return NewPcRelativePatch(
+ /* dex_file */ nullptr, boot_image_offset, adrp_label, &boot_image_method_patches_);
+}
+
vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageMethodPatch(
MethodReference target_method,
vixl::aarch64::Label* adrp_label) {
@@ -4681,6 +4700,14 @@ inline void CodeGeneratorARM64::EmitPcRelativeLinkerPatches(
}
}
+linker::LinkerPatch DataBimgRelRoPatchAdapter(size_t literal_offset,
+ const DexFile* target_dex_file,
+ uint32_t pc_insn_offset,
+ uint32_t boot_image_offset) {
+ DCHECK(target_dex_file == nullptr); // Unused for DataBimgRelRoPatch(), should be null.
+ return linker::LinkerPatch::DataBimgRelRoPatch(literal_offset, pc_insn_offset, boot_image_offset);
+}
+
void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
DCHECK(linker_patches->empty());
size_t size =
@@ -4700,11 +4727,10 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* lin
EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
boot_image_string_patches_, linker_patches);
} else {
- DCHECK(boot_image_method_patches_.empty());
- EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>(
- boot_image_type_patches_, linker_patches);
- EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>(
- boot_image_string_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<DataBimgRelRoPatchAdapter>(
+ boot_image_method_patches_, linker_patches);
+ DCHECK(boot_image_type_patches_.empty());
+ DCHECK(boot_image_string_patches_.empty());
}
EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
method_bss_entry_patches_, linker_patches);
@@ -4779,7 +4805,7 @@ HLoadClass::LoadKind CodeGeneratorARM64::GetSupportedLoadClassKind(
case HLoadClass::LoadKind::kReferrersClass:
break;
case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
- case HLoadClass::LoadKind::kBootImageClassTable:
+ case HLoadClass::LoadKind::kBootImageRelRo:
case HLoadClass::LoadKind::kBssEntry:
DCHECK(!Runtime::Current()->UseJitCompilation());
break;
@@ -4888,23 +4914,16 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA
__ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
break;
}
- case HLoadClass::LoadKind::kBootImageClassTable: {
+ case HLoadClass::LoadKind::kBootImageRelRo: {
DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
- // Add ADRP with its PC-relative type patch.
- const DexFile& dex_file = cls->GetDexFile();
- dex::TypeIndex type_index = cls->GetTypeIndex();
- vixl::aarch64::Label* adrp_label = codegen_->NewBootImageTypePatch(dex_file, type_index);
+ uint32_t boot_image_offset = codegen_->GetBootImageOffset(cls);
+ // Add ADRP with its PC-relative .data.bimg.rel.ro patch.
+ vixl::aarch64::Label* adrp_label = codegen_->NewBootImageRelRoPatch(boot_image_offset);
codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
- // Add LDR with its PC-relative type patch.
+ // Add LDR with its PC-relative .data.bimg.rel.ro patch.
vixl::aarch64::Label* ldr_label =
- codegen_->NewBootImageTypePatch(dex_file, type_index, adrp_label);
+ codegen_->NewBootImageRelRoPatch(boot_image_offset, adrp_label);
codegen_->EmitLdrOffsetPlaceholder(ldr_label, out.W(), out.X());
- // Extract the reference from the slot data, i.e. clear the hash bits.
- int32_t masked_hash = ClassTable::TableSlot::MaskHash(
- ComputeModifiedUtf8Hash(dex_file.StringByTypeIdx(type_index)));
- if (masked_hash != 0) {
- __ Sub(out.W(), out.W(), Operand(masked_hash));
- }
break;
}
case HLoadClass::LoadKind::kBssEntry: {
@@ -4914,7 +4933,7 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA
vixl::aarch64::Register temp = XRegisterFrom(out_loc);
vixl::aarch64::Label* adrp_label = codegen_->NewBssEntryTypePatch(dex_file, type_index);
codegen_->EmitAdrpPlaceholder(adrp_label, temp);
- // Add LDR with its PC-relative Class patch.
+ // Add LDR with its PC-relative Class .bss entry patch.
vixl::aarch64::Label* ldr_label =
codegen_->NewBssEntryTypePatch(dex_file, type_index, adrp_label);
// /* GcRoot<mirror::Class> */ out = *(base_address + offset) /* PC-relative */
@@ -4989,7 +5008,7 @@ HLoadString::LoadKind CodeGeneratorARM64::GetSupportedLoadStringKind(
HLoadString::LoadKind desired_string_load_kind) {
switch (desired_string_load_kind) {
case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
- case HLoadString::LoadKind::kBootImageInternTable:
+ case HLoadString::LoadKind::kBootImageRelRo:
case HLoadString::LoadKind::kBssEntry:
DCHECK(!Runtime::Current()->UseJitCompilation());
break;
@@ -5055,16 +5074,15 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD
__ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
return;
}
- case HLoadString::LoadKind::kBootImageInternTable: {
+ case HLoadString::LoadKind::kBootImageRelRo: {
DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
- // Add ADRP with its PC-relative String patch.
- const DexFile& dex_file = load->GetDexFile();
- const dex::StringIndex string_index = load->GetStringIndex();
- vixl::aarch64::Label* adrp_label = codegen_->NewBootImageStringPatch(dex_file, string_index);
+ // Add ADRP with its PC-relative .data.bimg.rel.ro patch.
+ uint32_t boot_image_offset = codegen_->GetBootImageOffset(load);
+ vixl::aarch64::Label* adrp_label = codegen_->NewBootImageRelRoPatch(boot_image_offset);
codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
- // Add LDR with its PC-relative String patch.
+ // Add LDR with its PC-relative .data.bimg.rel.ro patch.
vixl::aarch64::Label* ldr_label =
- codegen_->NewBootImageStringPatch(dex_file, string_index, adrp_label);
+ codegen_->NewBootImageRelRoPatch(boot_image_offset, adrp_label);
codegen_->EmitLdrOffsetPlaceholder(ldr_label, out.W(), out.X());
return;
}
@@ -5076,7 +5094,7 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD
Register temp = XRegisterFrom(out_loc);
vixl::aarch64::Label* adrp_label = codegen_->NewStringBssEntryPatch(dex_file, string_index);
codegen_->EmitAdrpPlaceholder(adrp_label, temp);
- // Add LDR with its .bss entry String patch.
+ // Add LDR with its PC-relative String .bss entry patch.
vixl::aarch64::Label* ldr_label =
codegen_->NewStringBssEntryPatch(dex_file, string_index, adrp_label);
// /* GcRoot<mirror::String> */ out = *(base_address + offset) /* PC-relative */
@@ -5462,6 +5480,113 @@ void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) {
}
}
+// TODO: integrate with HandleBinaryOp?
+static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) {
+ LocationSummary* locations = new (allocator) LocationSummary(minmax);
+ switch (minmax->GetResultType()) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ break;
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType();
+ }
+}
+
+void InstructionCodeGeneratorARM64::GenerateMinMaxInt(LocationSummary* locations,
+ bool is_min,
+ DataType::Type type) {
+ Location op1 = locations->InAt(0);
+ Location op2 = locations->InAt(1);
+ Location out = locations->Out();
+
+ Register op1_reg;
+ Register op2_reg;
+ Register out_reg;
+ if (type == DataType::Type::kInt64) {
+ op1_reg = XRegisterFrom(op1);
+ op2_reg = XRegisterFrom(op2);
+ out_reg = XRegisterFrom(out);
+ } else {
+ DCHECK_EQ(type, DataType::Type::kInt32);
+ op1_reg = WRegisterFrom(op1);
+ op2_reg = WRegisterFrom(op2);
+ out_reg = WRegisterFrom(out);
+ }
+
+ __ Cmp(op1_reg, op2_reg);
+ __ Csel(out_reg, op1_reg, op2_reg, is_min ? lt : gt);
+}
+
+void InstructionCodeGeneratorARM64::GenerateMinMaxFP(LocationSummary* locations,
+ bool is_min,
+ DataType::Type type) {
+ Location op1 = locations->InAt(0);
+ Location op2 = locations->InAt(1);
+ Location out = locations->Out();
+
+ FPRegister op1_reg;
+ FPRegister op2_reg;
+ FPRegister out_reg;
+ if (type == DataType::Type::kFloat64) {
+ op1_reg = DRegisterFrom(op1);
+ op2_reg = DRegisterFrom(op2);
+ out_reg = DRegisterFrom(out);
+ } else {
+ DCHECK_EQ(type, DataType::Type::kFloat32);
+ op1_reg = SRegisterFrom(op1);
+ op2_reg = SRegisterFrom(op2);
+ out_reg = SRegisterFrom(out);
+ }
+
+ if (is_min) {
+ __ Fmin(out_reg, op1_reg, op2_reg);
+ } else {
+ __ Fmax(out_reg, op1_reg, op2_reg);
+ }
+}
+
+// TODO: integrate with HandleBinaryOp?
+void InstructionCodeGeneratorARM64::GenerateMinMax(HBinaryOperation* minmax, bool is_min) {
+ DataType::Type type = minmax->GetResultType();
+ switch (type) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ GenerateMinMaxInt(minmax->GetLocations(), is_min, type);
+ break;
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ GenerateMinMaxFP(minmax->GetLocations(), is_min, type);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected type for HMinMax " << type;
+ }
+}
+
+void LocationsBuilderARM64::VisitMin(HMin* min) {
+ CreateMinMaxLocations(GetGraph()->GetAllocator(), min);
+}
+
+void InstructionCodeGeneratorARM64::VisitMin(HMin* min) {
+ GenerateMinMax(min, /*is_min*/ true);
+}
+
+void LocationsBuilderARM64::VisitMax(HMax* max) {
+ CreateMinMaxLocations(GetGraph()->GetAllocator(), max);
+}
+
+void InstructionCodeGeneratorARM64::VisitMax(HMax* max) {
+ GenerateMinMax(max, /*is_min*/ false);
+}
+
void LocationsBuilderARM64::VisitAbs(HAbs* abs) {
LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
switch (abs->GetResultType()) {
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index e34f799d15..b59ccd9034 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -273,6 +273,10 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator {
void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
void HandleCondition(HCondition* instruction);
+ void GenerateMinMaxInt(LocationSummary* locations, bool is_min, DataType::Type type);
+ void GenerateMinMaxFP(LocationSummary* locations, bool is_min, DataType::Type type);
+ void GenerateMinMax(HBinaryOperation* minmax, bool is_min);
+
// Generate a heap reference load using one register `out`:
//
// out <- *(out + offset)
@@ -561,7 +565,14 @@ class CodeGeneratorARM64 : public CodeGenerator {
UNIMPLEMENTED(FATAL);
}
- // Add a new PC-relative method patch for an instruction and return the label
+ // Add a new boot image relocation patch for an instruction and return the label
+ // to be bound before the instruction. The instruction will be either the
+ // ADRP (pass `adrp_label = null`) or the LDR (pass `adrp_label` pointing
+ // to the associated ADRP patch label).
+ vixl::aarch64::Label* NewBootImageRelRoPatch(uint32_t boot_image_offset,
+ vixl::aarch64::Label* adrp_label = nullptr);
+
+ // Add a new boot image method patch for an instruction and return the label
// to be bound before the instruction. The instruction will be either the
// ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing
// to the associated ADRP patch label).
@@ -575,7 +586,7 @@ class CodeGeneratorARM64 : public CodeGenerator {
vixl::aarch64::Label* NewMethodBssEntryPatch(MethodReference target_method,
vixl::aarch64::Label* adrp_label = nullptr);
- // Add a new PC-relative type patch for an instruction and return the label
+ // Add a new boot image type patch for an instruction and return the label
// to be bound before the instruction. The instruction will be either the
// ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing
// to the associated ADRP patch label).
@@ -591,7 +602,7 @@ class CodeGeneratorARM64 : public CodeGenerator {
dex::TypeIndex type_index,
vixl::aarch64::Label* adrp_label = nullptr);
- // Add a new PC-relative string patch for an instruction and return the label
+ // Add a new boot image string patch for an instruction and return the label
// to be bound before the instruction. The instruction will be either the
// ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing
// to the associated ADRP patch label).
@@ -820,7 +831,8 @@ class CodeGeneratorARM64 : public CodeGenerator {
Uint32ToLiteralMap uint32_literals_;
// Deduplication map for 64-bit literals, used for non-patchable method address or method code.
Uint64ToLiteralMap uint64_literals_;
- // PC-relative method patch info for kBootImageLinkTimePcRelative.
+ // PC-relative method patch info for kBootImageLinkTimePcRelative/BootImageRelRo.
+ // Also used for type/string patches for kBootImageRelRo (same linker patch as for methods).
ArenaDeque<PcRelativePatchInfo> boot_image_method_patches_;
// PC-relative method patch info for kBssEntry.
ArenaDeque<PcRelativePatchInfo> method_bss_entry_patches_;
@@ -828,7 +840,7 @@ class CodeGeneratorARM64 : public CodeGenerator {
ArenaDeque<PcRelativePatchInfo> boot_image_type_patches_;
// PC-relative type patch info for kBssEntry.
ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
- // PC-relative String patch info; type depends on configuration (intern table or boot image PIC).
+ // PC-relative String patch info for kBootImageLinkTimePcRelative.
ArenaDeque<PcRelativePatchInfo> boot_image_string_patches_;
// PC-relative String patch info for kBssEntry.
ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_;
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 13518adf7d..6ebcc67b49 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -4690,6 +4690,244 @@ void InstructionCodeGeneratorARMVIXL::VisitRem(HRem* rem) {
}
}
+static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) {
+ LocationSummary* locations = new (allocator) LocationSummary(minmax);
+ switch (minmax->GetResultType()) {
+ case DataType::Type::kInt32:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ break;
+ case DataType::Type::kInt64:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ break;
+ case DataType::Type::kFloat32:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ locations->AddTemp(Location::RequiresRegister());
+ break;
+ case DataType::Type::kFloat64:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ break;
+ default:
+ LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType();
+ }
+}
+
+void InstructionCodeGeneratorARMVIXL::GenerateMinMaxInt(LocationSummary* locations, bool is_min) {
+ Location op1_loc = locations->InAt(0);
+ Location op2_loc = locations->InAt(1);
+ Location out_loc = locations->Out();
+
+ vixl32::Register op1 = RegisterFrom(op1_loc);
+ vixl32::Register op2 = RegisterFrom(op2_loc);
+ vixl32::Register out = RegisterFrom(out_loc);
+
+ __ Cmp(op1, op2);
+
+ {
+ ExactAssemblyScope aas(GetVIXLAssembler(),
+ 3 * kMaxInstructionSizeInBytes,
+ CodeBufferCheckScope::kMaximumSize);
+
+ __ ite(is_min ? lt : gt);
+ __ mov(is_min ? lt : gt, out, op1);
+ __ mov(is_min ? ge : le, out, op2);
+ }
+}
+
+void InstructionCodeGeneratorARMVIXL::GenerateMinMaxLong(LocationSummary* locations, bool is_min) {
+ Location op1_loc = locations->InAt(0);
+ Location op2_loc = locations->InAt(1);
+ Location out_loc = locations->Out();
+
+ // Optimization: don't generate any code if inputs are the same.
+ if (op1_loc.Equals(op2_loc)) {
+ DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in location builder.
+ return;
+ }
+
+ vixl32::Register op1_lo = LowRegisterFrom(op1_loc);
+ vixl32::Register op1_hi = HighRegisterFrom(op1_loc);
+ vixl32::Register op2_lo = LowRegisterFrom(op2_loc);
+ vixl32::Register op2_hi = HighRegisterFrom(op2_loc);
+ vixl32::Register out_lo = LowRegisterFrom(out_loc);
+ vixl32::Register out_hi = HighRegisterFrom(out_loc);
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ const vixl32::Register temp = temps.Acquire();
+
+ DCHECK(op1_lo.Is(out_lo));
+ DCHECK(op1_hi.Is(out_hi));
+
+ // Compare op1 >= op2, or op1 < op2.
+ __ Cmp(out_lo, op2_lo);
+ __ Sbcs(temp, out_hi, op2_hi);
+
+ // Now GE/LT condition code is correct for the long comparison.
+ {
+ vixl32::ConditionType cond = is_min ? ge : lt;
+ ExactAssemblyScope it_scope(GetVIXLAssembler(),
+ 3 * kMaxInstructionSizeInBytes,
+ CodeBufferCheckScope::kMaximumSize);
+ __ itt(cond);
+ __ mov(cond, out_lo, op2_lo);
+ __ mov(cond, out_hi, op2_hi);
+ }
+}
+
+void InstructionCodeGeneratorARMVIXL::GenerateMinMaxFloat(HInstruction* minmax, bool is_min) {
+ LocationSummary* locations = minmax->GetLocations();
+ Location op1_loc = locations->InAt(0);
+ Location op2_loc = locations->InAt(1);
+ Location out_loc = locations->Out();
+
+ // Optimization: don't generate any code if inputs are the same.
+ if (op1_loc.Equals(op2_loc)) {
+ DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in location builder.
+ return;
+ }
+
+ vixl32::SRegister op1 = SRegisterFrom(op1_loc);
+ vixl32::SRegister op2 = SRegisterFrom(op2_loc);
+ vixl32::SRegister out = SRegisterFrom(out_loc);
+
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ const vixl32::Register temp1 = temps.Acquire();
+ vixl32::Register temp2 = RegisterFrom(locations->GetTemp(0));
+ vixl32::Label nan, done;
+ vixl32::Label* final_label = codegen_->GetFinalLabel(minmax, &done);
+
+ DCHECK(op1.Is(out));
+
+ __ Vcmp(op1, op2);
+ __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
+ __ B(vs, &nan, /* far_target */ false); // if un-ordered, go to NaN handling.
+
+ // op1 <> op2
+ vixl32::ConditionType cond = is_min ? gt : lt;
+ {
+ ExactAssemblyScope it_scope(GetVIXLAssembler(),
+ 2 * kMaxInstructionSizeInBytes,
+ CodeBufferCheckScope::kMaximumSize);
+ __ it(cond);
+ __ vmov(cond, F32, out, op2);
+ }
+ // for <>(not equal), we've done min/max calculation.
+ __ B(ne, final_label, /* far_target */ false);
+
+ // handle op1 == op2, max(+0.0,-0.0), min(+0.0,-0.0).
+ __ Vmov(temp1, op1);
+ __ Vmov(temp2, op2);
+ if (is_min) {
+ __ Orr(temp1, temp1, temp2);
+ } else {
+ __ And(temp1, temp1, temp2);
+ }
+ __ Vmov(out, temp1);
+ __ B(final_label);
+
+ // handle NaN input.
+ __ Bind(&nan);
+ __ Movt(temp1, High16Bits(kNanFloat)); // 0x7FC0xxxx is a NaN.
+ __ Vmov(out, temp1);
+
+ if (done.IsReferenced()) {
+ __ Bind(&done);
+ }
+}
+
+void InstructionCodeGeneratorARMVIXL::GenerateMinMaxDouble(HInstruction* minmax, bool is_min) {
+ LocationSummary* locations = minmax->GetLocations();
+ Location op1_loc = locations->InAt(0);
+ Location op2_loc = locations->InAt(1);
+ Location out_loc = locations->Out();
+
+ // Optimization: don't generate any code if inputs are the same.
+ if (op1_loc.Equals(op2_loc)) {
+ DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in.
+ return;
+ }
+
+ vixl32::DRegister op1 = DRegisterFrom(op1_loc);
+ vixl32::DRegister op2 = DRegisterFrom(op2_loc);
+ vixl32::DRegister out = DRegisterFrom(out_loc);
+ vixl32::Label handle_nan_eq, done;
+ vixl32::Label* final_label = codegen_->GetFinalLabel(minmax, &done);
+
+ DCHECK(op1.Is(out));
+
+ __ Vcmp(op1, op2);
+ __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
+ __ B(vs, &handle_nan_eq, /* far_target */ false); // if un-ordered, go to NaN handling.
+
+ // op1 <> op2
+ vixl32::ConditionType cond = is_min ? gt : lt;
+ {
+ ExactAssemblyScope it_scope(GetVIXLAssembler(),
+ 2 * kMaxInstructionSizeInBytes,
+ CodeBufferCheckScope::kMaximumSize);
+ __ it(cond);
+ __ vmov(cond, F64, out, op2);
+ }
+ // for <>(not equal), we've done min/max calculation.
+ __ B(ne, final_label, /* far_target */ false);
+
+ // handle op1 == op2, max(+0.0,-0.0).
+ if (!is_min) {
+ __ Vand(F64, out, op1, op2);
+ __ B(final_label);
+ }
+
+ // handle op1 == op2, min(+0.0,-0.0), NaN input.
+ __ Bind(&handle_nan_eq);
+ __ Vorr(F64, out, op1, op2); // assemble op1/-0.0/NaN.
+
+ if (done.IsReferenced()) {
+ __ Bind(&done);
+ }
+}
+
+void InstructionCodeGeneratorARMVIXL::GenerateMinMax(HBinaryOperation* minmax, bool is_min) {
+ DataType::Type type = minmax->GetResultType();
+ switch (type) {
+ case DataType::Type::kInt32:
+ GenerateMinMaxInt(minmax->GetLocations(), is_min);
+ break;
+ case DataType::Type::kInt64:
+ GenerateMinMaxLong(minmax->GetLocations(), is_min);
+ break;
+ case DataType::Type::kFloat32:
+ GenerateMinMaxFloat(minmax, is_min);
+ break;
+ case DataType::Type::kFloat64:
+ GenerateMinMaxDouble(minmax, is_min);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected type for HMinMax " << type;
+ }
+}
+
+void LocationsBuilderARMVIXL::VisitMin(HMin* min) {
+ CreateMinMaxLocations(GetGraph()->GetAllocator(), min);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitMin(HMin* min) {
+ GenerateMinMax(min, /*is_min*/ true);
+}
+
+void LocationsBuilderARMVIXL::VisitMax(HMax* max) {
+ CreateMinMaxLocations(GetGraph()->GetAllocator(), max);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitMax(HMax* max) {
+ GenerateMinMax(max, /*is_min*/ false);
+}
+
void LocationsBuilderARMVIXL::VisitAbs(HAbs* abs) {
LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
switch (abs->GetResultType()) {
@@ -7088,7 +7326,7 @@ HLoadClass::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadClassKind(
case HLoadClass::LoadKind::kReferrersClass:
break;
case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
- case HLoadClass::LoadKind::kBootImageClassTable:
+ case HLoadClass::LoadKind::kBootImageRelRo:
case HLoadClass::LoadKind::kBssEntry:
DCHECK(!Runtime::Current()->UseJitCompilation());
break;
@@ -7198,18 +7436,12 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_
__ Ldr(out, codegen_->DeduplicateBootImageAddressLiteral(address));
break;
}
- case HLoadClass::LoadKind::kBootImageClassTable: {
+ case HLoadClass::LoadKind::kBootImageRelRo: {
DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
- codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
+ codegen_->NewBootImageRelRoPatch(codegen_->GetBootImageOffset(cls));
codegen_->EmitMovwMovtPlaceholder(labels, out);
__ Ldr(out, MemOperand(out, /* offset */ 0));
- // Extract the reference from the slot data, i.e. clear the hash bits.
- int32_t masked_hash = ClassTable::TableSlot::MaskHash(
- ComputeModifiedUtf8Hash(cls->GetDexFile().StringByTypeIdx(cls->GetTypeIndex())));
- if (masked_hash != 0) {
- __ Sub(out, out, Operand(masked_hash));
- }
break;
}
case HLoadClass::LoadKind::kBssEntry: {
@@ -7295,7 +7527,7 @@ HLoadString::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadStringKind(
HLoadString::LoadKind desired_string_load_kind) {
switch (desired_string_load_kind) {
case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
- case HLoadString::LoadKind::kBootImageInternTable:
+ case HLoadString::LoadKind::kBootImageRelRo:
case HLoadString::LoadKind::kBssEntry:
DCHECK(!Runtime::Current()->UseJitCompilation());
break;
@@ -7359,10 +7591,10 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THRE
__ Ldr(out, codegen_->DeduplicateBootImageAddressLiteral(address));
return;
}
- case HLoadString::LoadKind::kBootImageInternTable: {
+ case HLoadString::LoadKind::kBootImageRelRo: {
DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
- codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex());
+ codegen_->NewBootImageRelRoPatch(codegen_->GetBootImageOffset(load));
codegen_->EmitMovwMovtPlaceholder(labels, out);
__ Ldr(out, MemOperand(out, /* offset */ 0));
return;
@@ -8956,6 +9188,14 @@ void CodeGeneratorARMVIXL::GenerateStaticOrDirectCall(
case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
__ Mov(RegisterFrom(temp), Operand::From(invoke->GetMethodAddress()));
break;
+ case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: {
+ uint32_t boot_image_offset = GetBootImageOffset(invoke);
+ PcRelativePatchInfo* labels = NewBootImageRelRoPatch(boot_image_offset);
+ vixl32::Register temp_reg = RegisterFrom(temp);
+ EmitMovwMovtPlaceholder(labels, temp_reg);
+ GetAssembler()->LoadFromOffset(kLoadWord, temp_reg, temp_reg, /* offset*/ 0);
+ break;
+ }
case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: {
PcRelativePatchInfo* labels = NewMethodBssEntryPatch(
MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex()));
@@ -9053,6 +9293,13 @@ void CodeGeneratorARMVIXL::GenerateVirtualCall(
}
}
+CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageRelRoPatch(
+ uint32_t boot_image_offset) {
+ return NewPcRelativePatch(/* dex_file */ nullptr,
+ boot_image_offset,
+ &boot_image_method_patches_);
+}
+
CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageMethodPatch(
MethodReference target_method) {
return NewPcRelativePatch(
@@ -9143,6 +9390,14 @@ inline void CodeGeneratorARMVIXL::EmitPcRelativeLinkerPatches(
}
}
+linker::LinkerPatch DataBimgRelRoPatchAdapter(size_t literal_offset,
+ const DexFile* target_dex_file,
+ uint32_t pc_insn_offset,
+ uint32_t boot_image_offset) {
+ DCHECK(target_dex_file == nullptr); // Unused for DataBimgRelRoPatch(), should be null.
+ return linker::LinkerPatch::DataBimgRelRoPatch(literal_offset, pc_insn_offset, boot_image_offset);
+}
+
void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
DCHECK(linker_patches->empty());
size_t size =
@@ -9162,11 +9417,10 @@ void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* l
EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
boot_image_string_patches_, linker_patches);
} else {
- DCHECK(boot_image_method_patches_.empty());
- EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>(
- boot_image_type_patches_, linker_patches);
- EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>(
- boot_image_string_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<DataBimgRelRoPatchAdapter>(
+ boot_image_method_patches_, linker_patches);
+ DCHECK(boot_image_type_patches_.empty());
+ DCHECK(boot_image_string_patches_.empty());
}
EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
method_bss_entry_patches_, linker_patches);
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index bbc715c59d..2d8f6a6c78 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -349,6 +349,12 @@ class InstructionCodeGeneratorARMVIXL : public InstructionCodeGenerator {
bool value_can_be_null);
void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
+ void GenerateMinMaxInt(LocationSummary* locations, bool is_min);
+ void GenerateMinMaxLong(LocationSummary* locations, bool is_min);
+ void GenerateMinMaxFloat(HInstruction* minmax, bool is_min);
+ void GenerateMinMaxDouble(HInstruction* minmax, bool is_min);
+ void GenerateMinMax(HBinaryOperation* minmax, bool is_min);
+
// Generate a heap reference load using one register `out`:
//
// out <- *(out + offset)
@@ -574,6 +580,7 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
vixl::aarch32::Label add_pc_label;
};
+ PcRelativePatchInfo* NewBootImageRelRoPatch(uint32_t boot_image_offset);
PcRelativePatchInfo* NewBootImageMethodPatch(MethodReference target_method);
PcRelativePatchInfo* NewMethodBssEntryPatch(MethodReference target_method);
PcRelativePatchInfo* NewBootImageTypePatch(const DexFile& dex_file, dex::TypeIndex type_index);
@@ -798,7 +805,8 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
// Deduplication map for 32-bit literals, used for non-patchable boot image addresses.
Uint32ToLiteralMap uint32_literals_;
- // PC-relative method patch info for kBootImageLinkTimePcRelative.
+ // PC-relative method patch info for kBootImageLinkTimePcRelative/kBootImageRelRo.
+ // Also used for type/string patches for kBootImageRelRo (same linker patch as for methods).
ArenaDeque<PcRelativePatchInfo> boot_image_method_patches_;
// PC-relative method patch info for kBssEntry.
ArenaDeque<PcRelativePatchInfo> method_bss_entry_patches_;
@@ -806,7 +814,7 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
ArenaDeque<PcRelativePatchInfo> boot_image_type_patches_;
// PC-relative type patch info for kBssEntry.
ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
- // PC-relative String patch info; type depends on configuration (intern table or boot image PIC).
+ // PC-relative String patch info for kBootImageLinkTimePcRelative.
ArenaDeque<PcRelativePatchInfo> boot_image_string_patches_;
// PC-relative String patch info for kBssEntry.
ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_;
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index eb5f72e953..be9ff48a6b 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -1597,6 +1597,14 @@ inline void CodeGeneratorMIPS::EmitPcRelativeLinkerPatches(
}
}
+linker::LinkerPatch DataBimgRelRoPatchAdapter(size_t literal_offset,
+ const DexFile* target_dex_file,
+ uint32_t pc_insn_offset,
+ uint32_t boot_image_offset) {
+ DCHECK(target_dex_file == nullptr); // Unused for DataBimgRelRoPatch(), should be null.
+ return linker::LinkerPatch::DataBimgRelRoPatch(literal_offset, pc_insn_offset, boot_image_offset);
+}
+
void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
DCHECK(linker_patches->empty());
size_t size =
@@ -1615,11 +1623,10 @@ void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* link
EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
boot_image_string_patches_, linker_patches);
} else {
- DCHECK(boot_image_method_patches_.empty());
- EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>(
- boot_image_type_patches_, linker_patches);
- EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>(
- boot_image_string_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<DataBimgRelRoPatchAdapter>(
+ boot_image_method_patches_, linker_patches);
+ DCHECK(boot_image_type_patches_.empty());
+ DCHECK(boot_image_string_patches_.empty());
}
EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
method_bss_entry_patches_, linker_patches);
@@ -1630,6 +1637,13 @@ void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* link
DCHECK_EQ(size, linker_patches->size());
}
+CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewBootImageRelRoPatch(
+ uint32_t boot_image_offset,
+ const PcRelativePatchInfo* info_high) {
+ return NewPcRelativePatch(
+ /* dex_file */ nullptr, boot_image_offset, info_high, &boot_image_method_patches_);
+}
+
CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewBootImageMethodPatch(
MethodReference target_method,
const PcRelativePatchInfo* info_high) {
@@ -7725,7 +7739,7 @@ HLoadString::LoadKind CodeGeneratorMIPS::GetSupportedLoadStringKind(
HLoadString::LoadKind desired_string_load_kind) {
switch (desired_string_load_kind) {
case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
- case HLoadString::LoadKind::kBootImageInternTable:
+ case HLoadString::LoadKind::kBootImageRelRo:
case HLoadString::LoadKind::kBssEntry:
DCHECK(!Runtime::Current()->UseJitCompilation());
break;
@@ -7748,7 +7762,7 @@ HLoadClass::LoadKind CodeGeneratorMIPS::GetSupportedLoadClassKind(
case HLoadClass::LoadKind::kReferrersClass:
break;
case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
- case HLoadClass::LoadKind::kBootImageClassTable:
+ case HLoadClass::LoadKind::kBootImageRelRo:
case HLoadClass::LoadKind::kBssEntry:
DCHECK(!Runtime::Current()->UseJitCompilation());
break;
@@ -7835,6 +7849,15 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall(
case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
__ LoadConst32(temp.AsRegister<Register>(), invoke->GetMethodAddress());
break;
+ case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: {
+ uint32_t boot_image_offset = GetBootImageOffset(invoke);
+ PcRelativePatchInfo* info_high = NewBootImageRelRoPatch(boot_image_offset);
+ PcRelativePatchInfo* info_low = NewBootImageRelRoPatch(boot_image_offset, info_high);
+ Register temp_reg = temp.AsRegister<Register>();
+ EmitPcRelativeAddressPlaceholderHigh(info_high, TMP, base_reg);
+ __ Lw(temp_reg, TMP, /* placeholder */ 0x5678, &info_low->label);
+ break;
+ }
case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: {
PcRelativePatchInfo* info_high = NewMethodBssEntryPatch(
MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex()));
@@ -7956,7 +7979,7 @@ void LocationsBuilderMIPS::VisitLoadClass(HLoadClass* cls) {
// We need an extra register for PC-relative literals on R2.
case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
case HLoadClass::LoadKind::kBootImageAddress:
- case HLoadClass::LoadKind::kBootImageClassTable:
+ case HLoadClass::LoadKind::kBootImageRelRo:
case HLoadClass::LoadKind::kBssEntry:
if (isR6) {
break;
@@ -8008,7 +8031,7 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF
// We need an extra register for PC-relative literals on R2.
case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
case HLoadClass::LoadKind::kBootImageAddress:
- case HLoadClass::LoadKind::kBootImageClassTable:
+ case HLoadClass::LoadKind::kBootImageRelRo:
case HLoadClass::LoadKind::kBssEntry:
base_or_current_method_reg =
(isR6 || has_irreducible_loops) ? ZERO : locations->InAt(0).AsRegister<Register>();
@@ -8065,22 +8088,17 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF
}
break;
}
- case HLoadClass::LoadKind::kBootImageClassTable: {
+ case HLoadClass::LoadKind::kBootImageRelRo: {
DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
+ uint32_t boot_image_offset = codegen_->GetBootImageOffset(cls);
CodeGeneratorMIPS::PcRelativePatchInfo* info_high =
- codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
+ codegen_->NewBootImageRelRoPatch(boot_image_offset);
CodeGeneratorMIPS::PcRelativePatchInfo* info_low =
- codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex(), info_high);
+ codegen_->NewBootImageRelRoPatch(boot_image_offset, info_high);
codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high,
out,
base_or_current_method_reg);
__ Lw(out, out, /* placeholder */ 0x5678, &info_low->label);
- // Extract the reference from the slot data, i.e. clear the hash bits.
- int32_t masked_hash = ClassTable::TableSlot::MaskHash(
- ComputeModifiedUtf8Hash(cls->GetDexFile().StringByTypeIdx(cls->GetTypeIndex())));
- if (masked_hash != 0) {
- __ Addiu(out, out, -masked_hash);
- }
break;
}
case HLoadClass::LoadKind::kBssEntry: {
@@ -8171,7 +8189,7 @@ void LocationsBuilderMIPS::VisitLoadString(HLoadString* load) {
// We need an extra register for PC-relative literals on R2.
case HLoadString::LoadKind::kBootImageAddress:
case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
- case HLoadString::LoadKind::kBootImageInternTable:
+ case HLoadString::LoadKind::kBootImageRelRo:
case HLoadString::LoadKind::kBssEntry:
if (isR6) {
break;
@@ -8223,7 +8241,7 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_
// We need an extra register for PC-relative literals on R2.
case HLoadString::LoadKind::kBootImageAddress:
case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
- case HLoadString::LoadKind::kBootImageInternTable:
+ case HLoadString::LoadKind::kBootImageRelRo:
case HLoadString::LoadKind::kBssEntry:
base_or_current_method_reg =
(isR6 || has_irreducible_loops) ? ZERO : locations->InAt(0).AsRegister<Register>();
@@ -8259,12 +8277,13 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_
}
return;
}
- case HLoadString::LoadKind::kBootImageInternTable: {
+ case HLoadString::LoadKind::kBootImageRelRo: {
DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
+ uint32_t boot_image_offset = codegen_->GetBootImageOffset(load);
CodeGeneratorMIPS::PcRelativePatchInfo* info_high =
- codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex());
+ codegen_->NewBootImageRelRoPatch(boot_image_offset);
CodeGeneratorMIPS::PcRelativePatchInfo* info_low =
- codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex(), info_high);
+ codegen_->NewBootImageRelRoPatch(boot_image_offset, info_high);
codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high,
out,
base_or_current_method_reg);
@@ -8779,6 +8798,390 @@ void InstructionCodeGeneratorMIPS::VisitRem(HRem* instruction) {
}
}
+static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) {
+ LocationSummary* locations = new (allocator) LocationSummary(minmax);
+ switch (minmax->GetResultType()) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ break;
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kOutputOverlap);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType();
+ }
+}
+
+void InstructionCodeGeneratorMIPS::GenerateMinMaxInt(LocationSummary* locations,
+ bool is_min,
+ bool isR6,
+ DataType::Type type) {
+ if (isR6) {
+ // Some architectures, such as ARM and MIPS (prior to r6), have a
+ // conditional move instruction which only changes the target
+ // (output) register if the condition is true (MIPS prior to r6 had
+ // MOVF, MOVT, MOVN, and MOVZ). The SELEQZ and SELNEZ instructions
+ // always change the target (output) register. If the condition is
+ // true the output register gets the contents of the "rs" register;
+ // otherwise, the output register is set to zero. One consequence
+ // of this is that to implement something like "rd = c==0 ? rs : rt"
+ // MIPS64r6 needs to use a pair of SELEQZ/SELNEZ instructions.
+ // After executing this pair of instructions one of the output
+ // registers from the pair will necessarily contain zero. Then the
+ // code ORs the output registers from the SELEQZ/SELNEZ instructions
+ // to get the final result.
+ //
+ // The initial test to see if the output register is same as the
+ // first input register is needed to make sure that value in the
+ // first input register isn't clobbered before we've finished
+ // computing the output value. The logic in the corresponding else
+ // clause performs the same task but makes sure the second input
+ // register isn't clobbered in the event that it's the same register
+ // as the output register; the else clause also handles the case
+ // where the output register is distinct from both the first, and the
+ // second input registers.
+ if (type == DataType::Type::kInt64) {
+ Register a_lo = locations->InAt(0).AsRegisterPairLow<Register>();
+ Register a_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
+ Register b_lo = locations->InAt(1).AsRegisterPairLow<Register>();
+ Register b_hi = locations->InAt(1).AsRegisterPairHigh<Register>();
+ Register out_lo = locations->Out().AsRegisterPairLow<Register>();
+ Register out_hi = locations->Out().AsRegisterPairHigh<Register>();
+
+ MipsLabel compare_done;
+
+ if (a_lo == b_lo) {
+ if (out_lo != a_lo) {
+ __ Move(out_lo, a_lo);
+ __ Move(out_hi, a_hi);
+ }
+ } else {
+ __ Slt(TMP, b_hi, a_hi);
+ __ Bne(b_hi, a_hi, &compare_done);
+
+ __ Sltu(TMP, b_lo, a_lo);
+
+ __ Bind(&compare_done);
+
+ if (is_min) {
+ __ Seleqz(AT, a_lo, TMP);
+ __ Selnez(out_lo, b_lo, TMP); // Safe even if out_lo == a_lo/b_lo
+ // because at this point we're
+ // done using a_lo/b_lo.
+ } else {
+ __ Selnez(AT, a_lo, TMP);
+ __ Seleqz(out_lo, b_lo, TMP); // ditto
+ }
+ __ Or(out_lo, out_lo, AT);
+ if (is_min) {
+ __ Seleqz(AT, a_hi, TMP);
+ __ Selnez(out_hi, b_hi, TMP); // ditto but for out_hi & a_hi/b_hi
+ } else {
+ __ Selnez(AT, a_hi, TMP);
+ __ Seleqz(out_hi, b_hi, TMP); // ditto but for out_hi & a_hi/b_hi
+ }
+ __ Or(out_hi, out_hi, AT);
+ }
+ } else {
+ DCHECK_EQ(type, DataType::Type::kInt32);
+ Register a = locations->InAt(0).AsRegister<Register>();
+ Register b = locations->InAt(1).AsRegister<Register>();
+ Register out = locations->Out().AsRegister<Register>();
+
+ if (a == b) {
+ if (out != a) {
+ __ Move(out, a);
+ }
+ } else {
+ __ Slt(AT, b, a);
+ if (is_min) {
+ __ Seleqz(TMP, a, AT);
+ __ Selnez(AT, b, AT);
+ } else {
+ __ Selnez(TMP, a, AT);
+ __ Seleqz(AT, b, AT);
+ }
+ __ Or(out, TMP, AT);
+ }
+ }
+ } else { // !isR6
+ if (type == DataType::Type::kInt64) {
+ Register a_lo = locations->InAt(0).AsRegisterPairLow<Register>();
+ Register a_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
+ Register b_lo = locations->InAt(1).AsRegisterPairLow<Register>();
+ Register b_hi = locations->InAt(1).AsRegisterPairHigh<Register>();
+ Register out_lo = locations->Out().AsRegisterPairLow<Register>();
+ Register out_hi = locations->Out().AsRegisterPairHigh<Register>();
+
+ MipsLabel compare_done;
+
+ if (a_lo == b_lo) {
+ if (out_lo != a_lo) {
+ __ Move(out_lo, a_lo);
+ __ Move(out_hi, a_hi);
+ }
+ } else {
+ __ Slt(TMP, a_hi, b_hi);
+ __ Bne(a_hi, b_hi, &compare_done);
+
+ __ Sltu(TMP, a_lo, b_lo);
+
+ __ Bind(&compare_done);
+
+ if (is_min) {
+ if (out_lo != a_lo) {
+ __ Movn(out_hi, a_hi, TMP);
+ __ Movn(out_lo, a_lo, TMP);
+ }
+ if (out_lo != b_lo) {
+ __ Movz(out_hi, b_hi, TMP);
+ __ Movz(out_lo, b_lo, TMP);
+ }
+ } else {
+ if (out_lo != a_lo) {
+ __ Movz(out_hi, a_hi, TMP);
+ __ Movz(out_lo, a_lo, TMP);
+ }
+ if (out_lo != b_lo) {
+ __ Movn(out_hi, b_hi, TMP);
+ __ Movn(out_lo, b_lo, TMP);
+ }
+ }
+ }
+ } else {
+ DCHECK_EQ(type, DataType::Type::kInt32);
+ Register a = locations->InAt(0).AsRegister<Register>();
+ Register b = locations->InAt(1).AsRegister<Register>();
+ Register out = locations->Out().AsRegister<Register>();
+
+ if (a == b) {
+ if (out != a) {
+ __ Move(out, a);
+ }
+ } else {
+ __ Slt(AT, a, b);
+ if (is_min) {
+ if (out != a) {
+ __ Movn(out, a, AT);
+ }
+ if (out != b) {
+ __ Movz(out, b, AT);
+ }
+ } else {
+ if (out != a) {
+ __ Movz(out, a, AT);
+ }
+ if (out != b) {
+ __ Movn(out, b, AT);
+ }
+ }
+ }
+ }
+ }
+}
+
+void InstructionCodeGeneratorMIPS::GenerateMinMaxFP(LocationSummary* locations,
+ bool is_min,
+ bool isR6,
+ DataType::Type type) {
+ FRegister out = locations->Out().AsFpuRegister<FRegister>();
+ FRegister a = locations->InAt(0).AsFpuRegister<FRegister>();
+ FRegister b = locations->InAt(1).AsFpuRegister<FRegister>();
+
+ if (isR6) {
+ MipsLabel noNaNs;
+ MipsLabel done;
+ FRegister ftmp = ((out != a) && (out != b)) ? out : FTMP;
+
+ // When Java computes min/max it prefers a NaN to a number; the
+ // behavior of MIPSR6 is to prefer numbers to NaNs, i.e., if one of
+ // the inputs is a NaN and the other is a valid number, the MIPS
+ // instruction will return the number; Java wants the NaN value
+ // returned. This is why there is extra logic preceding the use of
+ // the MIPS min.fmt/max.fmt instructions. If either a, or b holds a
+ // NaN, return the NaN, otherwise return the min/max.
+ if (type == DataType::Type::kFloat64) {
+ __ CmpUnD(FTMP, a, b);
+ __ Bc1eqz(FTMP, &noNaNs);
+
+ // One of the inputs is a NaN
+ __ CmpEqD(ftmp, a, a);
+ // If a == a then b is the NaN, otherwise a is the NaN.
+ __ SelD(ftmp, a, b);
+
+ if (ftmp != out) {
+ __ MovD(out, ftmp);
+ }
+
+ __ B(&done);
+
+ __ Bind(&noNaNs);
+
+ if (is_min) {
+ __ MinD(out, a, b);
+ } else {
+ __ MaxD(out, a, b);
+ }
+ } else {
+ DCHECK_EQ(type, DataType::Type::kFloat32);
+ __ CmpUnS(FTMP, a, b);
+ __ Bc1eqz(FTMP, &noNaNs);
+
+ // One of the inputs is a NaN
+ __ CmpEqS(ftmp, a, a);
+ // If a == a then b is the NaN, otherwise a is the NaN.
+ __ SelS(ftmp, a, b);
+
+ if (ftmp != out) {
+ __ MovS(out, ftmp);
+ }
+
+ __ B(&done);
+
+ __ Bind(&noNaNs);
+
+ if (is_min) {
+ __ MinS(out, a, b);
+ } else {
+ __ MaxS(out, a, b);
+ }
+ }
+
+ __ Bind(&done);
+
+ } else { // !isR6
+ MipsLabel ordered;
+ MipsLabel compare;
+ MipsLabel select;
+ MipsLabel done;
+
+ if (type == DataType::Type::kFloat64) {
+ __ CunD(a, b);
+ } else {
+ DCHECK_EQ(type, DataType::Type::kFloat32);
+ __ CunS(a, b);
+ }
+ __ Bc1f(&ordered);
+
+ // a or b (or both) is a NaN. Return one, which is a NaN.
+ if (type == DataType::Type::kFloat64) {
+ __ CeqD(b, b);
+ } else {
+ __ CeqS(b, b);
+ }
+ __ B(&select);
+
+ __ Bind(&ordered);
+
+ // Neither is a NaN.
+ // a == b? (-0.0 compares equal with +0.0)
+ // If equal, handle zeroes, else compare further.
+ if (type == DataType::Type::kFloat64) {
+ __ CeqD(a, b);
+ } else {
+ __ CeqS(a, b);
+ }
+ __ Bc1f(&compare);
+
+ // a == b either bit for bit or one is -0.0 and the other is +0.0.
+ if (type == DataType::Type::kFloat64) {
+ __ MoveFromFpuHigh(TMP, a);
+ __ MoveFromFpuHigh(AT, b);
+ } else {
+ __ Mfc1(TMP, a);
+ __ Mfc1(AT, b);
+ }
+
+ if (is_min) {
+ // -0.0 prevails over +0.0.
+ __ Or(TMP, TMP, AT);
+ } else {
+ // +0.0 prevails over -0.0.
+ __ And(TMP, TMP, AT);
+ }
+
+ if (type == DataType::Type::kFloat64) {
+ __ Mfc1(AT, a);
+ __ Mtc1(AT, out);
+ __ MoveToFpuHigh(TMP, out);
+ } else {
+ __ Mtc1(TMP, out);
+ }
+ __ B(&done);
+
+ __ Bind(&compare);
+
+ if (type == DataType::Type::kFloat64) {
+ if (is_min) {
+ // return (a <= b) ? a : b;
+ __ ColeD(a, b);
+ } else {
+ // return (a >= b) ? a : b;
+ __ ColeD(b, a); // b <= a
+ }
+ } else {
+ if (is_min) {
+ // return (a <= b) ? a : b;
+ __ ColeS(a, b);
+ } else {
+ // return (a >= b) ? a : b;
+ __ ColeS(b, a); // b <= a
+ }
+ }
+
+ __ Bind(&select);
+
+ if (type == DataType::Type::kFloat64) {
+ __ MovtD(out, a);
+ __ MovfD(out, b);
+ } else {
+ __ MovtS(out, a);
+ __ MovfS(out, b);
+ }
+
+ __ Bind(&done);
+ }
+}
+
+void InstructionCodeGeneratorMIPS::GenerateMinMax(HBinaryOperation* minmax, bool is_min) {
+ bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
+ DataType::Type type = minmax->GetResultType();
+ switch (type) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ GenerateMinMaxInt(minmax->GetLocations(), is_min, isR6, type);
+ break;
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ GenerateMinMaxFP(minmax->GetLocations(), is_min, isR6, type);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected type for HMinMax " << type;
+ }
+}
+
+void LocationsBuilderMIPS::VisitMin(HMin* min) {
+ CreateMinMaxLocations(GetGraph()->GetAllocator(), min);
+}
+
+void InstructionCodeGeneratorMIPS::VisitMin(HMin* min) {
+ GenerateMinMax(min, /*is_min*/ true);
+}
+
+void LocationsBuilderMIPS::VisitMax(HMax* max) {
+ CreateMinMaxLocations(GetGraph()->GetAllocator(), max);
+}
+
+void InstructionCodeGeneratorMIPS::VisitMax(HMax* max) {
+ GenerateMinMax(max, /*is_min*/ false);
+}
+
void LocationsBuilderMIPS::VisitAbs(HAbs* abs) {
LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
switch (abs->GetResultType()) {
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index d09ab7cce0..d90689695b 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -246,6 +246,9 @@ class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator {
bool value_can_be_null);
void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info, uint32_t dex_pc);
+ void GenerateMinMaxInt(LocationSummary* locations, bool is_min, bool isR6, DataType::Type type);
+ void GenerateMinMaxFP(LocationSummary* locations, bool is_min, bool isR6, DataType::Type type);
+ void GenerateMinMax(HBinaryOperation*, bool is_min);
void GenerateAbsFP(LocationSummary* locations, DataType::Type type, bool isR2OrNewer, bool isR6);
// Generate a heap reference load using one register `out`:
@@ -617,6 +620,8 @@ class CodeGeneratorMIPS : public CodeGenerator {
DISALLOW_COPY_AND_ASSIGN(PcRelativePatchInfo);
};
+ PcRelativePatchInfo* NewBootImageRelRoPatch(uint32_t boot_image_offset,
+ const PcRelativePatchInfo* info_high = nullptr);
PcRelativePatchInfo* NewBootImageMethodPatch(MethodReference target_method,
const PcRelativePatchInfo* info_high = nullptr);
PcRelativePatchInfo* NewMethodBssEntryPatch(MethodReference target_method,
@@ -691,7 +696,8 @@ class CodeGeneratorMIPS : public CodeGenerator {
// Deduplication map for 32-bit literals, used for non-patchable boot image addresses.
Uint32ToLiteralMap uint32_literals_;
- // PC-relative method patch info for kBootImageLinkTimePcRelative.
+ // PC-relative method patch info for kBootImageLinkTimePcRelative/kBootImageRelRo.
+ // Also used for type/string patches for kBootImageRelRo (same linker patch as for methods).
ArenaDeque<PcRelativePatchInfo> boot_image_method_patches_;
// PC-relative method patch info for kBssEntry.
ArenaDeque<PcRelativePatchInfo> method_bss_entry_patches_;
@@ -699,7 +705,7 @@ class CodeGeneratorMIPS : public CodeGenerator {
ArenaDeque<PcRelativePatchInfo> boot_image_type_patches_;
// PC-relative type patch info for kBssEntry.
ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
- // PC-relative String patch info; type depends on configuration (intern table or boot image PIC).
+ // PC-relative String patch info for kBootImageLinkTimePcRelative.
ArenaDeque<PcRelativePatchInfo> boot_image_string_patches_;
// PC-relative String patch info for kBssEntry.
ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_;
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 9593eec455..f8851b4eea 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -1509,6 +1509,14 @@ inline void CodeGeneratorMIPS64::EmitPcRelativeLinkerPatches(
}
}
+linker::LinkerPatch DataBimgRelRoPatchAdapter(size_t literal_offset,
+ const DexFile* target_dex_file,
+ uint32_t pc_insn_offset,
+ uint32_t boot_image_offset) {
+ DCHECK(target_dex_file == nullptr); // Unused for DataBimgRelRoPatch(), should be null.
+ return linker::LinkerPatch::DataBimgRelRoPatch(literal_offset, pc_insn_offset, boot_image_offset);
+}
+
void CodeGeneratorMIPS64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
DCHECK(linker_patches->empty());
size_t size =
@@ -1527,11 +1535,10 @@ void CodeGeneratorMIPS64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* li
EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
boot_image_string_patches_, linker_patches);
} else {
- DCHECK(boot_image_method_patches_.empty());
- EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>(
- boot_image_type_patches_, linker_patches);
- EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>(
- boot_image_string_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<DataBimgRelRoPatchAdapter>(
+ boot_image_method_patches_, linker_patches);
+ DCHECK(boot_image_type_patches_.empty());
+ DCHECK(boot_image_string_patches_.empty());
}
EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
method_bss_entry_patches_, linker_patches);
@@ -1542,6 +1549,13 @@ void CodeGeneratorMIPS64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* li
DCHECK_EQ(size, linker_patches->size());
}
+CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewBootImageRelRoPatch(
+ uint32_t boot_image_offset,
+ const PcRelativePatchInfo* info_high) {
+ return NewPcRelativePatch(
+ /* dex_file */ nullptr, boot_image_offset, info_high, &boot_image_method_patches_);
+}
+
CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewBootImageMethodPatch(
MethodReference target_method,
const PcRelativePatchInfo* info_high) {
@@ -5839,7 +5853,7 @@ HLoadString::LoadKind CodeGeneratorMIPS64::GetSupportedLoadStringKind(
bool fallback_load = false;
switch (desired_string_load_kind) {
case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
- case HLoadString::LoadKind::kBootImageInternTable:
+ case HLoadString::LoadKind::kBootImageRelRo:
case HLoadString::LoadKind::kBssEntry:
DCHECK(!Runtime::Current()->UseJitCompilation());
break;
@@ -5866,7 +5880,7 @@ HLoadClass::LoadKind CodeGeneratorMIPS64::GetSupportedLoadClassKind(
case HLoadClass::LoadKind::kReferrersClass:
break;
case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
- case HLoadClass::LoadKind::kBootImageClassTable:
+ case HLoadClass::LoadKind::kBootImageRelRo:
case HLoadClass::LoadKind::kBssEntry:
DCHECK(!Runtime::Current()->UseJitCompilation());
break;
@@ -5926,6 +5940,15 @@ void CodeGeneratorMIPS64::GenerateStaticOrDirectCall(
kLoadDoubleword,
DeduplicateUint64Literal(invoke->GetMethodAddress()));
break;
+ case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: {
+ uint32_t boot_image_offset = GetBootImageOffset(invoke);
+ PcRelativePatchInfo* info_high = NewBootImageRelRoPatch(boot_image_offset);
+ PcRelativePatchInfo* info_low = NewBootImageRelRoPatch(boot_image_offset, info_high);
+ EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low);
+ // Note: Boot image is in the low 4GiB and the entry is 32-bit, so emit a 32-bit load.
+ __ Lwu(temp.AsRegister<GpuRegister>(), AT, /* placeholder */ 0x5678);
+ break;
+ }
case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: {
PcRelativePatchInfo* info_high = NewMethodBssEntryPatch(
MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex()));
@@ -6113,20 +6136,15 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S
codegen_->DeduplicateBootImageAddressLiteral(address));
break;
}
- case HLoadClass::LoadKind::kBootImageClassTable: {
+ case HLoadClass::LoadKind::kBootImageRelRo: {
DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
+ uint32_t boot_image_offset = codegen_->GetBootImageOffset(cls);
CodeGeneratorMIPS64::PcRelativePatchInfo* info_high =
- codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
+ codegen_->NewBootImageRelRoPatch(boot_image_offset);
CodeGeneratorMIPS64::PcRelativePatchInfo* info_low =
- codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex(), info_high);
+ codegen_->NewBootImageRelRoPatch(boot_image_offset, info_high);
codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low);
__ Lwu(out, AT, /* placeholder */ 0x5678);
- // Extract the reference from the slot data, i.e. clear the hash bits.
- int32_t masked_hash = ClassTable::TableSlot::MaskHash(
- ComputeModifiedUtf8Hash(cls->GetDexFile().StringByTypeIdx(cls->GetTypeIndex())));
- if (masked_hash != 0) {
- __ Daddiu(out, out, -masked_hash);
- }
break;
}
case HLoadClass::LoadKind::kBssEntry: {
@@ -6248,12 +6266,13 @@ void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) NO_THREA
codegen_->DeduplicateBootImageAddressLiteral(address));
return;
}
- case HLoadString::LoadKind::kBootImageInternTable: {
+ case HLoadString::LoadKind::kBootImageRelRo: {
DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
+ uint32_t boot_image_offset = codegen_->GetBootImageOffset(load);
CodeGeneratorMIPS64::PcRelativePatchInfo* info_high =
- codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex());
+ codegen_->NewBootImageRelRoPatch(boot_image_offset);
CodeGeneratorMIPS64::PcRelativePatchInfo* info_low =
- codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex(), info_high);
+ codegen_->NewBootImageRelRoPatch(boot_image_offset, info_high);
codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low);
__ Lwu(out, AT, /* placeholder */ 0x5678);
return;
@@ -6665,6 +6684,182 @@ void InstructionCodeGeneratorMIPS64::VisitRem(HRem* instruction) {
}
}
+static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) {
+ LocationSummary* locations = new (allocator) LocationSummary(minmax);
+ switch (minmax->GetResultType()) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ break;
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType();
+ }
+}
+
+void InstructionCodeGeneratorMIPS64::GenerateMinMaxInt(LocationSummary* locations, bool is_min) {
+ GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>();
+ GpuRegister rhs = locations->InAt(1).AsRegister<GpuRegister>();
+ GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+
+ if (lhs == rhs) {
+ if (out != lhs) {
+ __ Move(out, lhs);
+ }
+ } else {
+ // Some architectures, such as ARM and MIPS (prior to r6), have a
+ // conditional move instruction which only changes the target
+ // (output) register if the condition is true (MIPS prior to r6 had
+ // MOVF, MOVT, and MOVZ). The SELEQZ and SELNEZ instructions always
+ // change the target (output) register. If the condition is true the
+ // output register gets the contents of the "rs" register; otherwise,
+ // the output register is set to zero. One consequence of this is
+ // that to implement something like "rd = c==0 ? rs : rt" MIPS64r6
+ // needs to use a pair of SELEQZ/SELNEZ instructions. After
+ // executing this pair of instructions one of the output registers
+ // from the pair will necessarily contain zero. Then the code ORs the
+ // output registers from the SELEQZ/SELNEZ instructions to get the
+ // final result.
+ //
+ // The initial test to see if the output register is same as the
+ // first input register is needed to make sure that value in the
+ // first input register isn't clobbered before we've finished
+ // computing the output value. The logic in the corresponding else
+ // clause performs the same task but makes sure the second input
+ // register isn't clobbered in the event that it's the same register
+ // as the output register; the else clause also handles the case
+ // where the output register is distinct from both the first, and the
+ // second input registers.
+ if (out == lhs) {
+ __ Slt(AT, rhs, lhs);
+ if (is_min) {
+ __ Seleqz(out, lhs, AT);
+ __ Selnez(AT, rhs, AT);
+ } else {
+ __ Selnez(out, lhs, AT);
+ __ Seleqz(AT, rhs, AT);
+ }
+ } else {
+ __ Slt(AT, lhs, rhs);
+ if (is_min) {
+ __ Seleqz(out, rhs, AT);
+ __ Selnez(AT, lhs, AT);
+ } else {
+ __ Selnez(out, rhs, AT);
+ __ Seleqz(AT, lhs, AT);
+ }
+ }
+ __ Or(out, out, AT);
+ }
+}
+
+void InstructionCodeGeneratorMIPS64::GenerateMinMaxFP(LocationSummary* locations,
+ bool is_min,
+ DataType::Type type) {
+ FpuRegister a = locations->InAt(0).AsFpuRegister<FpuRegister>();
+ FpuRegister b = locations->InAt(1).AsFpuRegister<FpuRegister>();
+ FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>();
+
+ Mips64Label noNaNs;
+ Mips64Label done;
+ FpuRegister ftmp = ((out != a) && (out != b)) ? out : FTMP;
+
+ // When Java computes min/max it prefers a NaN to a number; the
+ // behavior of MIPSR6 is to prefer numbers to NaNs, i.e., if one of
+ // the inputs is a NaN and the other is a valid number, the MIPS
+ // instruction will return the number; Java wants the NaN value
+ // returned. This is why there is extra logic preceding the use of
+ // the MIPS min.fmt/max.fmt instructions. If either a, or b holds a
+ // NaN, return the NaN, otherwise return the min/max.
+ if (type == DataType::Type::kFloat64) {
+ __ CmpUnD(FTMP, a, b);
+ __ Bc1eqz(FTMP, &noNaNs);
+
+ // One of the inputs is a NaN
+ __ CmpEqD(ftmp, a, a);
+ // If a == a then b is the NaN, otherwise a is the NaN.
+ __ SelD(ftmp, a, b);
+
+ if (ftmp != out) {
+ __ MovD(out, ftmp);
+ }
+
+ __ Bc(&done);
+
+ __ Bind(&noNaNs);
+
+ if (is_min) {
+ __ MinD(out, a, b);
+ } else {
+ __ MaxD(out, a, b);
+ }
+ } else {
+ DCHECK_EQ(type, DataType::Type::kFloat32);
+ __ CmpUnS(FTMP, a, b);
+ __ Bc1eqz(FTMP, &noNaNs);
+
+ // One of the inputs is a NaN
+ __ CmpEqS(ftmp, a, a);
+ // If a == a then b is the NaN, otherwise a is the NaN.
+ __ SelS(ftmp, a, b);
+
+ if (ftmp != out) {
+ __ MovS(out, ftmp);
+ }
+
+ __ Bc(&done);
+
+ __ Bind(&noNaNs);
+
+ if (is_min) {
+ __ MinS(out, a, b);
+ } else {
+ __ MaxS(out, a, b);
+ }
+ }
+
+ __ Bind(&done);
+}
+
+void InstructionCodeGeneratorMIPS64::GenerateMinMax(HBinaryOperation* minmax, bool is_min) {
+ DataType::Type type = minmax->GetResultType();
+ switch (type) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ GenerateMinMaxInt(minmax->GetLocations(), is_min);
+ break;
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ GenerateMinMaxFP(minmax->GetLocations(), is_min, type);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected type for HMinMax " << type;
+ }
+}
+
+void LocationsBuilderMIPS64::VisitMin(HMin* min) {
+ CreateMinMaxLocations(GetGraph()->GetAllocator(), min);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitMin(HMin* min) {
+ GenerateMinMax(min, /*is_min*/ true);
+}
+
+void LocationsBuilderMIPS64::VisitMax(HMax* max) {
+ CreateMinMaxLocations(GetGraph()->GetAllocator(), max);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitMax(HMax* max) {
+ GenerateMinMax(max, /*is_min*/ false);
+}
+
void LocationsBuilderMIPS64::VisitAbs(HAbs* abs) {
LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
switch (abs->GetResultType()) {
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index ddeb3eb90c..d1da1cec15 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -242,6 +242,10 @@ class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator {
bool value_can_be_null);
void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
+ void GenerateMinMaxInt(LocationSummary* locations, bool is_min);
+ void GenerateMinMaxFP(LocationSummary* locations, bool is_min, DataType::Type type);
+ void GenerateMinMax(HBinaryOperation* minmax, bool is_min);
+
// Generate a heap reference load using one register `out`:
//
// out <- *(out + offset)
@@ -586,6 +590,8 @@ class CodeGeneratorMIPS64 : public CodeGenerator {
DISALLOW_COPY_AND_ASSIGN(PcRelativePatchInfo);
};
+ PcRelativePatchInfo* NewBootImageRelRoPatch(uint32_t boot_image_offset,
+ const PcRelativePatchInfo* info_high = nullptr);
PcRelativePatchInfo* NewBootImageMethodPatch(MethodReference target_method,
const PcRelativePatchInfo* info_high = nullptr);
PcRelativePatchInfo* NewMethodBssEntryPatch(MethodReference target_method,
@@ -655,7 +661,8 @@ class CodeGeneratorMIPS64 : public CodeGenerator {
// Deduplication map for 64-bit literals, used for non-patchable method address or method code
// address.
Uint64ToLiteralMap uint64_literals_;
- // PC-relative method patch info for kBootImageLinkTimePcRelative.
+ // PC-relative method patch info for kBootImageLinkTimePcRelative/kBootImageRelRo.
+ // Also used for type/string patches for kBootImageRelRo (same linker patch as for methods).
ArenaDeque<PcRelativePatchInfo> boot_image_method_patches_;
// PC-relative method patch info for kBssEntry.
ArenaDeque<PcRelativePatchInfo> method_bss_entry_patches_;
@@ -663,7 +670,7 @@ class CodeGeneratorMIPS64 : public CodeGenerator {
ArenaDeque<PcRelativePatchInfo> boot_image_type_patches_;
// PC-relative type patch info for kBssEntry.
ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
- // PC-relative String patch info; type depends on configuration (intern table or boot image PIC).
+ // PC-relative String patch info for kBootImageLinkTimePcRelative.
ArenaDeque<PcRelativePatchInfo> boot_image_string_patches_;
// PC-relative type patch info for kBssEntry.
ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_;
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 51b96be0f8..4818084a72 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -51,6 +51,9 @@ static constexpr int kC2ConditionMask = 0x400;
static constexpr int kFakeReturnRegister = Register(8);
+static constexpr int64_t kDoubleNaN = INT64_C(0x7FF8000000000000);
+static constexpr int32_t kFloatNaN = INT32_C(0x7FC00000);
+
// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<X86Assembler*>(codegen->GetAssembler())-> // NOLINT
#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86PointerSize, x).Int32Value()
@@ -3802,6 +3805,211 @@ void InstructionCodeGeneratorX86::VisitRem(HRem* rem) {
}
}
+static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) {
+ LocationSummary* locations = new (allocator) LocationSummary(minmax);
+ switch (minmax->GetResultType()) {
+ case DataType::Type::kInt32:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ break;
+ case DataType::Type::kInt64:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ // Register to use to perform a long subtract to set cc.
+ locations->AddTemp(Location::RequiresRegister());
+ break;
+ case DataType::Type::kFloat32:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ locations->AddTemp(Location::RequiresRegister());
+ break;
+ case DataType::Type::kFloat64:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ break;
+ default:
+ LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType();
+ }
+}
+
+void InstructionCodeGeneratorX86::GenerateMinMaxInt(LocationSummary* locations,
+ bool is_min,
+ DataType::Type type) {
+ Location op1_loc = locations->InAt(0);
+ Location op2_loc = locations->InAt(1);
+
+ // Shortcut for same input locations.
+ if (op1_loc.Equals(op2_loc)) {
+ // Can return immediately, as op1_loc == out_loc.
+ // Note: if we ever support separate registers, e.g., output into memory, we need to check for
+ // a copy here.
+ DCHECK(locations->Out().Equals(op1_loc));
+ return;
+ }
+
+ if (type == DataType::Type::kInt64) {
+ // Need to perform a subtract to get the sign right.
+ // op1 is already in the same location as the output.
+ Location output = locations->Out();
+ Register output_lo = output.AsRegisterPairLow<Register>();
+ Register output_hi = output.AsRegisterPairHigh<Register>();
+
+ Register op2_lo = op2_loc.AsRegisterPairLow<Register>();
+ Register op2_hi = op2_loc.AsRegisterPairHigh<Register>();
+
+ // The comparison is performed by subtracting the second operand from
+ // the first operand and then setting the status flags in the same
+ // manner as the SUB instruction."
+ __ cmpl(output_lo, op2_lo);
+
+ // Now use a temp and the borrow to finish the subtraction of op2_hi.
+ Register temp = locations->GetTemp(0).AsRegister<Register>();
+ __ movl(temp, output_hi);
+ __ sbbl(temp, op2_hi);
+
+ // Now the condition code is correct.
+ Condition cond = is_min ? Condition::kGreaterEqual : Condition::kLess;
+ __ cmovl(cond, output_lo, op2_lo);
+ __ cmovl(cond, output_hi, op2_hi);
+ } else {
+ DCHECK_EQ(type, DataType::Type::kInt32);
+ Register out = locations->Out().AsRegister<Register>();
+ Register op2 = op2_loc.AsRegister<Register>();
+
+ // (out := op1)
+ // out <=? op2
+ // if out is min jmp done
+ // out := op2
+ // done:
+
+ __ cmpl(out, op2);
+ Condition cond = is_min ? Condition::kGreater : Condition::kLess;
+ __ cmovl(cond, out, op2);
+ }
+}
+
+void InstructionCodeGeneratorX86::GenerateMinMaxFP(LocationSummary* locations,
+ bool is_min,
+ DataType::Type type) {
+ Location op1_loc = locations->InAt(0);
+ Location op2_loc = locations->InAt(1);
+ Location out_loc = locations->Out();
+ XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
+
+ // Shortcut for same input locations.
+ if (op1_loc.Equals(op2_loc)) {
+ DCHECK(out_loc.Equals(op1_loc));
+ return;
+ }
+
+ // (out := op1)
+ // out <=? op2
+ // if Nan jmp Nan_label
+ // if out is min jmp done
+ // if op2 is min jmp op2_label
+ // handle -0/+0
+ // jmp done
+ // Nan_label:
+ // out := NaN
+ // op2_label:
+ // out := op2
+ // done:
+ //
+ // This removes one jmp, but needs to copy one input (op1) to out.
+ //
+ // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath?
+
+ XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();
+
+ NearLabel nan, done, op2_label;
+ if (type == DataType::Type::kFloat64) {
+ __ ucomisd(out, op2);
+ } else {
+ DCHECK_EQ(type, DataType::Type::kFloat32);
+ __ ucomiss(out, op2);
+ }
+
+ __ j(Condition::kParityEven, &nan);
+
+ __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
+ __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);
+
+ // Handle 0.0/-0.0.
+ if (is_min) {
+ if (type == DataType::Type::kFloat64) {
+ __ orpd(out, op2);
+ } else {
+ __ orps(out, op2);
+ }
+ } else {
+ if (type == DataType::Type::kFloat64) {
+ __ andpd(out, op2);
+ } else {
+ __ andps(out, op2);
+ }
+ }
+ __ jmp(&done);
+
+ // NaN handling.
+ __ Bind(&nan);
+ if (type == DataType::Type::kFloat64) {
+ // TODO: Use a constant from the constant table (requires extra input).
+ __ LoadLongConstant(out, kDoubleNaN);
+ } else {
+ Register constant = locations->GetTemp(0).AsRegister<Register>();
+ __ movl(constant, Immediate(kFloatNaN));
+ __ movd(out, constant);
+ }
+ __ jmp(&done);
+
+ // out := op2;
+ __ Bind(&op2_label);
+ if (type == DataType::Type::kFloat64) {
+ __ movsd(out, op2);
+ } else {
+ __ movss(out, op2);
+ }
+
+ // Done.
+ __ Bind(&done);
+}
+
+void InstructionCodeGeneratorX86::GenerateMinMax(HBinaryOperation* minmax, bool is_min) {
+ DataType::Type type = minmax->GetResultType();
+ switch (type) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ GenerateMinMaxInt(minmax->GetLocations(), is_min, type);
+ break;
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ GenerateMinMaxFP(minmax->GetLocations(), is_min, type);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected type for HMinMax " << type;
+ }
+}
+
+void LocationsBuilderX86::VisitMin(HMin* min) {
+ CreateMinMaxLocations(GetGraph()->GetAllocator(), min);
+}
+
+void InstructionCodeGeneratorX86::VisitMin(HMin* min) {
+ GenerateMinMax(min, /*is_min*/ true);
+}
+
+void LocationsBuilderX86::VisitMax(HMax* max) {
+ CreateMinMaxLocations(GetGraph()->GetAllocator(), max);
+}
+
+void InstructionCodeGeneratorX86::VisitMax(HMax* max) {
+ GenerateMinMax(max, /*is_min*/ false);
+}
+
void LocationsBuilderX86::VisitAbs(HAbs* abs) {
LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
switch (abs->GetResultType()) {
@@ -4624,6 +4832,15 @@ void CodeGeneratorX86::GenerateStaticOrDirectCall(
case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
__ movl(temp.AsRegister<Register>(), Immediate(invoke->GetMethodAddress()));
break;
+ case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: {
+ Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke,
+ temp.AsRegister<Register>());
+ __ movl(temp.AsRegister<Register>(), Address(base_reg, kDummy32BitOffset));
+ RecordBootImageRelRoPatch(
+ invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress(),
+ GetBootImageOffset(invoke));
+ break;
+ }
case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: {
Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke,
temp.AsRegister<Register>());
@@ -4685,6 +4902,13 @@ void CodeGeneratorX86::GenerateVirtualCall(
RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
}
+void CodeGeneratorX86::RecordBootImageRelRoPatch(HX86ComputeBaseMethodAddress* method_address,
+ uint32_t boot_image_offset) {
+ boot_image_method_patches_.emplace_back(
+ method_address, /* target_dex_file */ nullptr, boot_image_offset);
+ __ Bind(&boot_image_method_patches_.back().label);
+}
+
void CodeGeneratorX86::RecordBootImageMethodPatch(HInvokeStaticOrDirect* invoke) {
DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
HX86ComputeBaseMethodAddress* method_address =
@@ -4754,6 +4978,14 @@ inline void CodeGeneratorX86::EmitPcRelativeLinkerPatches(
}
}
+linker::LinkerPatch DataBimgRelRoPatchAdapter(size_t literal_offset,
+ const DexFile* target_dex_file,
+ uint32_t pc_insn_offset,
+ uint32_t boot_image_offset) {
+ DCHECK(target_dex_file == nullptr); // Unused for DataBimgRelRoPatch(), should be null.
+ return linker::LinkerPatch::DataBimgRelRoPatch(literal_offset, pc_insn_offset, boot_image_offset);
+}
+
void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
DCHECK(linker_patches->empty());
size_t size =
@@ -4772,11 +5004,10 @@ void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linke
EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
boot_image_string_patches_, linker_patches);
} else {
- DCHECK(boot_image_method_patches_.empty());
- EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>(
- boot_image_type_patches_, linker_patches);
- EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>(
- boot_image_string_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<DataBimgRelRoPatchAdapter>(
+ boot_image_method_patches_, linker_patches);
+ DCHECK(boot_image_type_patches_.empty());
+ DCHECK(boot_image_string_patches_.empty());
}
EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
method_bss_entry_patches_, linker_patches);
@@ -6145,7 +6376,7 @@ HLoadClass::LoadKind CodeGeneratorX86::GetSupportedLoadClassKind(
case HLoadClass::LoadKind::kReferrersClass:
break;
case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
- case HLoadClass::LoadKind::kBootImageClassTable:
+ case HLoadClass::LoadKind::kBootImageRelRo:
case HLoadClass::LoadKind::kBssEntry:
DCHECK(!Runtime::Current()->UseJitCompilation());
break;
@@ -6183,7 +6414,7 @@ void LocationsBuilderX86::VisitLoadClass(HLoadClass* cls) {
if (load_kind == HLoadClass::LoadKind::kReferrersClass ||
load_kind == HLoadClass::LoadKind::kBootImageLinkTimePcRelative ||
- load_kind == HLoadClass::LoadKind::kBootImageClassTable ||
+ load_kind == HLoadClass::LoadKind::kBootImageRelRo ||
load_kind == HLoadClass::LoadKind::kBssEntry) {
locations->SetInAt(0, Location::RequiresRegister());
}
@@ -6259,17 +6490,12 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFE
__ movl(out, Immediate(address));
break;
}
- case HLoadClass::LoadKind::kBootImageClassTable: {
+ case HLoadClass::LoadKind::kBootImageRelRo: {
DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
Register method_address = locations->InAt(0).AsRegister<Register>();
__ movl(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset));
- codegen_->RecordBootImageTypePatch(cls);
- // Extract the reference from the slot data, i.e. clear the hash bits.
- int32_t masked_hash = ClassTable::TableSlot::MaskHash(
- ComputeModifiedUtf8Hash(cls->GetDexFile().StringByTypeIdx(cls->GetTypeIndex())));
- if (masked_hash != 0) {
- __ subl(out, Immediate(masked_hash));
- }
+ codegen_->RecordBootImageRelRoPatch(cls->InputAt(0)->AsX86ComputeBaseMethodAddress(),
+ codegen_->GetBootImageOffset(cls));
break;
}
case HLoadClass::LoadKind::kBssEntry: {
@@ -6349,7 +6575,7 @@ HLoadString::LoadKind CodeGeneratorX86::GetSupportedLoadStringKind(
HLoadString::LoadKind desired_string_load_kind) {
switch (desired_string_load_kind) {
case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
- case HLoadString::LoadKind::kBootImageInternTable:
+ case HLoadString::LoadKind::kBootImageRelRo:
case HLoadString::LoadKind::kBssEntry:
DCHECK(!Runtime::Current()->UseJitCompilation());
break;
@@ -6368,7 +6594,7 @@ void LocationsBuilderX86::VisitLoadString(HLoadString* load) {
LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind);
HLoadString::LoadKind load_kind = load->GetLoadKind();
if (load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative ||
- load_kind == HLoadString::LoadKind::kBootImageInternTable ||
+ load_kind == HLoadString::LoadKind::kBootImageRelRo ||
load_kind == HLoadString::LoadKind::kBssEntry) {
locations->SetInAt(0, Location::RequiresRegister());
}
@@ -6422,11 +6648,12 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) NO_THREAD_S
__ movl(out, Immediate(address));
return;
}
- case HLoadString::LoadKind::kBootImageInternTable: {
+ case HLoadString::LoadKind::kBootImageRelRo: {
DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
Register method_address = locations->InAt(0).AsRegister<Register>();
__ movl(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset));
- codegen_->RecordBootImageStringPatch(load);
+ codegen_->RecordBootImageRelRoPatch(load->InputAt(0)->AsX86ComputeBaseMethodAddress(),
+ codegen_->GetBootImageOffset(load));
return;
}
case HLoadString::LoadKind::kBssEntry: {
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 51e5bca00b..9c537a7371 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -225,6 +225,9 @@ class InstructionCodeGeneratorX86 : public InstructionCodeGenerator {
void GenerateShlLong(const Location& loc, int shift);
void GenerateShrLong(const Location& loc, int shift);
void GenerateUShrLong(const Location& loc, int shift);
+ void GenerateMinMaxInt(LocationSummary* locations, bool is_min, DataType::Type type);
+ void GenerateMinMaxFP(LocationSummary* locations, bool is_min, DataType::Type type);
+ void GenerateMinMax(HBinaryOperation* minmax, bool is_min);
void HandleFieldSet(HInstruction* instruction,
const FieldInfo& field_info,
@@ -414,6 +417,8 @@ class CodeGeneratorX86 : public CodeGenerator {
void GenerateVirtualCall(
HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE;
+ void RecordBootImageRelRoPatch(HX86ComputeBaseMethodAddress* method_address,
+ uint32_t boot_image_offset);
void RecordBootImageMethodPatch(HInvokeStaticOrDirect* invoke);
void RecordMethodBssEntryPatch(HInvokeStaticOrDirect* invoke);
void RecordBootImageTypePatch(HLoadClass* load_class);
@@ -631,17 +636,18 @@ class CodeGeneratorX86 : public CodeGenerator {
X86Assembler assembler_;
const X86InstructionSetFeatures& isa_features_;
- // PC-relative method patch info for kBootImageLinkTimePcRelative.
+ // PC-relative method patch info for kBootImageLinkTimePcRelative/kBootImageRelRo.
+ // Also used for type/string patches for kBootImageRelRo (same linker patch as for methods).
ArenaDeque<X86PcRelativePatchInfo> boot_image_method_patches_;
// PC-relative method patch info for kBssEntry.
ArenaDeque<X86PcRelativePatchInfo> method_bss_entry_patches_;
// PC-relative type patch info for kBootImageLinkTimePcRelative.
ArenaDeque<X86PcRelativePatchInfo> boot_image_type_patches_;
- // Type patch locations for kBssEntry.
+ // PC-relative type patch info for kBssEntry.
ArenaDeque<X86PcRelativePatchInfo> type_bss_entry_patches_;
- // String patch locations; type depends on configuration (intern table or boot image PIC).
+ // PC-relative String patch info for kBootImageLinkTimePcRelative.
ArenaDeque<X86PcRelativePatchInfo> boot_image_string_patches_;
- // String patch locations for kBssEntry.
+ // PC-relative String patch info for kBssEntry.
ArenaDeque<X86PcRelativePatchInfo> string_bss_entry_patches_;
// Patches for string root accesses in JIT compiled code.
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 0bb56a2b4a..c378c5b957 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -998,6 +998,13 @@ void CodeGeneratorX86_64::GenerateStaticOrDirectCall(
case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
Load64BitValue(temp.AsRegister<CpuRegister>(), invoke->GetMethodAddress());
break;
+ case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: {
+ // Note: Boot image is in the low 4GiB and the entry is 32-bit, so emit a 32-bit load.
+ __ movl(temp.AsRegister<CpuRegister>(),
+ Address::Absolute(kDummy32BitOffset, /* no_rip */ false));
+ RecordBootImageRelRoPatch(GetBootImageOffset(invoke));
+ break;
+ }
case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: {
__ movq(temp.AsRegister<CpuRegister>(),
Address::Absolute(kDummy32BitOffset, /* no_rip */ false));
@@ -1059,6 +1066,11 @@ void CodeGeneratorX86_64::GenerateVirtualCall(
RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
}
+void CodeGeneratorX86_64::RecordBootImageRelRoPatch(uint32_t boot_image_offset) {
+ boot_image_method_patches_.emplace_back(/* target_dex_file */ nullptr, boot_image_offset);
+ __ Bind(&boot_image_method_patches_.back().label);
+}
+
void CodeGeneratorX86_64::RecordBootImageMethodPatch(HInvokeStaticOrDirect* invoke) {
boot_image_method_patches_.emplace_back(
invoke->GetTargetMethod().dex_file, invoke->GetTargetMethod().index);
@@ -1110,6 +1122,14 @@ inline void CodeGeneratorX86_64::EmitPcRelativeLinkerPatches(
}
}
+linker::LinkerPatch DataBimgRelRoPatchAdapter(size_t literal_offset,
+ const DexFile* target_dex_file,
+ uint32_t pc_insn_offset,
+ uint32_t boot_image_offset) {
+ DCHECK(target_dex_file == nullptr); // Unused for DataBimgRelRoPatch(), should be null.
+ return linker::LinkerPatch::DataBimgRelRoPatch(literal_offset, pc_insn_offset, boot_image_offset);
+}
+
void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
DCHECK(linker_patches->empty());
size_t size =
@@ -1128,11 +1148,10 @@ void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* li
EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
boot_image_string_patches_, linker_patches);
} else {
- DCHECK(boot_image_method_patches_.empty());
- EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>(
- boot_image_type_patches_, linker_patches);
- EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>(
- boot_image_string_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<DataBimgRelRoPatchAdapter>(
+ boot_image_method_patches_, linker_patches);
+ DCHECK(boot_image_type_patches_.empty());
+ DCHECK(boot_image_string_patches_.empty());
}
EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
method_bss_entry_patches_, linker_patches);
@@ -3821,6 +3840,177 @@ void InstructionCodeGeneratorX86_64::VisitRem(HRem* rem) {
}
}
+static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) {
+ LocationSummary* locations = new (allocator) LocationSummary(minmax);
+ switch (minmax->GetResultType()) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ break;
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ // The following is sub-optimal, but all we can do for now. It would be fine to also accept
+ // the second input to be the output (we can simply swap inputs).
+ locations->SetOut(Location::SameAsFirstInput());
+ break;
+ default:
+ LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType();
+ }
+}
+
+void InstructionCodeGeneratorX86_64::GenerateMinMaxInt(LocationSummary* locations,
+ bool is_min,
+ DataType::Type type) {
+ Location op1_loc = locations->InAt(0);
+ Location op2_loc = locations->InAt(1);
+
+ // Shortcut for same input locations.
+ if (op1_loc.Equals(op2_loc)) {
+ // Can return immediately, as op1_loc == out_loc.
+ // Note: if we ever support separate registers, e.g., output into memory, we need to check for
+ // a copy here.
+ DCHECK(locations->Out().Equals(op1_loc));
+ return;
+ }
+
+ CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+ CpuRegister op2 = op2_loc.AsRegister<CpuRegister>();
+
+ // (out := op1)
+ // out <=? op2
+ // if out is min jmp done
+ // out := op2
+ // done:
+
+ if (type == DataType::Type::kInt64) {
+ __ cmpq(out, op2);
+ __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, /*is64bit*/ true);
+ } else {
+ DCHECK_EQ(type, DataType::Type::kInt32);
+ __ cmpl(out, op2);
+ __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, /*is64bit*/ false);
+ }
+}
+
+void InstructionCodeGeneratorX86_64::GenerateMinMaxFP(LocationSummary* locations,
+ bool is_min,
+ DataType::Type type) {
+ Location op1_loc = locations->InAt(0);
+ Location op2_loc = locations->InAt(1);
+ Location out_loc = locations->Out();
+ XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
+
+ // Shortcut for same input locations.
+ if (op1_loc.Equals(op2_loc)) {
+ DCHECK(out_loc.Equals(op1_loc));
+ return;
+ }
+
+ // (out := op1)
+ // out <=? op2
+ // if Nan jmp Nan_label
+ // if out is min jmp done
+ // if op2 is min jmp op2_label
+ // handle -0/+0
+ // jmp done
+ // Nan_label:
+ // out := NaN
+ // op2_label:
+ // out := op2
+ // done:
+ //
+ // This removes one jmp, but needs to copy one input (op1) to out.
+ //
+ // TODO: This is straight from Quick. Make NaN an out-of-line slowpath?
+
+ XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();
+
+ NearLabel nan, done, op2_label;
+ if (type == DataType::Type::kFloat64) {
+ __ ucomisd(out, op2);
+ } else {
+ DCHECK_EQ(type, DataType::Type::kFloat32);
+ __ ucomiss(out, op2);
+ }
+
+ __ j(Condition::kParityEven, &nan);
+
+ __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
+ __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);
+
+ // Handle 0.0/-0.0.
+ if (is_min) {
+ if (type == DataType::Type::kFloat64) {
+ __ orpd(out, op2);
+ } else {
+ __ orps(out, op2);
+ }
+ } else {
+ if (type == DataType::Type::kFloat64) {
+ __ andpd(out, op2);
+ } else {
+ __ andps(out, op2);
+ }
+ }
+ __ jmp(&done);
+
+ // NaN handling.
+ __ Bind(&nan);
+ if (type == DataType::Type::kFloat64) {
+ __ movsd(out, codegen_->LiteralInt64Address(INT64_C(0x7FF8000000000000)));
+ } else {
+ __ movss(out, codegen_->LiteralInt32Address(INT32_C(0x7FC00000)));
+ }
+ __ jmp(&done);
+
+ // out := op2;
+ __ Bind(&op2_label);
+ if (type == DataType::Type::kFloat64) {
+ __ movsd(out, op2);
+ } else {
+ __ movss(out, op2);
+ }
+
+ // Done.
+ __ Bind(&done);
+}
+
+void InstructionCodeGeneratorX86_64::GenerateMinMax(HBinaryOperation* minmax, bool is_min) {
+ DataType::Type type = minmax->GetResultType();
+ switch (type) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ GenerateMinMaxInt(minmax->GetLocations(), is_min, type);
+ break;
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ GenerateMinMaxFP(minmax->GetLocations(), is_min, type);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected type for HMinMax " << type;
+ }
+}
+
+void LocationsBuilderX86_64::VisitMin(HMin* min) {
+ CreateMinMaxLocations(GetGraph()->GetAllocator(), min);
+}
+
+void InstructionCodeGeneratorX86_64::VisitMin(HMin* min) {
+ GenerateMinMax(min, /*is_min*/ true);
+}
+
+void LocationsBuilderX86_64::VisitMax(HMax* max) {
+ CreateMinMaxLocations(GetGraph()->GetAllocator(), max);
+}
+
+void InstructionCodeGeneratorX86_64::VisitMax(HMax* max) {
+ GenerateMinMax(max, /*is_min*/ false);
+}
+
void LocationsBuilderX86_64::VisitAbs(HAbs* abs) {
LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
switch (abs->GetResultType()) {
@@ -5535,7 +5725,7 @@ HLoadClass::LoadKind CodeGeneratorX86_64::GetSupportedLoadClassKind(
case HLoadClass::LoadKind::kReferrersClass:
break;
case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
- case HLoadClass::LoadKind::kBootImageClassTable:
+ case HLoadClass::LoadKind::kBootImageRelRo:
case HLoadClass::LoadKind::kBssEntry:
DCHECK(!Runtime::Current()->UseJitCompilation());
break;
@@ -5643,16 +5833,10 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S
__ movl(out, Immediate(static_cast<int32_t>(address))); // Zero-extended.
break;
}
- case HLoadClass::LoadKind::kBootImageClassTable: {
+ case HLoadClass::LoadKind::kBootImageRelRo: {
DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
__ movl(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false));
- codegen_->RecordBootImageTypePatch(cls);
- // Extract the reference from the slot data, i.e. clear the hash bits.
- int32_t masked_hash = ClassTable::TableSlot::MaskHash(
- ComputeModifiedUtf8Hash(cls->GetDexFile().StringByTypeIdx(cls->GetTypeIndex())));
- if (masked_hash != 0) {
- __ subl(out, Immediate(masked_hash));
- }
+ codegen_->RecordBootImageRelRoPatch(codegen_->GetBootImageOffset(cls));
break;
}
case HLoadClass::LoadKind::kBssEntry: {
@@ -5717,7 +5901,7 @@ HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind(
HLoadString::LoadKind desired_string_load_kind) {
switch (desired_string_load_kind) {
case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
- case HLoadString::LoadKind::kBootImageInternTable:
+ case HLoadString::LoadKind::kBootImageRelRo:
case HLoadString::LoadKind::kBssEntry:
DCHECK(!Runtime::Current()->UseJitCompilation());
break;
@@ -5783,10 +5967,10 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREA
__ movl(out, Immediate(static_cast<int32_t>(address))); // Zero-extended.
return;
}
- case HLoadString::LoadKind::kBootImageInternTable: {
+ case HLoadString::LoadKind::kBootImageRelRo: {
DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
__ movl(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false));
- codegen_->RecordBootImageStringPatch(load);
+ codegen_->RecordBootImageRelRoPatch(codegen_->GetBootImageOffset(load));
return;
}
case HLoadString::LoadKind::kBssEntry: {
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 1079e94dfc..e8d1efe702 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -222,6 +222,10 @@ class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator {
bool value_can_be_null);
void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
+ void GenerateMinMaxInt(LocationSummary* locations, bool is_min, DataType::Type type);
+ void GenerateMinMaxFP(LocationSummary* locations, bool is_min, DataType::Type type);
+ void GenerateMinMax(HBinaryOperation* minmax, bool is_min);
+
// Generate a heap reference load using one register `out`:
//
// out <- *(out + offset)
@@ -410,6 +414,7 @@ class CodeGeneratorX86_64 : public CodeGenerator {
void GenerateVirtualCall(
HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE;
+ void RecordBootImageRelRoPatch(uint32_t boot_image_offset);
void RecordBootImageMethodPatch(HInvokeStaticOrDirect* invoke);
void RecordMethodBssEntryPatch(HInvokeStaticOrDirect* invoke);
void RecordBootImageTypePatch(HLoadClass* load_class);
@@ -604,17 +609,18 @@ class CodeGeneratorX86_64 : public CodeGenerator {
// Used for fixups to the constant area.
int constant_area_start_;
- // PC-relative method patch info for kBootImageLinkTimePcRelative.
+ // PC-relative method patch info for kBootImageLinkTimePcRelative/kBootImageRelRo.
+ // Also used for type/string patches for kBootImageRelRo (same linker patch as for methods).
ArenaDeque<PatchInfo<Label>> boot_image_method_patches_;
// PC-relative method patch info for kBssEntry.
ArenaDeque<PatchInfo<Label>> method_bss_entry_patches_;
// PC-relative type patch info for kBootImageLinkTimePcRelative.
ArenaDeque<PatchInfo<Label>> boot_image_type_patches_;
- // Type patch locations for kBssEntry.
+ // PC-relative type patch info for kBssEntry.
ArenaDeque<PatchInfo<Label>> type_bss_entry_patches_;
- // String patch locations; type depends on configuration (intern table or boot image PIC).
+ // PC-relative String patch info for kBootImageLinkTimePcRelative.
ArenaDeque<PatchInfo<Label>> boot_image_string_patches_;
- // String patch locations for kBssEntry.
+ // PC-relative String patch info for kBssEntry.
ArenaDeque<PatchInfo<Label>> string_bss_entry_patches_;
// Patches for string literals in JIT compiled code.
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index ba4040acad..a0fd5ffcb1 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -18,6 +18,7 @@
#include <memory>
#include "base/macros.h"
+#include "base/utils.h"
#include "builder.h"
#include "codegen_test_utils.h"
#include "dex/dex_file.h"
@@ -26,7 +27,6 @@
#include "nodes.h"
#include "optimizing_unit_test.h"
#include "register_allocator_linear_scan.h"
-#include "utils.h"
#include "utils/arm/assembler_arm_vixl.h"
#include "utils/arm/managed_register_arm.h"
#include "utils/mips/managed_register_mips.h"
diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc
index 71c394ec1f..f05159b735 100644
--- a/compiler/optimizing/gvn.cc
+++ b/compiler/optimizing/gvn.cc
@@ -17,11 +17,11 @@
#include "gvn.h"
#include "base/arena_bit_vector.h"
+#include "base/bit_vector-inl.h"
#include "base/scoped_arena_allocator.h"
#include "base/scoped_arena_containers.h"
-#include "base/bit_vector-inl.h"
+#include "base/utils.h"
#include "side_effects_analysis.h"
-#include "utils.h"
namespace art {
diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc
index d699d01ecb..0a310ca940 100644
--- a/compiler/optimizing/induction_var_range.cc
+++ b/compiler/optimizing/induction_var_range.cc
@@ -78,16 +78,10 @@ static bool IsGEZero(HInstruction* instruction) {
DCHECK(instruction != nullptr);
if (instruction->IsArrayLength()) {
return true;
- } else if (instruction->IsInvokeStaticOrDirect()) {
- switch (instruction->AsInvoke()->GetIntrinsic()) {
- case Intrinsics::kMathMinIntInt:
- case Intrinsics::kMathMinLongLong:
- // Instruction MIN(>=0, >=0) is >= 0.
- return IsGEZero(instruction->InputAt(0)) &&
- IsGEZero(instruction->InputAt(1));
- default:
- break;
- }
+ } else if (instruction->IsMin()) {
+ // Instruction MIN(>=0, >=0) is >= 0.
+ return IsGEZero(instruction->InputAt(0)) &&
+ IsGEZero(instruction->InputAt(1));
} else if (instruction->IsAbs()) {
// Instruction ABS(>=0) is >= 0.
// NOTE: ABS(minint) = minint prevents assuming
@@ -101,21 +95,14 @@ static bool IsGEZero(HInstruction* instruction) {
/** Hunts "under the hood" for a suitable instruction at the hint. */
static bool IsMaxAtHint(
HInstruction* instruction, HInstruction* hint, /*out*/HInstruction** suitable) {
- if (instruction->IsInvokeStaticOrDirect()) {
- switch (instruction->AsInvoke()->GetIntrinsic()) {
- case Intrinsics::kMathMinIntInt:
- case Intrinsics::kMathMinLongLong:
- // For MIN(x, y), return most suitable x or y as maximum.
- return IsMaxAtHint(instruction->InputAt(0), hint, suitable) ||
- IsMaxAtHint(instruction->InputAt(1), hint, suitable);
- default:
- break;
- }
+ if (instruction->IsMin()) {
+ // For MIN(x, y), return most suitable x or y as maximum.
+ return IsMaxAtHint(instruction->InputAt(0), hint, suitable) ||
+ IsMaxAtHint(instruction->InputAt(1), hint, suitable);
} else {
*suitable = instruction;
return HuntForDeclaration(instruction) == hint;
}
- return false;
}
/** Post-analysis simplification of a minimum value that makes the bound more useful to clients. */
@@ -364,11 +351,11 @@ void InductionVarRange::Replace(HInstruction* instruction,
}
}
-bool InductionVarRange::IsFinite(HLoopInformation* loop, /*out*/ int64_t* tc) const {
+bool InductionVarRange::IsFinite(HLoopInformation* loop, /*out*/ int64_t* trip_count) const {
HInductionVarAnalysis::InductionInfo *trip =
induction_analysis_->LookupInfo(loop, GetLoopControl(loop));
if (trip != nullptr && !IsUnsafeTripCount(trip)) {
- IsConstant(trip->op_a, kExact, tc);
+ IsConstant(trip->op_a, kExact, trip_count);
return true;
}
return false;
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index eac8d2f593..34837700a2 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -120,6 +120,8 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor {
void SimplifyReturnThis(HInvoke* invoke);
void SimplifyAllocationIntrinsic(HInvoke* invoke);
void SimplifyMemBarrier(HInvoke* invoke, MemBarrierKind barrier_kind);
+ void SimplifyMin(HInvoke* invoke, DataType::Type type);
+ void SimplifyMax(HInvoke* invoke, DataType::Type type);
void SimplifyAbs(HInvoke* invoke, DataType::Type type);
CodeGenerator* codegen_;
@@ -2407,6 +2409,20 @@ void InstructionSimplifierVisitor::SimplifyMemBarrier(HInvoke* invoke,
invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, mem_barrier);
}
+void InstructionSimplifierVisitor::SimplifyMin(HInvoke* invoke, DataType::Type type) {
+ DCHECK(invoke->IsInvokeStaticOrDirect());
+ HMin* min = new (GetGraph()->GetAllocator())
+ HMin(type, invoke->InputAt(0), invoke->InputAt(1), invoke->GetDexPc());
+ invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, min);
+}
+
+void InstructionSimplifierVisitor::SimplifyMax(HInvoke* invoke, DataType::Type type) {
+ DCHECK(invoke->IsInvokeStaticOrDirect());
+ HMax* max = new (GetGraph()->GetAllocator())
+ HMax(type, invoke->InputAt(0), invoke->InputAt(1), invoke->GetDexPc());
+ invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, max);
+}
+
void InstructionSimplifierVisitor::SimplifyAbs(HInvoke* invoke, DataType::Type type) {
DCHECK(invoke->IsInvokeStaticOrDirect());
HAbs* abs = new (GetGraph()->GetAllocator())
@@ -2497,6 +2513,30 @@ void InstructionSimplifierVisitor::VisitInvoke(HInvoke* instruction) {
case Intrinsics::kVarHandleStoreStoreFence:
SimplifyMemBarrier(instruction, MemBarrierKind::kStoreStore);
break;
+ case Intrinsics::kMathMinIntInt:
+ SimplifyMin(instruction, DataType::Type::kInt32);
+ break;
+ case Intrinsics::kMathMinLongLong:
+ SimplifyMin(instruction, DataType::Type::kInt64);
+ break;
+ case Intrinsics::kMathMinFloatFloat:
+ SimplifyMin(instruction, DataType::Type::kFloat32);
+ break;
+ case Intrinsics::kMathMinDoubleDouble:
+ SimplifyMin(instruction, DataType::Type::kFloat64);
+ break;
+ case Intrinsics::kMathMaxIntInt:
+ SimplifyMax(instruction, DataType::Type::kInt32);
+ break;
+ case Intrinsics::kMathMaxLongLong:
+ SimplifyMax(instruction, DataType::Type::kInt64);
+ break;
+ case Intrinsics::kMathMaxFloatFloat:
+ SimplifyMax(instruction, DataType::Type::kFloat32);
+ break;
+ case Intrinsics::kMathMaxDoubleDouble:
+ SimplifyMax(instruction, DataType::Type::kFloat64);
+ break;
case Intrinsics::kMathAbsInt:
SimplifyAbs(instruction, DataType::Type::kInt32);
break;
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
index acb830e524..f8dc316e45 100644
--- a/compiler/optimizing/intrinsics.cc
+++ b/compiler/optimizing/intrinsics.cc
@@ -18,6 +18,7 @@
#include "art_field-inl.h"
#include "art_method-inl.h"
+#include "base/utils.h"
#include "class_linker.h"
#include "dex/invoke_type.h"
#include "driver/compiler_driver.h"
@@ -26,7 +27,6 @@
#include "nodes.h"
#include "scoped_thread_state_change-inl.h"
#include "thread-current-inl.h"
-#include "utils.h"
namespace art {
diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h
index 24aff226ca..1035cbc2c4 100644
--- a/compiler/optimizing/intrinsics.h
+++ b/compiler/optimizing/intrinsics.h
@@ -266,6 +266,14 @@ void IntrinsicCodeGenerator ## Arch::Visit ## Name(HInvoke* invoke) { \
<< " should have been converted to HIR"; \
}
#define UNREACHABLE_INTRINSICS(Arch) \
+UNREACHABLE_INTRINSIC(Arch, MathMinIntInt) \
+UNREACHABLE_INTRINSIC(Arch, MathMinLongLong) \
+UNREACHABLE_INTRINSIC(Arch, MathMinFloatFloat) \
+UNREACHABLE_INTRINSIC(Arch, MathMinDoubleDouble) \
+UNREACHABLE_INTRINSIC(Arch, MathMaxIntInt) \
+UNREACHABLE_INTRINSIC(Arch, MathMaxLongLong) \
+UNREACHABLE_INTRINSIC(Arch, MathMaxFloatFloat) \
+UNREACHABLE_INTRINSIC(Arch, MathMaxDoubleDouble) \
UNREACHABLE_INTRINSIC(Arch, MathAbsInt) \
UNREACHABLE_INTRINSIC(Arch, MathAbsLong) \
UNREACHABLE_INTRINSIC(Arch, MathAbsFloat) \
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 0b04fff927..81c0b50932 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -344,14 +344,6 @@ void IntrinsicCodeGeneratorARM64::VisitShortReverseBytes(HInvoke* invoke) {
GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetVIXLAssembler());
}
-static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
- locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-}
-
static void GenNumberOfLeadingZeros(LocationSummary* locations,
DataType::Type type,
MacroAssembler* masm) {
@@ -529,113 +521,6 @@ void IntrinsicCodeGeneratorARM64::VisitLongLowestOneBit(HInvoke* invoke) {
GenLowestOneBit(invoke, DataType::Type::kInt64, GetVIXLAssembler());
}
-static void GenMinMaxFP(LocationSummary* locations,
- bool is_min,
- bool is_double,
- MacroAssembler* masm) {
- Location op1 = locations->InAt(0);
- Location op2 = locations->InAt(1);
- Location out = locations->Out();
-
- FPRegister op1_reg = is_double ? DRegisterFrom(op1) : SRegisterFrom(op1);
- FPRegister op2_reg = is_double ? DRegisterFrom(op2) : SRegisterFrom(op2);
- FPRegister out_reg = is_double ? DRegisterFrom(out) : SRegisterFrom(out);
- if (is_min) {
- __ Fmin(out_reg, op1_reg, op2_reg);
- } else {
- __ Fmax(out_reg, op1_reg, op2_reg);
- }
-}
-
-static void CreateFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::RequiresFpuRegister());
- locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
-}
-
-void IntrinsicLocationsBuilderARM64::VisitMathMinDoubleDouble(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitMathMinDoubleDouble(HInvoke* invoke) {
- GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, /* is_double */ true, GetVIXLAssembler());
-}
-
-void IntrinsicLocationsBuilderARM64::VisitMathMinFloatFloat(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitMathMinFloatFloat(HInvoke* invoke) {
- GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, /* is_double */ false, GetVIXLAssembler());
-}
-
-void IntrinsicLocationsBuilderARM64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
- GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, /* is_double */ true, GetVIXLAssembler());
-}
-
-void IntrinsicLocationsBuilderARM64::VisitMathMaxFloatFloat(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitMathMaxFloatFloat(HInvoke* invoke) {
- GenMinMaxFP(
- invoke->GetLocations(), /* is_min */ false, /* is_double */ false, GetVIXLAssembler());
-}
-
-static void GenMinMax(LocationSummary* locations,
- bool is_min,
- bool is_long,
- MacroAssembler* masm) {
- Location op1 = locations->InAt(0);
- Location op2 = locations->InAt(1);
- Location out = locations->Out();
-
- Register op1_reg = is_long ? XRegisterFrom(op1) : WRegisterFrom(op1);
- Register op2_reg = is_long ? XRegisterFrom(op2) : WRegisterFrom(op2);
- Register out_reg = is_long ? XRegisterFrom(out) : WRegisterFrom(out);
-
- __ Cmp(op1_reg, op2_reg);
- __ Csel(out_reg, op1_reg, op2_reg, is_min ? lt : gt);
-}
-
-void IntrinsicLocationsBuilderARM64::VisitMathMinIntInt(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitMathMinIntInt(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetVIXLAssembler());
-}
-
-void IntrinsicLocationsBuilderARM64::VisitMathMinLongLong(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitMathMinLongLong(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetVIXLAssembler());
-}
-
-void IntrinsicLocationsBuilderARM64::VisitMathMaxIntInt(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitMathMaxIntInt(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetVIXLAssembler());
-}
-
-void IntrinsicLocationsBuilderARM64::VisitMathMaxLongLong(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitMathMaxLongLong(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetVIXLAssembler());
-}
-
static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
LocationSummary* locations =
new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index e351fcc706..e61a0b0809 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -432,264 +432,6 @@ void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invo
GenNumberOfTrailingZeros(invoke, DataType::Type::kInt64, codegen_);
}
-static void GenMinMaxFloat(HInvoke* invoke, bool is_min, CodeGeneratorARMVIXL* codegen) {
- ArmVIXLAssembler* assembler = codegen->GetAssembler();
- Location op1_loc = invoke->GetLocations()->InAt(0);
- Location op2_loc = invoke->GetLocations()->InAt(1);
- Location out_loc = invoke->GetLocations()->Out();
-
- // Optimization: don't generate any code if inputs are the same.
- if (op1_loc.Equals(op2_loc)) {
- DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in location builder.
- return;
- }
-
- vixl32::SRegister op1 = SRegisterFrom(op1_loc);
- vixl32::SRegister op2 = SRegisterFrom(op2_loc);
- vixl32::SRegister out = OutputSRegister(invoke);
- UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
- const vixl32::Register temp1 = temps.Acquire();
- vixl32::Register temp2 = RegisterFrom(invoke->GetLocations()->GetTemp(0));
- vixl32::Label nan, done;
- vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &done);
-
- DCHECK(op1.Is(out));
-
- __ Vcmp(op1, op2);
- __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
- __ B(vs, &nan, /* far_target */ false); // if un-ordered, go to NaN handling.
-
- // op1 <> op2
- vixl32::ConditionType cond = is_min ? gt : lt;
- {
- ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(),
- 2 * kMaxInstructionSizeInBytes,
- CodeBufferCheckScope::kMaximumSize);
- __ it(cond);
- __ vmov(cond, F32, out, op2);
- }
- // for <>(not equal), we've done min/max calculation.
- __ B(ne, final_label, /* far_target */ false);
-
- // handle op1 == op2, max(+0.0,-0.0), min(+0.0,-0.0).
- __ Vmov(temp1, op1);
- __ Vmov(temp2, op2);
- if (is_min) {
- __ Orr(temp1, temp1, temp2);
- } else {
- __ And(temp1, temp1, temp2);
- }
- __ Vmov(out, temp1);
- __ B(final_label);
-
- // handle NaN input.
- __ Bind(&nan);
- __ Movt(temp1, High16Bits(kNanFloat)); // 0x7FC0xxxx is a NaN.
- __ Vmov(out, temp1);
-
- if (done.IsReferenced()) {
- __ Bind(&done);
- }
-}
-
-static void CreateFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::RequiresFpuRegister());
- locations->SetOut(Location::SameAsFirstInput());
-}
-
-void IntrinsicLocationsBuilderARMVIXL::VisitMathMinFloatFloat(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
- invoke->GetLocations()->AddTemp(Location::RequiresRegister());
-}
-
-void IntrinsicCodeGeneratorARMVIXL::VisitMathMinFloatFloat(HInvoke* invoke) {
- GenMinMaxFloat(invoke, /* is_min */ true, codegen_);
-}
-
-void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxFloatFloat(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
- invoke->GetLocations()->AddTemp(Location::RequiresRegister());
-}
-
-void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxFloatFloat(HInvoke* invoke) {
- GenMinMaxFloat(invoke, /* is_min */ false, codegen_);
-}
-
-static void GenMinMaxDouble(HInvoke* invoke, bool is_min, CodeGeneratorARMVIXL* codegen) {
- ArmVIXLAssembler* assembler = codegen->GetAssembler();
- Location op1_loc = invoke->GetLocations()->InAt(0);
- Location op2_loc = invoke->GetLocations()->InAt(1);
- Location out_loc = invoke->GetLocations()->Out();
-
- // Optimization: don't generate any code if inputs are the same.
- if (op1_loc.Equals(op2_loc)) {
- DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in.
- return;
- }
-
- vixl32::DRegister op1 = DRegisterFrom(op1_loc);
- vixl32::DRegister op2 = DRegisterFrom(op2_loc);
- vixl32::DRegister out = OutputDRegister(invoke);
- vixl32::Label handle_nan_eq, done;
- vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &done);
-
- DCHECK(op1.Is(out));
-
- __ Vcmp(op1, op2);
- __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
- __ B(vs, &handle_nan_eq, /* far_target */ false); // if un-ordered, go to NaN handling.
-
- // op1 <> op2
- vixl32::ConditionType cond = is_min ? gt : lt;
- {
- ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(),
- 2 * kMaxInstructionSizeInBytes,
- CodeBufferCheckScope::kMaximumSize);
- __ it(cond);
- __ vmov(cond, F64, out, op2);
- }
- // for <>(not equal), we've done min/max calculation.
- __ B(ne, final_label, /* far_target */ false);
-
- // handle op1 == op2, max(+0.0,-0.0).
- if (!is_min) {
- __ Vand(F64, out, op1, op2);
- __ B(final_label);
- }
-
- // handle op1 == op2, min(+0.0,-0.0), NaN input.
- __ Bind(&handle_nan_eq);
- __ Vorr(F64, out, op1, op2); // assemble op1/-0.0/NaN.
-
- if (done.IsReferenced()) {
- __ Bind(&done);
- }
-}
-
-void IntrinsicLocationsBuilderARMVIXL::VisitMathMinDoubleDouble(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARMVIXL::VisitMathMinDoubleDouble(HInvoke* invoke) {
- GenMinMaxDouble(invoke, /* is_min */ true , codegen_);
-}
-
-void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxDoubleDouble(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxDoubleDouble(HInvoke* invoke) {
- GenMinMaxDouble(invoke, /* is_min */ false, codegen_);
-}
-
-static void GenMinMaxLong(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) {
- Location op1_loc = invoke->GetLocations()->InAt(0);
- Location op2_loc = invoke->GetLocations()->InAt(1);
- Location out_loc = invoke->GetLocations()->Out();
-
- // Optimization: don't generate any code if inputs are the same.
- if (op1_loc.Equals(op2_loc)) {
- DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in location builder.
- return;
- }
-
- vixl32::Register op1_lo = LowRegisterFrom(op1_loc);
- vixl32::Register op1_hi = HighRegisterFrom(op1_loc);
- vixl32::Register op2_lo = LowRegisterFrom(op2_loc);
- vixl32::Register op2_hi = HighRegisterFrom(op2_loc);
- vixl32::Register out_lo = LowRegisterFrom(out_loc);
- vixl32::Register out_hi = HighRegisterFrom(out_loc);
- UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
- const vixl32::Register temp = temps.Acquire();
-
- DCHECK(op1_lo.Is(out_lo));
- DCHECK(op1_hi.Is(out_hi));
-
- // Compare op1 >= op2, or op1 < op2.
- __ Cmp(out_lo, op2_lo);
- __ Sbcs(temp, out_hi, op2_hi);
-
- // Now GE/LT condition code is correct for the long comparison.
- {
- vixl32::ConditionType cond = is_min ? ge : lt;
- ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(),
- 3 * kMaxInstructionSizeInBytes,
- CodeBufferCheckScope::kMaximumSize);
- __ itt(cond);
- __ mov(cond, out_lo, op2_lo);
- __ mov(cond, out_hi, op2_hi);
- }
-}
-
-static void CreateLongLongToLongLocations(ArenaAllocator* allocator, HInvoke* invoke) {
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
- locations->SetOut(Location::SameAsFirstInput());
-}
-
-void IntrinsicLocationsBuilderARMVIXL::VisitMathMinLongLong(HInvoke* invoke) {
- CreateLongLongToLongLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARMVIXL::VisitMathMinLongLong(HInvoke* invoke) {
- GenMinMaxLong(invoke, /* is_min */ true, GetAssembler());
-}
-
-void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxLongLong(HInvoke* invoke) {
- CreateLongLongToLongLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxLongLong(HInvoke* invoke) {
- GenMinMaxLong(invoke, /* is_min */ false, GetAssembler());
-}
-
-static void GenMinMax(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) {
- vixl32::Register op1 = InputRegisterAt(invoke, 0);
- vixl32::Register op2 = InputRegisterAt(invoke, 1);
- vixl32::Register out = OutputRegister(invoke);
-
- __ Cmp(op1, op2);
-
- {
- ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
- 3 * kMaxInstructionSizeInBytes,
- CodeBufferCheckScope::kMaximumSize);
-
- __ ite(is_min ? lt : gt);
- __ mov(is_min ? lt : gt, out, op1);
- __ mov(is_min ? ge : le, out, op2);
- }
-}
-
-static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
- locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-}
-
-void IntrinsicLocationsBuilderARMVIXL::VisitMathMinIntInt(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARMVIXL::VisitMathMinIntInt(HInvoke* invoke) {
- GenMinMax(invoke, /* is_min */ true, GetAssembler());
-}
-
-void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxIntInt(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxIntInt(HInvoke* invoke) {
- GenMinMax(invoke, /* is_min */ false, GetAssembler());
-}
-
void IntrinsicLocationsBuilderARMVIXL::VisitMathSqrt(HInvoke* invoke) {
CreateFPToFPLocations(allocator_, invoke);
}
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index 6d6ff7576b..bc1292b2b7 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -58,6 +58,10 @@ inline bool IntrinsicCodeGeneratorMIPS::Is32BitFPU() const {
return codegen_->GetInstructionSetFeatures().Is32BitFloatingPoint();
}
+inline bool IntrinsicCodeGeneratorMIPS::HasMsa() const {
+ return codegen_->GetInstructionSetFeatures().HasMsa();
+}
+
#define __ codegen->GetAssembler()->
static void MoveFromReturnRegister(Location trg,
@@ -612,6 +616,7 @@ static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
static void GenBitCount(LocationSummary* locations,
DataType::Type type,
bool isR6,
+ bool hasMsa,
MipsAssembler* assembler) {
Register out = locations->Out().AsRegister<Register>();
@@ -637,85 +642,102 @@ static void GenBitCount(LocationSummary* locations,
// instructions compared to a loop-based algorithm which required 47
// instructions.
- if (type == DataType::Type::kInt32) {
- Register in = locations->InAt(0).AsRegister<Register>();
-
- __ Srl(TMP, in, 1);
- __ LoadConst32(AT, 0x55555555);
- __ And(TMP, TMP, AT);
- __ Subu(TMP, in, TMP);
- __ LoadConst32(AT, 0x33333333);
- __ And(out, TMP, AT);
- __ Srl(TMP, TMP, 2);
- __ And(TMP, TMP, AT);
- __ Addu(TMP, out, TMP);
- __ Srl(out, TMP, 4);
- __ Addu(out, out, TMP);
- __ LoadConst32(AT, 0x0F0F0F0F);
- __ And(out, out, AT);
- __ LoadConst32(TMP, 0x01010101);
- if (isR6) {
- __ MulR6(out, out, TMP);
+ if (hasMsa) {
+ if (type == DataType::Type::kInt32) {
+ Register in = locations->InAt(0).AsRegister<Register>();
+ __ Mtc1(in, FTMP);
+ __ PcntW(static_cast<VectorRegister>(FTMP), static_cast<VectorRegister>(FTMP));
+ __ Mfc1(out, FTMP);
} else {
- __ MulR2(out, out, TMP);
+ DCHECK_EQ(type, DataType::Type::kInt64);
+ Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>();
+ Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
+ __ Mtc1(in_lo, FTMP);
+ __ Mthc1(in_hi, FTMP);
+ __ PcntD(static_cast<VectorRegister>(FTMP), static_cast<VectorRegister>(FTMP));
+ __ Mfc1(out, FTMP);
}
- __ Srl(out, out, 24);
} else {
- DCHECK_EQ(type, DataType::Type::kInt64);
- Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>();
- Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
- Register tmp_hi = locations->GetTemp(0).AsRegister<Register>();
- Register out_hi = locations->GetTemp(1).AsRegister<Register>();
- Register tmp_lo = TMP;
- Register out_lo = out;
+ if (type == DataType::Type::kInt32) {
+ Register in = locations->InAt(0).AsRegister<Register>();
- __ Srl(tmp_lo, in_lo, 1);
- __ Srl(tmp_hi, in_hi, 1);
+ __ Srl(TMP, in, 1);
+ __ LoadConst32(AT, 0x55555555);
+ __ And(TMP, TMP, AT);
+ __ Subu(TMP, in, TMP);
+ __ LoadConst32(AT, 0x33333333);
+ __ And(out, TMP, AT);
+ __ Srl(TMP, TMP, 2);
+ __ And(TMP, TMP, AT);
+ __ Addu(TMP, out, TMP);
+ __ Srl(out, TMP, 4);
+ __ Addu(out, out, TMP);
+ __ LoadConst32(AT, 0x0F0F0F0F);
+ __ And(out, out, AT);
+ __ LoadConst32(TMP, 0x01010101);
+ if (isR6) {
+ __ MulR6(out, out, TMP);
+ } else {
+ __ MulR2(out, out, TMP);
+ }
+ __ Srl(out, out, 24);
+ } else {
+ DCHECK_EQ(type, DataType::Type::kInt64);
+ Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>();
+ Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
+ Register tmp_hi = locations->GetTemp(0).AsRegister<Register>();
+ Register out_hi = locations->GetTemp(1).AsRegister<Register>();
+ Register tmp_lo = TMP;
+ Register out_lo = out;
- __ LoadConst32(AT, 0x55555555);
+ __ Srl(tmp_lo, in_lo, 1);
+ __ Srl(tmp_hi, in_hi, 1);
- __ And(tmp_lo, tmp_lo, AT);
- __ Subu(tmp_lo, in_lo, tmp_lo);
+ __ LoadConst32(AT, 0x55555555);
- __ And(tmp_hi, tmp_hi, AT);
- __ Subu(tmp_hi, in_hi, tmp_hi);
+ __ And(tmp_lo, tmp_lo, AT);
+ __ Subu(tmp_lo, in_lo, tmp_lo);
- __ LoadConst32(AT, 0x33333333);
+ __ And(tmp_hi, tmp_hi, AT);
+ __ Subu(tmp_hi, in_hi, tmp_hi);
- __ And(out_lo, tmp_lo, AT);
- __ Srl(tmp_lo, tmp_lo, 2);
- __ And(tmp_lo, tmp_lo, AT);
- __ Addu(tmp_lo, out_lo, tmp_lo);
+ __ LoadConst32(AT, 0x33333333);
- __ And(out_hi, tmp_hi, AT);
- __ Srl(tmp_hi, tmp_hi, 2);
- __ And(tmp_hi, tmp_hi, AT);
- __ Addu(tmp_hi, out_hi, tmp_hi);
+ __ And(out_lo, tmp_lo, AT);
+ __ Srl(tmp_lo, tmp_lo, 2);
+ __ And(tmp_lo, tmp_lo, AT);
+ __ Addu(tmp_lo, out_lo, tmp_lo);
- // Here we deviate from the original algorithm a bit. We've reached
- // the stage where the bitfields holding the subtotals are large
- // enough to hold the combined subtotals for both the low word, and
- // the high word. This means that we can add the subtotals for the
- // the high, and low words into a single word, and compute the final
- // result for both the high, and low words using fewer instructions.
- __ LoadConst32(AT, 0x0F0F0F0F);
+ __ And(out_hi, tmp_hi, AT);
+ __ Srl(tmp_hi, tmp_hi, 2);
+ __ And(tmp_hi, tmp_hi, AT);
+ __ Addu(tmp_hi, out_hi, tmp_hi);
- __ Addu(TMP, tmp_hi, tmp_lo);
+ // Here we deviate from the original algorithm a bit. We've reached
+ // the stage where the bitfields holding the subtotals are large
+ // enough to hold the combined subtotals for both the low word, and
+ // the high word. This means that we can add the subtotals for the
+ // the high, and low words into a single word, and compute the final
+ // result for both the high, and low words using fewer instructions.
+ __ LoadConst32(AT, 0x0F0F0F0F);
- __ Srl(out, TMP, 4);
- __ And(out, out, AT);
- __ And(TMP, TMP, AT);
- __ Addu(out, out, TMP);
+ __ Addu(TMP, tmp_hi, tmp_lo);
- __ LoadConst32(AT, 0x01010101);
+ __ Srl(out, TMP, 4);
+ __ And(out, out, AT);
+ __ And(TMP, TMP, AT);
+ __ Addu(out, out, TMP);
- if (isR6) {
- __ MulR6(out, out, AT);
- } else {
- __ MulR2(out, out, AT);
- }
+ __ LoadConst32(AT, 0x01010101);
- __ Srl(out, out, 24);
+ if (isR6) {
+ __ MulR6(out, out, AT);
+ } else {
+ __ MulR2(out, out, AT);
+ }
+
+ __ Srl(out, out, 24);
+ }
}
}
@@ -725,7 +747,7 @@ void IntrinsicLocationsBuilderMIPS::VisitIntegerBitCount(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorMIPS::VisitIntegerBitCount(HInvoke* invoke) {
- GenBitCount(invoke->GetLocations(), DataType::Type::kInt32, IsR6(), GetAssembler());
+ GenBitCount(invoke->GetLocations(), DataType::Type::kInt32, IsR6(), HasMsa(), GetAssembler());
}
// int java.lang.Long.bitCount(int)
@@ -739,459 +761,7 @@ void IntrinsicLocationsBuilderMIPS::VisitLongBitCount(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorMIPS::VisitLongBitCount(HInvoke* invoke) {
- GenBitCount(invoke->GetLocations(), DataType::Type::kInt64, IsR6(), GetAssembler());
-}
-
-static void GenMinMaxFP(LocationSummary* locations,
- bool is_min,
- DataType::Type type,
- bool is_R6,
- MipsAssembler* assembler) {
- FRegister out = locations->Out().AsFpuRegister<FRegister>();
- FRegister a = locations->InAt(0).AsFpuRegister<FRegister>();
- FRegister b = locations->InAt(1).AsFpuRegister<FRegister>();
-
- if (is_R6) {
- MipsLabel noNaNs;
- MipsLabel done;
- FRegister ftmp = ((out != a) && (out != b)) ? out : FTMP;
-
- // When Java computes min/max it prefers a NaN to a number; the
- // behavior of MIPSR6 is to prefer numbers to NaNs, i.e., if one of
- // the inputs is a NaN and the other is a valid number, the MIPS
- // instruction will return the number; Java wants the NaN value
- // returned. This is why there is extra logic preceding the use of
- // the MIPS min.fmt/max.fmt instructions. If either a, or b holds a
- // NaN, return the NaN, otherwise return the min/max.
- if (type == DataType::Type::kFloat64) {
- __ CmpUnD(FTMP, a, b);
- __ Bc1eqz(FTMP, &noNaNs);
-
- // One of the inputs is a NaN
- __ CmpEqD(ftmp, a, a);
- // If a == a then b is the NaN, otherwise a is the NaN.
- __ SelD(ftmp, a, b);
-
- if (ftmp != out) {
- __ MovD(out, ftmp);
- }
-
- __ B(&done);
-
- __ Bind(&noNaNs);
-
- if (is_min) {
- __ MinD(out, a, b);
- } else {
- __ MaxD(out, a, b);
- }
- } else {
- DCHECK_EQ(type, DataType::Type::kFloat32);
- __ CmpUnS(FTMP, a, b);
- __ Bc1eqz(FTMP, &noNaNs);
-
- // One of the inputs is a NaN
- __ CmpEqS(ftmp, a, a);
- // If a == a then b is the NaN, otherwise a is the NaN.
- __ SelS(ftmp, a, b);
-
- if (ftmp != out) {
- __ MovS(out, ftmp);
- }
-
- __ B(&done);
-
- __ Bind(&noNaNs);
-
- if (is_min) {
- __ MinS(out, a, b);
- } else {
- __ MaxS(out, a, b);
- }
- }
-
- __ Bind(&done);
- } else {
- MipsLabel ordered;
- MipsLabel compare;
- MipsLabel select;
- MipsLabel done;
-
- if (type == DataType::Type::kFloat64) {
- __ CunD(a, b);
- } else {
- DCHECK_EQ(type, DataType::Type::kFloat32);
- __ CunS(a, b);
- }
- __ Bc1f(&ordered);
-
- // a or b (or both) is a NaN. Return one, which is a NaN.
- if (type == DataType::Type::kFloat64) {
- __ CeqD(b, b);
- } else {
- __ CeqS(b, b);
- }
- __ B(&select);
-
- __ Bind(&ordered);
-
- // Neither is a NaN.
- // a == b? (-0.0 compares equal with +0.0)
- // If equal, handle zeroes, else compare further.
- if (type == DataType::Type::kFloat64) {
- __ CeqD(a, b);
- } else {
- __ CeqS(a, b);
- }
- __ Bc1f(&compare);
-
- // a == b either bit for bit or one is -0.0 and the other is +0.0.
- if (type == DataType::Type::kFloat64) {
- __ MoveFromFpuHigh(TMP, a);
- __ MoveFromFpuHigh(AT, b);
- } else {
- __ Mfc1(TMP, a);
- __ Mfc1(AT, b);
- }
-
- if (is_min) {
- // -0.0 prevails over +0.0.
- __ Or(TMP, TMP, AT);
- } else {
- // +0.0 prevails over -0.0.
- __ And(TMP, TMP, AT);
- }
-
- if (type == DataType::Type::kFloat64) {
- __ Mfc1(AT, a);
- __ Mtc1(AT, out);
- __ MoveToFpuHigh(TMP, out);
- } else {
- __ Mtc1(TMP, out);
- }
- __ B(&done);
-
- __ Bind(&compare);
-
- if (type == DataType::Type::kFloat64) {
- if (is_min) {
- // return (a <= b) ? a : b;
- __ ColeD(a, b);
- } else {
- // return (a >= b) ? a : b;
- __ ColeD(b, a); // b <= a
- }
- } else {
- if (is_min) {
- // return (a <= b) ? a : b;
- __ ColeS(a, b);
- } else {
- // return (a >= b) ? a : b;
- __ ColeS(b, a); // b <= a
- }
- }
-
- __ Bind(&select);
-
- if (type == DataType::Type::kFloat64) {
- __ MovtD(out, a);
- __ MovfD(out, b);
- } else {
- __ MovtS(out, a);
- __ MovfS(out, b);
- }
-
- __ Bind(&done);
- }
-}
-
-static void CreateFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::RequiresFpuRegister());
- locations->SetOut(Location::RequiresFpuRegister(), Location::kOutputOverlap);
-}
-
-// double java.lang.Math.min(double, double)
-void IntrinsicLocationsBuilderMIPS::VisitMathMinDoubleDouble(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS::VisitMathMinDoubleDouble(HInvoke* invoke) {
- GenMinMaxFP(invoke->GetLocations(),
- /* is_min */ true,
- DataType::Type::kFloat64,
- IsR6(),
- GetAssembler());
-}
-
-// float java.lang.Math.min(float, float)
-void IntrinsicLocationsBuilderMIPS::VisitMathMinFloatFloat(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS::VisitMathMinFloatFloat(HInvoke* invoke) {
- GenMinMaxFP(invoke->GetLocations(),
- /* is_min */ true,
- DataType::Type::kFloat32,
- IsR6(),
- GetAssembler());
-}
-
-// double java.lang.Math.max(double, double)
-void IntrinsicLocationsBuilderMIPS::VisitMathMaxDoubleDouble(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS::VisitMathMaxDoubleDouble(HInvoke* invoke) {
- GenMinMaxFP(invoke->GetLocations(),
- /* is_min */ false,
- DataType::Type::kFloat64,
- IsR6(),
- GetAssembler());
-}
-
-// float java.lang.Math.max(float, float)
-void IntrinsicLocationsBuilderMIPS::VisitMathMaxFloatFloat(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS::VisitMathMaxFloatFloat(HInvoke* invoke) {
- GenMinMaxFP(invoke->GetLocations(),
- /* is_min */ false,
- DataType::Type::kFloat32,
- IsR6(),
- GetAssembler());
-}
-
-static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
- locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-}
-
-static void GenMinMax(LocationSummary* locations,
- bool is_min,
- DataType::Type type,
- bool is_R6,
- MipsAssembler* assembler) {
- if (is_R6) {
- // Some architectures, such as ARM and MIPS (prior to r6), have a
- // conditional move instruction which only changes the target
- // (output) register if the condition is true (MIPS prior to r6 had
- // MOVF, MOVT, MOVN, and MOVZ). The SELEQZ and SELNEZ instructions
- // always change the target (output) register. If the condition is
- // true the output register gets the contents of the "rs" register;
- // otherwise, the output register is set to zero. One consequence
- // of this is that to implement something like "rd = c==0 ? rs : rt"
- // MIPS64r6 needs to use a pair of SELEQZ/SELNEZ instructions.
- // After executing this pair of instructions one of the output
- // registers from the pair will necessarily contain zero. Then the
- // code ORs the output registers from the SELEQZ/SELNEZ instructions
- // to get the final result.
- //
- // The initial test to see if the output register is same as the
- // first input register is needed to make sure that value in the
- // first input register isn't clobbered before we've finished
- // computing the output value. The logic in the corresponding else
- // clause performs the same task but makes sure the second input
- // register isn't clobbered in the event that it's the same register
- // as the output register; the else clause also handles the case
- // where the output register is distinct from both the first, and the
- // second input registers.
- if (type == DataType::Type::kInt64) {
- Register a_lo = locations->InAt(0).AsRegisterPairLow<Register>();
- Register a_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
- Register b_lo = locations->InAt(1).AsRegisterPairLow<Register>();
- Register b_hi = locations->InAt(1).AsRegisterPairHigh<Register>();
- Register out_lo = locations->Out().AsRegisterPairLow<Register>();
- Register out_hi = locations->Out().AsRegisterPairHigh<Register>();
-
- MipsLabel compare_done;
-
- if (a_lo == b_lo) {
- if (out_lo != a_lo) {
- __ Move(out_lo, a_lo);
- __ Move(out_hi, a_hi);
- }
- } else {
- __ Slt(TMP, b_hi, a_hi);
- __ Bne(b_hi, a_hi, &compare_done);
-
- __ Sltu(TMP, b_lo, a_lo);
-
- __ Bind(&compare_done);
-
- if (is_min) {
- __ Seleqz(AT, a_lo, TMP);
- __ Selnez(out_lo, b_lo, TMP); // Safe even if out_lo == a_lo/b_lo
- // because at this point we're
- // done using a_lo/b_lo.
- } else {
- __ Selnez(AT, a_lo, TMP);
- __ Seleqz(out_lo, b_lo, TMP); // ditto
- }
- __ Or(out_lo, out_lo, AT);
- if (is_min) {
- __ Seleqz(AT, a_hi, TMP);
- __ Selnez(out_hi, b_hi, TMP); // ditto but for out_hi & a_hi/b_hi
- } else {
- __ Selnez(AT, a_hi, TMP);
- __ Seleqz(out_hi, b_hi, TMP); // ditto but for out_hi & a_hi/b_hi
- }
- __ Or(out_hi, out_hi, AT);
- }
- } else {
- DCHECK_EQ(type, DataType::Type::kInt32);
- Register a = locations->InAt(0).AsRegister<Register>();
- Register b = locations->InAt(1).AsRegister<Register>();
- Register out = locations->Out().AsRegister<Register>();
-
- if (a == b) {
- if (out != a) {
- __ Move(out, a);
- }
- } else {
- __ Slt(AT, b, a);
- if (is_min) {
- __ Seleqz(TMP, a, AT);
- __ Selnez(AT, b, AT);
- } else {
- __ Selnez(TMP, a, AT);
- __ Seleqz(AT, b, AT);
- }
- __ Or(out, TMP, AT);
- }
- }
- } else {
- if (type == DataType::Type::kInt64) {
- Register a_lo = locations->InAt(0).AsRegisterPairLow<Register>();
- Register a_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
- Register b_lo = locations->InAt(1).AsRegisterPairLow<Register>();
- Register b_hi = locations->InAt(1).AsRegisterPairHigh<Register>();
- Register out_lo = locations->Out().AsRegisterPairLow<Register>();
- Register out_hi = locations->Out().AsRegisterPairHigh<Register>();
-
- MipsLabel compare_done;
-
- if (a_lo == b_lo) {
- if (out_lo != a_lo) {
- __ Move(out_lo, a_lo);
- __ Move(out_hi, a_hi);
- }
- } else {
- __ Slt(TMP, a_hi, b_hi);
- __ Bne(a_hi, b_hi, &compare_done);
-
- __ Sltu(TMP, a_lo, b_lo);
-
- __ Bind(&compare_done);
-
- if (is_min) {
- if (out_lo != a_lo) {
- __ Movn(out_hi, a_hi, TMP);
- __ Movn(out_lo, a_lo, TMP);
- }
- if (out_lo != b_lo) {
- __ Movz(out_hi, b_hi, TMP);
- __ Movz(out_lo, b_lo, TMP);
- }
- } else {
- if (out_lo != a_lo) {
- __ Movz(out_hi, a_hi, TMP);
- __ Movz(out_lo, a_lo, TMP);
- }
- if (out_lo != b_lo) {
- __ Movn(out_hi, b_hi, TMP);
- __ Movn(out_lo, b_lo, TMP);
- }
- }
- }
- } else {
- DCHECK_EQ(type, DataType::Type::kInt32);
- Register a = locations->InAt(0).AsRegister<Register>();
- Register b = locations->InAt(1).AsRegister<Register>();
- Register out = locations->Out().AsRegister<Register>();
-
- if (a == b) {
- if (out != a) {
- __ Move(out, a);
- }
- } else {
- __ Slt(AT, a, b);
- if (is_min) {
- if (out != a) {
- __ Movn(out, a, AT);
- }
- if (out != b) {
- __ Movz(out, b, AT);
- }
- } else {
- if (out != a) {
- __ Movz(out, a, AT);
- }
- if (out != b) {
- __ Movn(out, b, AT);
- }
- }
- }
- }
- }
-}
-
-// int java.lang.Math.min(int, int)
-void IntrinsicLocationsBuilderMIPS::VisitMathMinIntInt(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS::VisitMathMinIntInt(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(),
- /* is_min */ true,
- DataType::Type::kInt32,
- IsR6(),
- GetAssembler());
-}
-
-// long java.lang.Math.min(long, long)
-void IntrinsicLocationsBuilderMIPS::VisitMathMinLongLong(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS::VisitMathMinLongLong(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(),
- /* is_min */ true,
- DataType::Type::kInt64,
- IsR6(),
- GetAssembler());
-}
-
-// int java.lang.Math.max(int, int)
-void IntrinsicLocationsBuilderMIPS::VisitMathMaxIntInt(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS::VisitMathMaxIntInt(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(),
- /* is_min */ false,
- DataType::Type::kInt32,
- IsR6(),
- GetAssembler());
-}
-
-// long java.lang.Math.max(long, long)
-void IntrinsicLocationsBuilderMIPS::VisitMathMaxLongLong(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS::VisitMathMaxLongLong(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(),
- /* is_min */ false,
- DataType::Type::kInt64,
- IsR6(),
- GetAssembler());
+ GenBitCount(invoke->GetLocations(), DataType::Type::kInt64, IsR6(), HasMsa(), GetAssembler());
}
// double java.lang.Math.sqrt(double)
diff --git a/compiler/optimizing/intrinsics_mips.h b/compiler/optimizing/intrinsics_mips.h
index 13397f11d4..1c1ba40132 100644
--- a/compiler/optimizing/intrinsics_mips.h
+++ b/compiler/optimizing/intrinsics_mips.h
@@ -71,6 +71,7 @@ class IntrinsicCodeGeneratorMIPS FINAL : public IntrinsicVisitor {
bool IsR2OrNewer() const;
bool IsR6() const;
bool Is32BitFPU() const;
+ bool HasMsa() const;
private:
MipsAssembler* GetAssembler();
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index 5debd26c5a..f429afde2c 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -46,6 +46,10 @@ ArenaAllocator* IntrinsicCodeGeneratorMIPS64::GetAllocator() {
return codegen_->GetGraph()->GetAllocator();
}
+inline bool IntrinsicCodeGeneratorMIPS64::HasMsa() const {
+ return codegen_->GetInstructionSetFeatures().HasMsa();
+}
+
#define __ codegen->GetAssembler()->
static void MoveFromReturnRegister(Location trg,
@@ -386,6 +390,7 @@ static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
static void GenBitCount(LocationSummary* locations,
const DataType::Type type,
+ const bool hasMsa,
Mips64Assembler* assembler) {
GpuRegister out = locations->Out().AsRegister<GpuRegister>();
GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>();
@@ -414,41 +419,52 @@ static void GenBitCount(LocationSummary* locations,
// bits are set but the algorithm here attempts to minimize the total
// number of instructions executed even when a large number of bits
// are set.
-
- if (type == DataType::Type::kInt32) {
- __ Srl(TMP, in, 1);
- __ LoadConst32(AT, 0x55555555);
- __ And(TMP, TMP, AT);
- __ Subu(TMP, in, TMP);
- __ LoadConst32(AT, 0x33333333);
- __ And(out, TMP, AT);
- __ Srl(TMP, TMP, 2);
- __ And(TMP, TMP, AT);
- __ Addu(TMP, out, TMP);
- __ Srl(out, TMP, 4);
- __ Addu(out, out, TMP);
- __ LoadConst32(AT, 0x0F0F0F0F);
- __ And(out, out, AT);
- __ LoadConst32(TMP, 0x01010101);
- __ MulR6(out, out, TMP);
- __ Srl(out, out, 24);
- } else if (type == DataType::Type::kInt64) {
- __ Dsrl(TMP, in, 1);
- __ LoadConst64(AT, 0x5555555555555555L);
- __ And(TMP, TMP, AT);
- __ Dsubu(TMP, in, TMP);
- __ LoadConst64(AT, 0x3333333333333333L);
- __ And(out, TMP, AT);
- __ Dsrl(TMP, TMP, 2);
- __ And(TMP, TMP, AT);
- __ Daddu(TMP, out, TMP);
- __ Dsrl(out, TMP, 4);
- __ Daddu(out, out, TMP);
- __ LoadConst64(AT, 0x0F0F0F0F0F0F0F0FL);
- __ And(out, out, AT);
- __ LoadConst64(TMP, 0x0101010101010101L);
- __ Dmul(out, out, TMP);
- __ Dsrl32(out, out, 24);
+ if (hasMsa) {
+ if (type == DataType::Type::kInt32) {
+ __ Mtc1(in, FTMP);
+ __ PcntW(static_cast<VectorRegister>(FTMP), static_cast<VectorRegister>(FTMP));
+ __ Mfc1(out, FTMP);
+ } else {
+ __ Dmtc1(in, FTMP);
+ __ PcntD(static_cast<VectorRegister>(FTMP), static_cast<VectorRegister>(FTMP));
+ __ Dmfc1(out, FTMP);
+ }
+ } else {
+ if (type == DataType::Type::kInt32) {
+ __ Srl(TMP, in, 1);
+ __ LoadConst32(AT, 0x55555555);
+ __ And(TMP, TMP, AT);
+ __ Subu(TMP, in, TMP);
+ __ LoadConst32(AT, 0x33333333);
+ __ And(out, TMP, AT);
+ __ Srl(TMP, TMP, 2);
+ __ And(TMP, TMP, AT);
+ __ Addu(TMP, out, TMP);
+ __ Srl(out, TMP, 4);
+ __ Addu(out, out, TMP);
+ __ LoadConst32(AT, 0x0F0F0F0F);
+ __ And(out, out, AT);
+ __ LoadConst32(TMP, 0x01010101);
+ __ MulR6(out, out, TMP);
+ __ Srl(out, out, 24);
+ } else {
+ __ Dsrl(TMP, in, 1);
+ __ LoadConst64(AT, 0x5555555555555555L);
+ __ And(TMP, TMP, AT);
+ __ Dsubu(TMP, in, TMP);
+ __ LoadConst64(AT, 0x3333333333333333L);
+ __ And(out, TMP, AT);
+ __ Dsrl(TMP, TMP, 2);
+ __ And(TMP, TMP, AT);
+ __ Daddu(TMP, out, TMP);
+ __ Dsrl(out, TMP, 4);
+ __ Daddu(out, out, TMP);
+ __ LoadConst64(AT, 0x0F0F0F0F0F0F0F0FL);
+ __ And(out, out, AT);
+ __ LoadConst64(TMP, 0x0101010101010101L);
+ __ Dmul(out, out, TMP);
+ __ Dsrl32(out, out, 24);
+ }
}
}
@@ -458,7 +474,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitIntegerBitCount(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorMIPS64::VisitIntegerBitCount(HInvoke* invoke) {
- GenBitCount(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
+ GenBitCount(invoke->GetLocations(), DataType::Type::kInt32, HasMsa(), GetAssembler());
}
// int java.lang.Long.bitCount(long)
@@ -467,222 +483,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitLongBitCount(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorMIPS64::VisitLongBitCount(HInvoke* invoke) {
- GenBitCount(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler());
-}
-
-static void GenMinMaxFP(LocationSummary* locations,
- bool is_min,
- DataType::Type type,
- Mips64Assembler* assembler) {
- FpuRegister a = locations->InAt(0).AsFpuRegister<FpuRegister>();
- FpuRegister b = locations->InAt(1).AsFpuRegister<FpuRegister>();
- FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>();
-
- Mips64Label noNaNs;
- Mips64Label done;
- FpuRegister ftmp = ((out != a) && (out != b)) ? out : FTMP;
-
- // When Java computes min/max it prefers a NaN to a number; the
- // behavior of MIPSR6 is to prefer numbers to NaNs, i.e., if one of
- // the inputs is a NaN and the other is a valid number, the MIPS
- // instruction will return the number; Java wants the NaN value
- // returned. This is why there is extra logic preceding the use of
- // the MIPS min.fmt/max.fmt instructions. If either a, or b holds a
- // NaN, return the NaN, otherwise return the min/max.
- if (type == DataType::Type::kFloat64) {
- __ CmpUnD(FTMP, a, b);
- __ Bc1eqz(FTMP, &noNaNs);
-
- // One of the inputs is a NaN
- __ CmpEqD(ftmp, a, a);
- // If a == a then b is the NaN, otherwise a is the NaN.
- __ SelD(ftmp, a, b);
-
- if (ftmp != out) {
- __ MovD(out, ftmp);
- }
-
- __ Bc(&done);
-
- __ Bind(&noNaNs);
-
- if (is_min) {
- __ MinD(out, a, b);
- } else {
- __ MaxD(out, a, b);
- }
- } else {
- DCHECK_EQ(type, DataType::Type::kFloat32);
- __ CmpUnS(FTMP, a, b);
- __ Bc1eqz(FTMP, &noNaNs);
-
- // One of the inputs is a NaN
- __ CmpEqS(ftmp, a, a);
- // If a == a then b is the NaN, otherwise a is the NaN.
- __ SelS(ftmp, a, b);
-
- if (ftmp != out) {
- __ MovS(out, ftmp);
- }
-
- __ Bc(&done);
-
- __ Bind(&noNaNs);
-
- if (is_min) {
- __ MinS(out, a, b);
- } else {
- __ MaxS(out, a, b);
- }
- }
-
- __ Bind(&done);
-}
-
-static void CreateFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::RequiresFpuRegister());
- locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
-}
-
-// double java.lang.Math.min(double, double)
-void IntrinsicLocationsBuilderMIPS64::VisitMathMinDoubleDouble(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS64::VisitMathMinDoubleDouble(HInvoke* invoke) {
- GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, DataType::Type::kFloat64, GetAssembler());
-}
-
-// float java.lang.Math.min(float, float)
-void IntrinsicLocationsBuilderMIPS64::VisitMathMinFloatFloat(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS64::VisitMathMinFloatFloat(HInvoke* invoke) {
- GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, DataType::Type::kFloat32, GetAssembler());
-}
-
-// double java.lang.Math.max(double, double)
-void IntrinsicLocationsBuilderMIPS64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
- GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, DataType::Type::kFloat64, GetAssembler());
-}
-
-// float java.lang.Math.max(float, float)
-void IntrinsicLocationsBuilderMIPS64::VisitMathMaxFloatFloat(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS64::VisitMathMaxFloatFloat(HInvoke* invoke) {
- GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, DataType::Type::kFloat32, GetAssembler());
-}
-
-static void GenMinMax(LocationSummary* locations,
- bool is_min,
- Mips64Assembler* assembler) {
- GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>();
- GpuRegister rhs = locations->InAt(1).AsRegister<GpuRegister>();
- GpuRegister out = locations->Out().AsRegister<GpuRegister>();
-
- if (lhs == rhs) {
- if (out != lhs) {
- __ Move(out, lhs);
- }
- } else {
- // Some architectures, such as ARM and MIPS (prior to r6), have a
- // conditional move instruction which only changes the target
- // (output) register if the condition is true (MIPS prior to r6 had
- // MOVF, MOVT, and MOVZ). The SELEQZ and SELNEZ instructions always
- // change the target (output) register. If the condition is true the
- // output register gets the contents of the "rs" register; otherwise,
- // the output register is set to zero. One consequence of this is
- // that to implement something like "rd = c==0 ? rs : rt" MIPS64r6
- // needs to use a pair of SELEQZ/SELNEZ instructions. After
- // executing this pair of instructions one of the output registers
- // from the pair will necessarily contain zero. Then the code ORs the
- // output registers from the SELEQZ/SELNEZ instructions to get the
- // final result.
- //
- // The initial test to see if the output register is same as the
- // first input register is needed to make sure that value in the
- // first input register isn't clobbered before we've finished
- // computing the output value. The logic in the corresponding else
- // clause performs the same task but makes sure the second input
- // register isn't clobbered in the event that it's the same register
- // as the output register; the else clause also handles the case
- // where the output register is distinct from both the first, and the
- // second input registers.
- if (out == lhs) {
- __ Slt(AT, rhs, lhs);
- if (is_min) {
- __ Seleqz(out, lhs, AT);
- __ Selnez(AT, rhs, AT);
- } else {
- __ Selnez(out, lhs, AT);
- __ Seleqz(AT, rhs, AT);
- }
- } else {
- __ Slt(AT, lhs, rhs);
- if (is_min) {
- __ Seleqz(out, rhs, AT);
- __ Selnez(AT, lhs, AT);
- } else {
- __ Selnez(out, rhs, AT);
- __ Seleqz(AT, lhs, AT);
- }
- }
- __ Or(out, out, AT);
- }
-}
-
-static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
- locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-}
-
-// int java.lang.Math.min(int, int)
-void IntrinsicLocationsBuilderMIPS64::VisitMathMinIntInt(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS64::VisitMathMinIntInt(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(), /* is_min */ true, GetAssembler());
-}
-
-// long java.lang.Math.min(long, long)
-void IntrinsicLocationsBuilderMIPS64::VisitMathMinLongLong(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS64::VisitMathMinLongLong(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(), /* is_min */ true, GetAssembler());
-}
-
-// int java.lang.Math.max(int, int)
-void IntrinsicLocationsBuilderMIPS64::VisitMathMaxIntInt(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS64::VisitMathMaxIntInt(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(), /* is_min */ false, GetAssembler());
-}
-
-// long java.lang.Math.max(long, long)
-void IntrinsicLocationsBuilderMIPS64::VisitMathMaxLongLong(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS64::VisitMathMaxLongLong(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(), /* is_min */ false, GetAssembler());
+ GenBitCount(invoke->GetLocations(), DataType::Type::kInt64, HasMsa(), GetAssembler());
}
// double java.lang.Math.sqrt(double)
diff --git a/compiler/optimizing/intrinsics_mips64.h b/compiler/optimizing/intrinsics_mips64.h
index 6f40d90ddb..748b0b02b2 100644
--- a/compiler/optimizing/intrinsics_mips64.h
+++ b/compiler/optimizing/intrinsics_mips64.h
@@ -68,6 +68,8 @@ class IntrinsicCodeGeneratorMIPS64 FINAL : public IntrinsicVisitor {
#undef INTRINSICS_LIST
#undef OPTIMIZING_INTRINSICS
+ bool HasMsa() const;
+
private:
Mips64Assembler* GetAssembler();
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 0edc061e97..c4f322bf0c 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -40,11 +40,6 @@ namespace art {
namespace x86 {
-static constexpr int kDoubleNaNHigh = 0x7FF80000;
-static constexpr int kDoubleNaNLow = 0x00000000;
-static constexpr int64_t kDoubleNaN = INT64_C(0x7FF8000000000000);
-static constexpr int32_t kFloatNaN = INT32_C(0x7FC00000);
-
IntrinsicLocationsBuilderX86::IntrinsicLocationsBuilderX86(CodeGeneratorX86* codegen)
: allocator_(codegen->GetGraph()->GetAllocator()),
codegen_(codegen) {
@@ -333,278 +328,6 @@ void IntrinsicCodeGeneratorX86::VisitShortReverseBytes(HInvoke* invoke) {
GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
}
-static void GenMinMaxFP(HInvoke* invoke,
- bool is_min,
- bool is_double,
- X86Assembler* assembler,
- CodeGeneratorX86* codegen) {
- LocationSummary* locations = invoke->GetLocations();
- Location op1_loc = locations->InAt(0);
- Location op2_loc = locations->InAt(1);
- Location out_loc = locations->Out();
- XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
-
- // Shortcut for same input locations.
- if (op1_loc.Equals(op2_loc)) {
- DCHECK(out_loc.Equals(op1_loc));
- return;
- }
-
- // (out := op1)
- // out <=? op2
- // if Nan jmp Nan_label
- // if out is min jmp done
- // if op2 is min jmp op2_label
- // handle -0/+0
- // jmp done
- // Nan_label:
- // out := NaN
- // op2_label:
- // out := op2
- // done:
- //
- // This removes one jmp, but needs to copy one input (op1) to out.
- //
- // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath?
-
- XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();
-
- NearLabel nan, done, op2_label;
- if (is_double) {
- __ ucomisd(out, op2);
- } else {
- __ ucomiss(out, op2);
- }
-
- __ j(Condition::kParityEven, &nan);
-
- __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
- __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);
-
- // Handle 0.0/-0.0.
- if (is_min) {
- if (is_double) {
- __ orpd(out, op2);
- } else {
- __ orps(out, op2);
- }
- } else {
- if (is_double) {
- __ andpd(out, op2);
- } else {
- __ andps(out, op2);
- }
- }
- __ jmp(&done);
-
- // NaN handling.
- __ Bind(&nan);
- // Do we have a constant area pointer?
- if (locations->GetInputCount() == 3 && locations->InAt(2).IsValid()) {
- HX86ComputeBaseMethodAddress* method_address =
- invoke->InputAt(2)->AsX86ComputeBaseMethodAddress();
- DCHECK(locations->InAt(2).IsRegister());
- Register constant_area = locations->InAt(2).AsRegister<Register>();
- if (is_double) {
- __ movsd(out, codegen->LiteralInt64Address(kDoubleNaN, method_address, constant_area));
- } else {
- __ movss(out, codegen->LiteralInt32Address(kFloatNaN, method_address, constant_area));
- }
- } else {
- if (is_double) {
- __ pushl(Immediate(kDoubleNaNHigh));
- __ pushl(Immediate(kDoubleNaNLow));
- __ movsd(out, Address(ESP, 0));
- __ addl(ESP, Immediate(8));
- } else {
- __ pushl(Immediate(kFloatNaN));
- __ movss(out, Address(ESP, 0));
- __ addl(ESP, Immediate(4));
- }
- }
- __ jmp(&done);
-
- // out := op2;
- __ Bind(&op2_label);
- if (is_double) {
- __ movsd(out, op2);
- } else {
- __ movss(out, op2);
- }
-
- // Done.
- __ Bind(&done);
-}
-
-static void CreateFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::RequiresFpuRegister());
- // The following is sub-optimal, but all we can do for now. It would be fine to also accept
- // the second input to be the output (we can simply swap inputs).
- locations->SetOut(Location::SameAsFirstInput());
- HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
- DCHECK(static_or_direct != nullptr);
- if (static_or_direct->HasSpecialInput() &&
- invoke->InputAt(static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
- locations->SetInAt(2, Location::RequiresRegister());
- }
-}
-
-void IntrinsicLocationsBuilderX86::VisitMathMinDoubleDouble(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86::VisitMathMinDoubleDouble(HInvoke* invoke) {
- GenMinMaxFP(invoke,
- /* is_min */ true,
- /* is_double */ true,
- GetAssembler(),
- codegen_);
-}
-
-void IntrinsicLocationsBuilderX86::VisitMathMinFloatFloat(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86::VisitMathMinFloatFloat(HInvoke* invoke) {
- GenMinMaxFP(invoke,
- /* is_min */ true,
- /* is_double */ false,
- GetAssembler(),
- codegen_);
-}
-
-void IntrinsicLocationsBuilderX86::VisitMathMaxDoubleDouble(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86::VisitMathMaxDoubleDouble(HInvoke* invoke) {
- GenMinMaxFP(invoke,
- /* is_min */ false,
- /* is_double */ true,
- GetAssembler(),
- codegen_);
-}
-
-void IntrinsicLocationsBuilderX86::VisitMathMaxFloatFloat(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86::VisitMathMaxFloatFloat(HInvoke* invoke) {
- GenMinMaxFP(invoke,
- /* is_min */ false,
- /* is_double */ false,
- GetAssembler(),
- codegen_);
-}
-
-static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
- X86Assembler* assembler) {
- Location op1_loc = locations->InAt(0);
- Location op2_loc = locations->InAt(1);
-
- // Shortcut for same input locations.
- if (op1_loc.Equals(op2_loc)) {
- // Can return immediately, as op1_loc == out_loc.
- // Note: if we ever support separate registers, e.g., output into memory, we need to check for
- // a copy here.
- DCHECK(locations->Out().Equals(op1_loc));
- return;
- }
-
- if (is_long) {
- // Need to perform a subtract to get the sign right.
- // op1 is already in the same location as the output.
- Location output = locations->Out();
- Register output_lo = output.AsRegisterPairLow<Register>();
- Register output_hi = output.AsRegisterPairHigh<Register>();
-
- Register op2_lo = op2_loc.AsRegisterPairLow<Register>();
- Register op2_hi = op2_loc.AsRegisterPairHigh<Register>();
-
- // Spare register to compute the subtraction to set condition code.
- Register temp = locations->GetTemp(0).AsRegister<Register>();
-
- // Subtract off op2_low.
- __ movl(temp, output_lo);
- __ subl(temp, op2_lo);
-
- // Now use the same tempo and the borrow to finish the subtraction of op2_hi.
- __ movl(temp, output_hi);
- __ sbbl(temp, op2_hi);
-
- // Now the condition code is correct.
- Condition cond = is_min ? Condition::kGreaterEqual : Condition::kLess;
- __ cmovl(cond, output_lo, op2_lo);
- __ cmovl(cond, output_hi, op2_hi);
- } else {
- Register out = locations->Out().AsRegister<Register>();
- Register op2 = op2_loc.AsRegister<Register>();
-
- // (out := op1)
- // out <=? op2
- // if out is min jmp done
- // out := op2
- // done:
-
- __ cmpl(out, op2);
- Condition cond = is_min ? Condition::kGreater : Condition::kLess;
- __ cmovl(cond, out, op2);
- }
-}
-
-static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
- locations->SetOut(Location::SameAsFirstInput());
-}
-
-static void CreateLongLongToLongLocations(ArenaAllocator* allocator, HInvoke* invoke) {
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
- locations->SetOut(Location::SameAsFirstInput());
- // Register to use to perform a long subtract to set cc.
- locations->AddTemp(Location::RequiresRegister());
-}
-
-void IntrinsicLocationsBuilderX86::VisitMathMinIntInt(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86::VisitMathMinIntInt(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetAssembler());
-}
-
-void IntrinsicLocationsBuilderX86::VisitMathMinLongLong(HInvoke* invoke) {
- CreateLongLongToLongLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86::VisitMathMinLongLong(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetAssembler());
-}
-
-void IntrinsicLocationsBuilderX86::VisitMathMaxIntInt(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86::VisitMathMaxIntInt(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetAssembler());
-}
-
-void IntrinsicLocationsBuilderX86::VisitMathMaxLongLong(HInvoke* invoke) {
- CreateLongLongToLongLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86::VisitMathMaxLongLong(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetAssembler());
-}
-
static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
LocationSummary* locations =
new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 9d378d6b59..437bc3dd3c 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -236,208 +236,6 @@ void IntrinsicCodeGeneratorX86_64::VisitShortReverseBytes(HInvoke* invoke) {
GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
}
-static void GenMinMaxFP(LocationSummary* locations,
- bool is_min,
- bool is_double,
- X86_64Assembler* assembler,
- CodeGeneratorX86_64* codegen) {
- Location op1_loc = locations->InAt(0);
- Location op2_loc = locations->InAt(1);
- Location out_loc = locations->Out();
- XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
-
- // Shortcut for same input locations.
- if (op1_loc.Equals(op2_loc)) {
- DCHECK(out_loc.Equals(op1_loc));
- return;
- }
-
- // (out := op1)
- // out <=? op2
- // if Nan jmp Nan_label
- // if out is min jmp done
- // if op2 is min jmp op2_label
- // handle -0/+0
- // jmp done
- // Nan_label:
- // out := NaN
- // op2_label:
- // out := op2
- // done:
- //
- // This removes one jmp, but needs to copy one input (op1) to out.
- //
- // TODO: This is straight from Quick. Make NaN an out-of-line slowpath?
-
- XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();
-
- NearLabel nan, done, op2_label;
- if (is_double) {
- __ ucomisd(out, op2);
- } else {
- __ ucomiss(out, op2);
- }
-
- __ j(Condition::kParityEven, &nan);
-
- __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
- __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);
-
- // Handle 0.0/-0.0.
- if (is_min) {
- if (is_double) {
- __ orpd(out, op2);
- } else {
- __ orps(out, op2);
- }
- } else {
- if (is_double) {
- __ andpd(out, op2);
- } else {
- __ andps(out, op2);
- }
- }
- __ jmp(&done);
-
- // NaN handling.
- __ Bind(&nan);
- if (is_double) {
- __ movsd(out, codegen->LiteralInt64Address(INT64_C(0x7FF8000000000000)));
- } else {
- __ movss(out, codegen->LiteralInt32Address(INT32_C(0x7FC00000)));
- }
- __ jmp(&done);
-
- // out := op2;
- __ Bind(&op2_label);
- if (is_double) {
- __ movsd(out, op2);
- } else {
- __ movss(out, op2);
- }
-
- // Done.
- __ Bind(&done);
-}
-
-static void CreateFPFPToFP(ArenaAllocator* allocator, HInvoke* invoke) {
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::RequiresFpuRegister());
- // The following is sub-optimal, but all we can do for now. It would be fine to also accept
- // the second input to be the output (we can simply swap inputs).
- locations->SetOut(Location::SameAsFirstInput());
-}
-
-void IntrinsicLocationsBuilderX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
- CreateFPFPToFP(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
- GenMinMaxFP(
- invoke->GetLocations(), /* is_min */ true, /* is_double */ true, GetAssembler(), codegen_);
-}
-
-void IntrinsicLocationsBuilderX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
- CreateFPFPToFP(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
- GenMinMaxFP(
- invoke->GetLocations(), /* is_min */ true, /* is_double */ false, GetAssembler(), codegen_);
-}
-
-void IntrinsicLocationsBuilderX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
- CreateFPFPToFP(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
- GenMinMaxFP(
- invoke->GetLocations(), /* is_min */ false, /* is_double */ true, GetAssembler(), codegen_);
-}
-
-void IntrinsicLocationsBuilderX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
- CreateFPFPToFP(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
- GenMinMaxFP(
- invoke->GetLocations(), /* is_min */ false, /* is_double */ false, GetAssembler(), codegen_);
-}
-
-static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
- X86_64Assembler* assembler) {
- Location op1_loc = locations->InAt(0);
- Location op2_loc = locations->InAt(1);
-
- // Shortcut for same input locations.
- if (op1_loc.Equals(op2_loc)) {
- // Can return immediately, as op1_loc == out_loc.
- // Note: if we ever support separate registers, e.g., output into memory, we need to check for
- // a copy here.
- DCHECK(locations->Out().Equals(op1_loc));
- return;
- }
-
- CpuRegister out = locations->Out().AsRegister<CpuRegister>();
- CpuRegister op2 = op2_loc.AsRegister<CpuRegister>();
-
- // (out := op1)
- // out <=? op2
- // if out is min jmp done
- // out := op2
- // done:
-
- if (is_long) {
- __ cmpq(out, op2);
- } else {
- __ cmpl(out, op2);
- }
-
- __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, is_long);
-}
-
-static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
- locations->SetOut(Location::SameAsFirstInput());
-}
-
-void IntrinsicLocationsBuilderX86_64::VisitMathMinIntInt(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86_64::VisitMathMinIntInt(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetAssembler());
-}
-
-void IntrinsicLocationsBuilderX86_64::VisitMathMinLongLong(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86_64::VisitMathMinLongLong(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetAssembler());
-}
-
-void IntrinsicLocationsBuilderX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetAssembler());
-}
-
-void IntrinsicLocationsBuilderX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetAssembler());
-}
-
static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
LocationSummary* locations =
new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 899496328e..d3b081e005 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -334,29 +334,14 @@ static bool IsAddConst(HInstruction* instruction,
// Detect reductions of the following forms,
// x = x_phi + ..
// x = x_phi - ..
-// x = max(x_phi, ..)
// x = min(x_phi, ..)
+// x = max(x_phi, ..)
static bool HasReductionFormat(HInstruction* reduction, HInstruction* phi) {
- if (reduction->IsAdd()) {
+ if (reduction->IsAdd() || reduction->IsMin() || reduction->IsMax()) {
return (reduction->InputAt(0) == phi && reduction->InputAt(1) != phi) ||
(reduction->InputAt(0) != phi && reduction->InputAt(1) == phi);
} else if (reduction->IsSub()) {
return (reduction->InputAt(0) == phi && reduction->InputAt(1) != phi);
- } else if (reduction->IsInvokeStaticOrDirect()) {
- switch (reduction->AsInvokeStaticOrDirect()->GetIntrinsic()) {
- case Intrinsics::kMathMinIntInt:
- case Intrinsics::kMathMinLongLong:
- case Intrinsics::kMathMinFloatFloat:
- case Intrinsics::kMathMinDoubleDouble:
- case Intrinsics::kMathMaxIntInt:
- case Intrinsics::kMathMaxLongLong:
- case Intrinsics::kMathMaxFloatFloat:
- case Intrinsics::kMathMaxDoubleDouble:
- return (reduction->InputAt(0) == phi && reduction->InputAt(1) != phi) ||
- (reduction->InputAt(0) != phi && reduction->InputAt(1) == phi);
- default:
- return false;
- }
}
return false;
}
@@ -1297,80 +1282,59 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node,
return true;
}
}
- } else if (instruction->IsInvokeStaticOrDirect()) {
- // Accept particular intrinsics.
- HInvokeStaticOrDirect* invoke = instruction->AsInvokeStaticOrDirect();
- switch (invoke->GetIntrinsic()) {
- case Intrinsics::kMathAbsInt:
- case Intrinsics::kMathAbsLong:
- case Intrinsics::kMathAbsFloat:
- case Intrinsics::kMathAbsDouble: {
- // Deal with vector restrictions.
- HInstruction* opa = instruction->InputAt(0);
- HInstruction* r = opa;
- bool is_unsigned = false;
- if (HasVectorRestrictions(restrictions, kNoAbs)) {
- return false;
- } else if (HasVectorRestrictions(restrictions, kNoHiBits) &&
- (!IsNarrowerOperand(opa, type, &r, &is_unsigned) || is_unsigned)) {
- return false; // reject, unless operand is sign-extension narrower
- }
- // Accept ABS(x) for vectorizable operand.
- DCHECK(r != nullptr);
- if (generate_code && vector_mode_ != kVector) { // de-idiom
- r = opa;
- }
- if (VectorizeUse(node, r, generate_code, type, restrictions)) {
- if (generate_code) {
- GenerateVecOp(instruction,
- vector_map_->Get(r),
- nullptr,
- HVecOperation::ToProperType(type, is_unsigned));
- }
- return true;
- }
- return false;
+ } else if (instruction->IsAbs()) {
+ // Deal with vector restrictions.
+ HInstruction* opa = instruction->InputAt(0);
+ HInstruction* r = opa;
+ bool is_unsigned = false;
+ if (HasVectorRestrictions(restrictions, kNoAbs)) {
+ return false;
+ } else if (HasVectorRestrictions(restrictions, kNoHiBits) &&
+ (!IsNarrowerOperand(opa, type, &r, &is_unsigned) || is_unsigned)) {
+ return false; // reject, unless operand is sign-extension narrower
+ }
+ // Accept ABS(x) for vectorizable operand.
+ DCHECK(r != nullptr);
+ if (generate_code && vector_mode_ != kVector) { // de-idiom
+ r = opa;
+ }
+ if (VectorizeUse(node, r, generate_code, type, restrictions)) {
+ if (generate_code) {
+ GenerateVecOp(instruction,
+ vector_map_->Get(r),
+ nullptr,
+ HVecOperation::ToProperType(type, is_unsigned));
}
- case Intrinsics::kMathMinIntInt:
- case Intrinsics::kMathMinLongLong:
- case Intrinsics::kMathMinFloatFloat:
- case Intrinsics::kMathMinDoubleDouble:
- case Intrinsics::kMathMaxIntInt:
- case Intrinsics::kMathMaxLongLong:
- case Intrinsics::kMathMaxFloatFloat:
- case Intrinsics::kMathMaxDoubleDouble: {
- // Deal with vector restrictions.
- HInstruction* opa = instruction->InputAt(0);
- HInstruction* opb = instruction->InputAt(1);
- HInstruction* r = opa;
- HInstruction* s = opb;
- bool is_unsigned = false;
- if (HasVectorRestrictions(restrictions, kNoMinMax)) {
- return false;
- } else if (HasVectorRestrictions(restrictions, kNoHiBits) &&
- !IsNarrowerOperands(opa, opb, type, &r, &s, &is_unsigned)) {
- return false; // reject, unless all operands are same-extension narrower
- }
- // Accept MIN/MAX(x, y) for vectorizable operands.
- DCHECK(r != nullptr);
- DCHECK(s != nullptr);
- if (generate_code && vector_mode_ != kVector) { // de-idiom
- r = opa;
- s = opb;
- }
- if (VectorizeUse(node, r, generate_code, type, restrictions) &&
- VectorizeUse(node, s, generate_code, type, restrictions)) {
- if (generate_code) {
- GenerateVecOp(
- instruction, vector_map_->Get(r), vector_map_->Get(s), type, is_unsigned);
- }
- return true;
- }
- return false;
+ return true;
+ }
+ } else if (instruction->IsMin() || instruction->IsMax()) {
+ // Deal with vector restrictions.
+ HInstruction* opa = instruction->InputAt(0);
+ HInstruction* opb = instruction->InputAt(1);
+ HInstruction* r = opa;
+ HInstruction* s = opb;
+ bool is_unsigned = false;
+ if (HasVectorRestrictions(restrictions, kNoMinMax)) {
+ return false;
+ } else if (HasVectorRestrictions(restrictions, kNoHiBits) &&
+ !IsNarrowerOperands(opa, opb, type, &r, &s, &is_unsigned)) {
+ return false; // reject, unless all operands are same-extension narrower
+ }
+ // Accept MIN/MAX(x, y) for vectorizable operands.
+ DCHECK(r != nullptr);
+ DCHECK(s != nullptr);
+ if (generate_code && vector_mode_ != kVector) { // de-idiom
+ r = opa;
+ s = opb;
+ }
+ if (VectorizeUse(node, r, generate_code, type, restrictions) &&
+ VectorizeUse(node, s, generate_code, type, restrictions)) {
+ if (generate_code) {
+ GenerateVecOp(
+ instruction, vector_map_->Get(r), vector_map_->Get(s), type, is_unsigned);
}
- default:
- return false;
- } // switch
+ return true;
+ }
}
return false;
}
@@ -1811,83 +1775,29 @@ void HLoopOptimization::GenerateVecOp(HInstruction* org,
GENERATE_VEC(
new (global_allocator_) HVecUShr(global_allocator_, opa, opb, type, vector_length_, dex_pc),
new (global_allocator_) HUShr(org_type, opa, opb, dex_pc));
- case HInstruction::kInvokeStaticOrDirect: {
- HInvokeStaticOrDirect* invoke = org->AsInvokeStaticOrDirect();
- if (vector_mode_ == kVector) {
- switch (invoke->GetIntrinsic()) {
- case Intrinsics::kMathAbsInt:
- case Intrinsics::kMathAbsLong:
- case Intrinsics::kMathAbsFloat:
- case Intrinsics::kMathAbsDouble:
- DCHECK(opb == nullptr);
- vector = new (global_allocator_)
- HVecAbs(global_allocator_, opa, type, vector_length_, dex_pc);
- break;
- case Intrinsics::kMathMinIntInt:
- case Intrinsics::kMathMinLongLong:
- case Intrinsics::kMathMinFloatFloat:
- case Intrinsics::kMathMinDoubleDouble: {
- vector = new (global_allocator_)
- HVecMin(global_allocator_,
- opa,
- opb,
- HVecOperation::ToProperType(type, is_unsigned),
- vector_length_,
- dex_pc);
- break;
- }
- case Intrinsics::kMathMaxIntInt:
- case Intrinsics::kMathMaxLongLong:
- case Intrinsics::kMathMaxFloatFloat:
- case Intrinsics::kMathMaxDoubleDouble: {
- vector = new (global_allocator_)
- HVecMax(global_allocator_,
- opa,
- opb,
- HVecOperation::ToProperType(type, is_unsigned),
- vector_length_,
- dex_pc);
- break;
- }
- default:
- LOG(FATAL) << "Unsupported SIMD intrinsic " << org->GetId();
- UNREACHABLE();
- } // switch invoke
- } else {
- // In scalar code, simply clone the method invoke, and replace its operands with the
- // corresponding new scalar instructions in the loop. The instruction will get an
- // environment while being inserted from the instruction map in original program order.
- DCHECK(vector_mode_ == kSequential);
- size_t num_args = invoke->GetNumberOfArguments();
- HInvokeStaticOrDirect* new_invoke = new (global_allocator_) HInvokeStaticOrDirect(
- global_allocator_,
- num_args,
- invoke->GetType(),
- invoke->GetDexPc(),
- invoke->GetDexMethodIndex(),
- invoke->GetResolvedMethod(),
- invoke->GetDispatchInfo(),
- invoke->GetInvokeType(),
- invoke->GetTargetMethod(),
- invoke->GetClinitCheckRequirement());
- HInputsRef inputs = invoke->GetInputs();
- size_t num_inputs = inputs.size();
- DCHECK_LE(num_args, num_inputs);
- DCHECK_EQ(num_inputs, new_invoke->GetInputs().size()); // both invokes agree
- for (size_t index = 0; index < num_inputs; ++index) {
- HInstruction* new_input = index < num_args
- ? vector_map_->Get(inputs[index])
- : inputs[index]; // beyond arguments: just pass through
- new_invoke->SetArgumentAt(index, new_input);
- }
- new_invoke->SetIntrinsic(invoke->GetIntrinsic(),
- kNeedsEnvironmentOrCache,
- kNoSideEffects,
- kNoThrow);
- vector = new_invoke;
- }
- break;
- }
+ case HInstruction::kMin:
+ GENERATE_VEC(
+ new (global_allocator_) HVecMin(global_allocator_,
+ opa,
+ opb,
+ HVecOperation::ToProperType(type, is_unsigned),
+ vector_length_,
+ dex_pc),
+ new (global_allocator_) HMin(org_type, opa, opb, dex_pc));
+ case HInstruction::kMax:
+ GENERATE_VEC(
+ new (global_allocator_) HVecMax(global_allocator_,
+ opa,
+ opb,
+ HVecOperation::ToProperType(type, is_unsigned),
+ vector_length_,
+ dex_pc),
+ new (global_allocator_) HMax(org_type, opa, opb, dex_pc));
+ case HInstruction::kAbs:
+ DCHECK(opb == nullptr);
+ GENERATE_VEC(
+ new (global_allocator_) HVecAbs(global_allocator_, opa, type, vector_length_, dex_pc),
+ new (global_allocator_) HAbs(org_type, opa, dex_pc));
default:
break;
} // switch
@@ -1998,9 +1908,7 @@ bool HLoopOptimization::VectorizeSADIdiom(LoopNode* node,
HInstruction* v = instruction->InputAt(1);
HInstruction* a = nullptr;
HInstruction* b = nullptr;
- if (v->IsInvokeStaticOrDirect() &&
- (v->AsInvokeStaticOrDirect()->GetIntrinsic() == Intrinsics::kMathAbsInt ||
- v->AsInvokeStaticOrDirect()->GetIntrinsic() == Intrinsics::kMathAbsLong)) {
+ if (v->GetType() == reduction_type && v->IsAbs()) {
HInstruction* x = v->InputAt(0);
if (x->GetType() == reduction_type) {
int64_t c = 0;
@@ -2054,14 +1962,13 @@ bool HLoopOptimization::VectorizeSADIdiom(LoopNode* node,
VectorizeUse(node, r, generate_code, sub_type, restrictions) &&
VectorizeUse(node, s, generate_code, sub_type, restrictions)) {
if (generate_code) {
- reduction_type = HVecOperation::ToProperType(reduction_type, is_unsigned);
if (vector_mode_ == kVector) {
vector_map_->Put(instruction, new (global_allocator_) HVecSADAccumulate(
global_allocator_,
vector_map_->Get(q),
vector_map_->Get(r),
vector_map_->Get(s),
- reduction_type,
+ HVecOperation::ToProperType(reduction_type, is_unsigned),
GetOtherVL(reduction_type, sub_type, vector_length_),
kNoDexPc));
MaybeRecordStat(stats_, MethodCompilationStat::kLoopVectorizedIdiom);
diff --git a/compiler/optimizing/loop_optimization_test.cc b/compiler/optimizing/loop_optimization_test.cc
index db8368986c..c21bd65d97 100644
--- a/compiler/optimizing/loop_optimization_test.cc
+++ b/compiler/optimizing/loop_optimization_test.cc
@@ -227,11 +227,14 @@ TEST_F(LoopOptimizationTest, SimplifyLoopReoderPredecessors) {
graph_->ClearDominanceInformation();
graph_->BuildDominatorTree();
+ // BuildDominatorTree inserts a block beetween loop header and entry block.
+ EXPECT_EQ(header->GetPredecessors()[0]->GetSinglePredecessor(), entry_block_);
+
// Check that after optimizations in BuildDominatorTree()/SimplifyCFG() phi inputs
// are still mapped correctly to the block predecessors.
for (size_t i = 0, e = phi->InputCount(); i < e; i++) {
HInstruction* input = phi->InputAt(i);
- ASSERT_TRUE(input->GetBlock()->Dominates(header->GetPredecessors()[i]));
+ EXPECT_TRUE(input->GetBlock()->Dominates(header->GetPredecessors()[i]));
}
}
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index f6ba19f22a..d3212cbbc0 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -2891,6 +2891,8 @@ std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::MethodLoadKind
return os << "BootImageLinkTimePcRelative";
case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
return os << "DirectAddress";
+ case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo:
+ return os << "BootImageRelRo";
case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry:
return os << "BssEntry";
case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall:
@@ -2925,7 +2927,7 @@ bool HLoadClass::InstructionDataEquals(const HInstruction* other) const {
}
switch (GetLoadKind()) {
case LoadKind::kBootImageAddress:
- case LoadKind::kBootImageClassTable:
+ case LoadKind::kBootImageRelRo:
case LoadKind::kJitTableAddress: {
ScopedObjectAccess soa(Thread::Current());
return GetClass().Get() == other_load_class->GetClass().Get();
@@ -2944,8 +2946,8 @@ std::ostream& operator<<(std::ostream& os, HLoadClass::LoadKind rhs) {
return os << "BootImageLinkTimePcRelative";
case HLoadClass::LoadKind::kBootImageAddress:
return os << "BootImageAddress";
- case HLoadClass::LoadKind::kBootImageClassTable:
- return os << "BootImageClassTable";
+ case HLoadClass::LoadKind::kBootImageRelRo:
+ return os << "BootImageRelRo";
case HLoadClass::LoadKind::kBssEntry:
return os << "BssEntry";
case HLoadClass::LoadKind::kJitTableAddress:
@@ -2968,7 +2970,7 @@ bool HLoadString::InstructionDataEquals(const HInstruction* other) const {
}
switch (GetLoadKind()) {
case LoadKind::kBootImageAddress:
- case LoadKind::kBootImageInternTable:
+ case LoadKind::kBootImageRelRo:
case LoadKind::kJitTableAddress: {
ScopedObjectAccess soa(Thread::Current());
return GetString().Get() == other_load_string->GetString().Get();
@@ -2984,8 +2986,8 @@ std::ostream& operator<<(std::ostream& os, HLoadString::LoadKind rhs) {
return os << "BootImageLinkTimePcRelative";
case HLoadString::LoadKind::kBootImageAddress:
return os << "BootImageAddress";
- case HLoadString::LoadKind::kBootImageInternTable:
- return os << "BootImageInternTable";
+ case HLoadString::LoadKind::kBootImageRelRo:
+ return os << "BootImageRelRo";
case HLoadString::LoadKind::kBssEntry:
return os << "BssEntry";
case HLoadString::LoadKind::kJitTableAddress:
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 99d80d77c5..a8364e0680 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -26,6 +26,7 @@
#include "base/arena_object.h"
#include "base/array_ref.h"
#include "base/iteration_range.h"
+#include "base/quasi_atomic.h"
#include "base/stl_util.h"
#include "base/transform_array_ref.h"
#include "data_type.h"
@@ -1383,7 +1384,9 @@ class HLoopInformationOutwardIterator : public ValueObject {
M(LoadException, Instruction) \
M(LoadString, Instruction) \
M(LongConstant, Constant) \
+ M(Max, Instruction) \
M(MemoryBarrier, Instruction) \
+ M(Min, BinaryOperation) \
M(MonitorOperation, Instruction) \
M(Mul, BinaryOperation) \
M(NativeDebugInfo, Instruction) \
@@ -4428,6 +4431,10 @@ class HInvokeStaticOrDirect FINAL : public HInvoke {
// Used for app->boot calls with non-relocatable image and for JIT-compiled calls.
kDirectAddress,
+ // Load from an entry in the .data.bimg.rel.ro using a PC-relative load.
+ // Used for app->boot calls with relocatable image.
+ kBootImageRelRo,
+
// Load from an entry in the .bss section using a PC-relative load.
// Used for classes outside boot image when .bss is accessible with a PC-relative load.
kBssEntry,
@@ -4560,6 +4567,7 @@ class HInvokeStaticOrDirect FINAL : public HInvoke {
bool HasMethodAddress() const { return GetMethodLoadKind() == MethodLoadKind::kDirectAddress; }
bool HasPcRelativeMethodLoadKind() const {
return GetMethodLoadKind() == MethodLoadKind::kBootImageLinkTimePcRelative ||
+ GetMethodLoadKind() == MethodLoadKind::kBootImageRelRo ||
GetMethodLoadKind() == MethodLoadKind::kBssEntry;
}
bool HasCurrentMethodInput() const {
@@ -5016,6 +5024,76 @@ class HRem FINAL : public HBinaryOperation {
DEFAULT_COPY_CONSTRUCTOR(Rem);
};
+class HMin FINAL : public HBinaryOperation {
+ public:
+ HMin(DataType::Type result_type,
+ HInstruction* left,
+ HInstruction* right,
+ uint32_t dex_pc)
+ : HBinaryOperation(kMin, result_type, left, right, SideEffects::None(), dex_pc) {}
+
+ bool IsCommutative() const OVERRIDE { return true; }
+
+ // Evaluation for integral values.
+ template <typename T> static T ComputeIntegral(T x, T y) {
+ return (x <= y) ? x : y;
+ }
+
+ HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
+ return GetBlock()->GetGraph()->GetIntConstant(
+ ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc());
+ }
+ HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
+ return GetBlock()->GetGraph()->GetLongConstant(
+ ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc());
+ }
+ // TODO: Evaluation for floating-point values.
+ HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
+ HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { return nullptr; }
+ HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
+ HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { return nullptr; }
+
+ DECLARE_INSTRUCTION(Min);
+
+ protected:
+ DEFAULT_COPY_CONSTRUCTOR(Min);
+};
+
+class HMax FINAL : public HBinaryOperation {
+ public:
+ HMax(DataType::Type result_type,
+ HInstruction* left,
+ HInstruction* right,
+ uint32_t dex_pc)
+ : HBinaryOperation(kMax, result_type, left, right, SideEffects::None(), dex_pc) {}
+
+ bool IsCommutative() const OVERRIDE { return true; }
+
+ // Evaluation for integral values.
+ template <typename T> static T ComputeIntegral(T x, T y) {
+ return (x >= y) ? x : y;
+ }
+
+ HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
+ return GetBlock()->GetGraph()->GetIntConstant(
+ ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc());
+ }
+ HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
+ return GetBlock()->GetGraph()->GetLongConstant(
+ ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc());
+ }
+ // TODO: Evaluation for floating-point values.
+ HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
+ HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { return nullptr; }
+ HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
+ HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { return nullptr; }
+
+ DECLARE_INSTRUCTION(Max);
+
+ protected:
+ DEFAULT_COPY_CONSTRUCTOR(Max);
+};
+
class HAbs FINAL : public HUnaryOperation {
public:
HAbs(DataType::Type result_type, HInstruction* input, uint32_t dex_pc = kNoDexPc)
@@ -6066,12 +6144,12 @@ class HLoadClass FINAL : public HInstruction {
kBootImageLinkTimePcRelative,
// Use a known boot image Class* address, embedded in the code by the codegen.
- // Used for boot image classes referenced by apps in AOT- and JIT-compiled code.
+ // Used for boot image classes referenced by apps in JIT- and AOT-compiled code (non-PIC).
kBootImageAddress,
- // Use a PC-relative load from a boot image ClassTable mmapped into the .bss
- // of the oat file.
- kBootImageClassTable,
+ // Load from an entry in the .data.bimg.rel.ro using a PC-relative load.
+ // Used for boot image classes referenced by apps in AOT-compiled code (PIC).
+ kBootImageRelRo,
// Load from an entry in the .bss section using a PC-relative load.
// Used for classes outside boot image when .bss is accessible with a PC-relative load.
@@ -6119,6 +6197,12 @@ class HLoadClass FINAL : public HInstruction {
return GetPackedField<LoadKindField>();
}
+ bool HasPcRelativeLoadKind() const {
+ return GetLoadKind() == LoadKind::kBootImageLinkTimePcRelative ||
+ GetLoadKind() == LoadKind::kBootImageRelRo ||
+ GetLoadKind() == LoadKind::kBssEntry;
+ }
+
bool CanBeMoved() const OVERRIDE { return true; }
bool InstructionDataEquals(const HInstruction* other) const;
@@ -6223,7 +6307,6 @@ class HLoadClass FINAL : public HInstruction {
static bool HasTypeReference(LoadKind load_kind) {
return load_kind == LoadKind::kReferrersClass ||
load_kind == LoadKind::kBootImageLinkTimePcRelative ||
- load_kind == LoadKind::kBootImageClassTable ||
load_kind == LoadKind::kBssEntry ||
load_kind == LoadKind::kRuntimeCall;
}
@@ -6269,7 +6352,7 @@ inline void HLoadClass::AddSpecialInput(HInstruction* special_input) {
// including literal pool loads, which are PC-relative too.
DCHECK(GetLoadKind() == LoadKind::kBootImageLinkTimePcRelative ||
GetLoadKind() == LoadKind::kBootImageAddress ||
- GetLoadKind() == LoadKind::kBootImageClassTable ||
+ GetLoadKind() == LoadKind::kBootImageRelRo ||
GetLoadKind() == LoadKind::kBssEntry) << GetLoadKind();
DCHECK(special_input_.GetInstruction() == nullptr);
special_input_ = HUserRecord<HInstruction*>(special_input);
@@ -6285,12 +6368,12 @@ class HLoadString FINAL : public HInstruction {
kBootImageLinkTimePcRelative,
// Use a known boot image String* address, embedded in the code by the codegen.
- // Used for boot image strings referenced by apps in AOT- and JIT-compiled code.
+ // Used for boot image strings referenced by apps in JIT- and AOT-compiled code (non-PIC).
kBootImageAddress,
- // Use a PC-relative load from a boot image InternTable mmapped into the .bss
- // of the oat file.
- kBootImageInternTable,
+ // Load from an entry in the .data.bimg.rel.ro using a PC-relative load.
+ // Used for boot image strings referenced by apps in AOT-compiled code (PIC).
+ kBootImageRelRo,
// Load from an entry in the .bss section using a PC-relative load.
// Used for strings outside boot image when .bss is accessible with a PC-relative load.
@@ -6325,6 +6408,12 @@ class HLoadString FINAL : public HInstruction {
return GetPackedField<LoadKindField>();
}
+ bool HasPcRelativeLoadKind() const {
+ return GetLoadKind() == LoadKind::kBootImageLinkTimePcRelative ||
+ GetLoadKind() == LoadKind::kBootImageRelRo ||
+ GetLoadKind() == LoadKind::kBssEntry;
+ }
+
const DexFile& GetDexFile() const {
return dex_file_;
}
@@ -6353,7 +6442,7 @@ class HLoadString FINAL : public HInstruction {
LoadKind load_kind = GetLoadKind();
if (load_kind == LoadKind::kBootImageLinkTimePcRelative ||
load_kind == LoadKind::kBootImageAddress ||
- load_kind == LoadKind::kBootImageInternTable ||
+ load_kind == LoadKind::kBootImageRelRo ||
load_kind == LoadKind::kJitTableAddress) {
return false;
}
@@ -6431,7 +6520,7 @@ inline void HLoadString::AddSpecialInput(HInstruction* special_input) {
// including literal pool loads, which are PC-relative too.
DCHECK(GetLoadKind() == LoadKind::kBootImageLinkTimePcRelative ||
GetLoadKind() == LoadKind::kBootImageAddress ||
- GetLoadKind() == LoadKind::kBootImageInternTable ||
+ GetLoadKind() == LoadKind::kBootImageRelRo ||
GetLoadKind() == LoadKind::kBssEntry) << GetLoadKind();
// HLoadString::GetInputRecords() returns an empty array at this point,
// so use the GetInputRecords() from the base class to set the input record.
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index 0023265e50..00194ff1fe 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -22,9 +22,9 @@
#include <string>
#include <type_traits>
-#include "atomic.h"
+#include "base/atomic.h"
+#include "base/globals.h"
#include "base/logging.h" // For VLOG_IS_ON.
-#include "globals.h"
namespace art {
diff --git a/compiler/optimizing/pc_relative_fixups_mips.cc b/compiler/optimizing/pc_relative_fixups_mips.cc
index 9d5358514e..0102254206 100644
--- a/compiler/optimizing/pc_relative_fixups_mips.cc
+++ b/compiler/optimizing/pc_relative_fixups_mips.cc
@@ -75,7 +75,7 @@ class PCRelativeHandlerVisitor : public HGraphVisitor {
switch (load_kind) {
case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
case HLoadClass::LoadKind::kBootImageAddress:
- case HLoadClass::LoadKind::kBootImageClassTable:
+ case HLoadClass::LoadKind::kBootImageRelRo:
case HLoadClass::LoadKind::kBssEntry:
// Add a base register for PC-relative literals on R2.
InitializePCRelativeBasePointer();
@@ -91,7 +91,7 @@ class PCRelativeHandlerVisitor : public HGraphVisitor {
switch (load_kind) {
case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
case HLoadString::LoadKind::kBootImageAddress:
- case HLoadString::LoadKind::kBootImageInternTable:
+ case HLoadString::LoadKind::kBootImageRelRo:
case HLoadString::LoadKind::kBssEntry:
// Add a base register for PC-relative literals on R2.
InitializePCRelativeBasePointer();
diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc
index a3ca6311c2..647336b6b9 100644
--- a/compiler/optimizing/pc_relative_fixups_x86.cc
+++ b/compiler/optimizing/pc_relative_fixups_x86.cc
@@ -81,20 +81,14 @@ class PCRelativeHandlerVisitor : public HGraphVisitor {
}
void VisitLoadClass(HLoadClass* load_class) OVERRIDE {
- HLoadClass::LoadKind load_kind = load_class->GetLoadKind();
- if (load_kind == HLoadClass::LoadKind::kBootImageLinkTimePcRelative ||
- load_kind == HLoadClass::LoadKind::kBootImageClassTable ||
- load_kind == HLoadClass::LoadKind::kBssEntry) {
+ if (load_class->HasPcRelativeLoadKind()) {
HX86ComputeBaseMethodAddress* method_address = GetPCRelativeBasePointer(load_class);
load_class->AddSpecialInput(method_address);
}
}
void VisitLoadString(HLoadString* load_string) OVERRIDE {
- HLoadString::LoadKind load_kind = load_string->GetLoadKind();
- if (load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative ||
- load_kind == HLoadString::LoadKind::kBootImageInternTable ||
- load_kind == HLoadString::LoadKind::kBssEntry) {
+ if (load_string->HasPcRelativeLoadKind()) {
HX86ComputeBaseMethodAddress* method_address = GetPCRelativeBasePointer(load_string);
load_string->AddSpecialInput(method_address);
}
@@ -234,12 +228,13 @@ class PCRelativeHandlerVisitor : public HGraphVisitor {
switch (invoke->GetIntrinsic()) {
case Intrinsics::kMathAbsDouble:
case Intrinsics::kMathAbsFloat:
- LOG(FATAL) << "Unreachable abs";
- UNREACHABLE();
case Intrinsics::kMathMaxDoubleDouble:
case Intrinsics::kMathMaxFloatFloat:
case Intrinsics::kMathMinDoubleDouble:
case Intrinsics::kMathMinFloatFloat:
+ LOG(FATAL) << "Unreachable min/max/abs: intrinsics should have been lowered "
+ "to IR nodes by instruction simplifier";
+ UNREACHABLE();
case Intrinsics::kMathRoundFloat:
if (!base_added) {
DCHECK(invoke_static_or_direct != nullptr);
diff --git a/compiler/optimizing/scheduler.cc b/compiler/optimizing/scheduler.cc
index cdbe48342d..bca538fb17 100644
--- a/compiler/optimizing/scheduler.cc
+++ b/compiler/optimizing/scheduler.cc
@@ -679,6 +679,8 @@ bool HScheduler::IsSchedulable(const HInstruction* instruction) const {
instruction->IsCompare() ||
instruction->IsCondition() ||
instruction->IsDiv() ||
+ instruction->IsMin() ||
+ instruction->IsMax() ||
instruction->IsMul() ||
instruction->IsOr() ||
instruction->IsRem() ||
diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc
index 1e49411c72..7dffb2a378 100644
--- a/compiler/optimizing/sharpening.cc
+++ b/compiler/optimizing/sharpening.cc
@@ -125,8 +125,12 @@ void HSharpening::SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke,
BootImageAOTCanEmbedMethod(callee, compiler_driver)) {
method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative;
code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod;
+ } else if (IsInBootImage(callee)) {
+ // Use PC-relative access to the .data.bimg.rel.ro methods array.
+ method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo;
+ code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod;
} else {
- // Use PC-relative access to the .bss methods arrays.
+ // Use PC-relative access to the .bss methods array.
method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kBssEntry;
code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod;
}
@@ -207,7 +211,7 @@ HLoadClass::LoadKind HSharpening::ComputeLoadClassKind(
} else if (is_in_boot_image) {
// AOT app compilation, boot image class.
if (codegen->GetCompilerOptions().GetCompilePic()) {
- desired_load_kind = HLoadClass::LoadKind::kBootImageClassTable;
+ desired_load_kind = HLoadClass::LoadKind::kBootImageRelRo;
} else {
desired_load_kind = HLoadClass::LoadKind::kBootImageAddress;
}
@@ -288,7 +292,7 @@ void HSharpening::ProcessLoadString(
string = class_linker->LookupString(string_index, dex_cache.Get());
if (string != nullptr && runtime->GetHeap()->ObjectIsInBootImageSpace(string)) {
if (codegen->GetCompilerOptions().GetCompilePic()) {
- desired_load_kind = HLoadString::LoadKind::kBootImageInternTable;
+ desired_load_kind = HLoadString::LoadKind::kBootImageRelRo;
} else {
desired_load_kind = HLoadString::LoadKind::kBootImageAddress;
}
diff --git a/compiler/optimizing/superblock_cloner.cc b/compiler/optimizing/superblock_cloner.cc
index a7c23bef7e..04942f9a4a 100644
--- a/compiler/optimizing/superblock_cloner.cc
+++ b/compiler/optimizing/superblock_cloner.cc
@@ -28,6 +28,11 @@ using HInstructionMap = SuperblockCloner::HInstructionMap;
using HBasicBlockSet = SuperblockCloner::HBasicBlockSet;
using HEdgeSet = SuperblockCloner::HEdgeSet;
+// When doing peeling we can choose whether to keep original loop (made of original basic blocks)
+// and form a peeled iteration of the copy blocks (preserve the header) or transfer original loop
+// blocks to the peeled iteration and create new loop from the copy blocks. Similar for unrolling.
+static const bool kPeelUnrollPreserveHeader = true;
+
void HEdge::Dump(std::ostream& stream) const {
stream << "(" << from_ << "->" << to_ << ")";
}
@@ -70,20 +75,18 @@ static bool ArePhiInputsTheSame(const HPhi* phi) {
return true;
}
-// Returns a common predecessor of loop1 and loop2 in the loop tree or nullptr if it is the whole
-// graph.
-static HLoopInformation* FindCommonLoop(HLoopInformation* loop1, HLoopInformation* loop2) {
- if (loop1 != nullptr || loop2 != nullptr) {
- return nullptr;
+// Returns whether two Edge sets are equal (ArenaHashSet doesn't have "Equal" method).
+static bool EdgeHashSetsEqual(const HEdgeSet* set1, const HEdgeSet* set2) {
+ if (set1->Size() != set2->Size()) {
+ return false;
}
- if (loop1->IsIn(*loop2)) {
- return loop2;
- } else if (loop2->IsIn(*loop1)) {
- return loop1;
+ for (auto e : *set1) {
+ if (set2->Find(e) == set2->end()) {
+ return false;
+ }
}
- HBasicBlock* block = CommonDominator::ForPair(loop1->GetHeader(), loop2->GetHeader());
- return block->GetLoopInformation();
+ return true;
}
// Calls HGraph::OrderLoopHeaderPredecessors for each loop in the graph.
@@ -95,6 +98,21 @@ static void OrderLoopsHeadersPredecessors(HGraph* graph) {
}
}
+// Performs DFS on the subgraph (specified by 'bb_set') starting from the specified block; while
+// traversing the function removes basic blocks from the bb_set (instead of traditional DFS
+// 'marking'). So what is left in the 'bb_set' after the traversal is not reachable from the start
+// block.
+static void TraverseSubgraphForConnectivity(HBasicBlock* block, HBasicBlockSet* bb_set) {
+ DCHECK(bb_set->IsBitSet(block->GetBlockId()));
+ bb_set->ClearBit(block->GetBlockId());
+
+ for (HBasicBlock* succ : block->GetSuccessors()) {
+ if (bb_set->IsBitSet(succ->GetBlockId())) {
+ TraverseSubgraphForConnectivity(succ, bb_set);
+ }
+ }
+}
+
//
// Helpers for CloneBasicBlock.
//
@@ -268,7 +286,6 @@ void SuperblockCloner::FindBackEdgesLocal(HBasicBlock* entry_block, ArenaBitVect
}
void SuperblockCloner::RecalculateBackEdgesInfo(ArenaBitVector* outer_loop_bb_set) {
- // TODO: DCHECK that after the transformation the graph is connected.
HBasicBlock* block_entry = nullptr;
if (outer_loop_ == nullptr) {
@@ -424,6 +441,11 @@ void SuperblockCloner::FindAndSetLocalAreaForAdjustments() {
outer_loop_ = nullptr;
break;
}
+ if (outer_loop_ == nullptr) {
+ // We should not use the initial outer_loop_ value 'nullptr' when finding the most outer
+ // common loop.
+ outer_loop_ = loop_exit_loop_info;
+ }
outer_loop_ = FindCommonLoop(outer_loop_, loop_exit_loop_info);
}
@@ -507,6 +529,34 @@ void SuperblockCloner::ResolveDataFlow() {
// Debug and logging methods.
//
+// Debug function to dump graph' BasicBlocks info.
+void DumpBB(HGraph* graph) {
+ for (HBasicBlock* bb : graph->GetBlocks()) {
+ if (bb == nullptr) {
+ continue;
+ }
+ std::cout << bb->GetBlockId();
+ std::cout << " <- ";
+ for (HBasicBlock* pred : bb->GetPredecessors()) {
+ std::cout << pred->GetBlockId() << " ";
+ }
+ std::cout << " -> ";
+ for (HBasicBlock* succ : bb->GetSuccessors()) {
+ std::cout << succ->GetBlockId() << " ";
+ }
+
+ if (bb->GetDominator()) {
+ std::cout << " dom " << bb->GetDominator()->GetBlockId();
+ }
+
+ if (bb->GetLoopInformation()) {
+ std::cout << "\tloop: " << bb->GetLoopInformation()->GetHeader()->GetBlockId();
+ }
+
+ std::cout << std::endl;
+ }
+}
+
void SuperblockCloner::CheckInstructionInputsRemapping(HInstruction* orig_instr) {
DCHECK(!orig_instr->IsPhi());
HInstruction* copy_instr = GetInstrCopy(orig_instr);
@@ -542,6 +592,82 @@ void SuperblockCloner::CheckInstructionInputsRemapping(HInstruction* orig_instr)
}
}
+bool SuperblockCloner::CheckRemappingInfoIsValid() {
+ for (HEdge edge : *remap_orig_internal_) {
+ if (!IsEdgeValid(edge, graph_) ||
+ !IsInOrigBBSet(edge.GetFrom()) ||
+ !IsInOrigBBSet(edge.GetTo())) {
+ return false;
+ }
+ }
+
+ for (auto edge : *remap_copy_internal_) {
+ if (!IsEdgeValid(edge, graph_) ||
+ !IsInOrigBBSet(edge.GetFrom()) ||
+ !IsInOrigBBSet(edge.GetTo())) {
+ return false;
+ }
+ }
+
+ for (auto edge : *remap_incoming_) {
+ if (!IsEdgeValid(edge, graph_) ||
+ IsInOrigBBSet(edge.GetFrom()) ||
+ !IsInOrigBBSet(edge.GetTo())) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+void SuperblockCloner::VerifyGraph() {
+ for (auto it : *hir_map_) {
+ HInstruction* orig_instr = it.first;
+ HInstruction* copy_instr = it.second;
+ if (!orig_instr->IsPhi() && !orig_instr->IsSuspendCheck()) {
+ DCHECK(it.first->GetBlock() != nullptr);
+ }
+ if (!copy_instr->IsPhi() && !copy_instr->IsSuspendCheck()) {
+ DCHECK(it.second->GetBlock() != nullptr);
+ }
+ }
+
+ GraphChecker checker(graph_);
+ checker.Run();
+ if (!checker.IsValid()) {
+ for (const std::string& error : checker.GetErrors()) {
+ std::cout << error << std::endl;
+ }
+ LOG(FATAL) << "GraphChecker failed: superblock cloner\n";
+ }
+}
+
+void DumpBBSet(const ArenaBitVector* set) {
+ for (uint32_t idx : set->Indexes()) {
+ std::cout << idx << "\n";
+ }
+}
+
+void SuperblockCloner::DumpInputSets() {
+ std::cout << graph_->PrettyMethod() << "\n";
+ std::cout << "orig_bb_set:\n";
+ for (uint32_t idx : orig_bb_set_.Indexes()) {
+ std::cout << idx << "\n";
+ }
+ std::cout << "remap_orig_internal:\n";
+ for (HEdge e : *remap_orig_internal_) {
+ std::cout << e << "\n";
+ }
+ std::cout << "remap_copy_internal:\n";
+ for (auto e : *remap_copy_internal_) {
+ std::cout << e << "\n";
+ }
+ std::cout << "remap_incoming:\n";
+ for (auto e : *remap_incoming_) {
+ std::cout << e << "\n";
+ }
+}
+
//
// Public methods.
//
@@ -569,6 +695,7 @@ void SuperblockCloner::SetSuccessorRemappingInfo(const HEdgeSet* remap_orig_inte
remap_orig_internal_ = remap_orig_internal;
remap_copy_internal_ = remap_copy_internal;
remap_incoming_ = remap_incoming;
+ DCHECK(CheckRemappingInfoIsValid());
}
bool SuperblockCloner::IsSubgraphClonable() const {
@@ -602,6 +729,63 @@ bool SuperblockCloner::IsSubgraphClonable() const {
return true;
}
+bool SuperblockCloner::IsFastCase() const {
+ // Check that loop unrolling/loop peeling is being conducted.
+ // Check that all the basic blocks belong to the same loop.
+ bool flag = false;
+ HLoopInformation* common_loop_info = nullptr;
+ for (uint32_t idx : orig_bb_set_.Indexes()) {
+ HBasicBlock* block = GetBlockById(idx);
+ HLoopInformation* block_loop_info = block->GetLoopInformation();
+ if (!flag) {
+ common_loop_info = block_loop_info;
+ } else {
+ if (block_loop_info != common_loop_info) {
+ return false;
+ }
+ }
+ }
+
+ // Check that orig_bb_set_ corresponds to loop peeling/unrolling.
+ if (common_loop_info == nullptr || !orig_bb_set_.SameBitsSet(&common_loop_info->GetBlocks())) {
+ return false;
+ }
+
+ bool peeling_or_unrolling = false;
+ HEdgeSet remap_orig_internal(graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
+ HEdgeSet remap_copy_internal(graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
+ HEdgeSet remap_incoming(graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
+
+
+ // Check whether remapping info corresponds to loop unrolling.
+ CollectRemappingInfoForPeelUnroll(/* to_unroll*/ true,
+ common_loop_info,
+ &remap_orig_internal,
+ &remap_copy_internal,
+ &remap_incoming);
+
+ peeling_or_unrolling |= EdgeHashSetsEqual(&remap_orig_internal, remap_orig_internal_) &&
+ EdgeHashSetsEqual(&remap_copy_internal, remap_copy_internal_) &&
+ EdgeHashSetsEqual(&remap_incoming, remap_incoming_);
+
+ remap_orig_internal.Clear();
+ remap_copy_internal.Clear();
+ remap_incoming.Clear();
+
+ // Check whether remapping info corresponds to loop peeling.
+ CollectRemappingInfoForPeelUnroll(/* to_unroll*/ false,
+ common_loop_info,
+ &remap_orig_internal,
+ &remap_copy_internal,
+ &remap_incoming);
+
+ peeling_or_unrolling |= EdgeHashSetsEqual(&remap_orig_internal, remap_orig_internal_) &&
+ EdgeHashSetsEqual(&remap_copy_internal, remap_copy_internal_) &&
+ EdgeHashSetsEqual(&remap_incoming, remap_incoming_);
+
+ return peeling_or_unrolling;
+}
+
void SuperblockCloner::Run() {
DCHECK(bb_map_ != nullptr);
DCHECK(hir_map_ != nullptr);
@@ -609,6 +793,11 @@ void SuperblockCloner::Run() {
remap_copy_internal_ != nullptr &&
remap_incoming_ != nullptr);
DCHECK(IsSubgraphClonable());
+ DCHECK(IsFastCase());
+
+ if (kSuperblockClonerLogging) {
+ DumpInputSets();
+ }
// Find an area in the graph for which control flow information should be adjusted.
FindAndSetLocalAreaForAdjustments();
@@ -618,6 +807,19 @@ void SuperblockCloner::Run() {
// Connect the blocks together/remap successors and fix phis which are directly affected my the
// remapping.
RemapEdgesSuccessors();
+
+ // Check that the subgraph is connected.
+ if (kIsDebugBuild) {
+ HBasicBlockSet work_set(arena_, orig_bb_set_.GetSizeOf(), true, kArenaAllocSuperblockCloner);
+
+ // Add original and copy blocks of the subgraph to the work set.
+ for (auto iter : *bb_map_) {
+ work_set.SetBit(iter.first->GetBlockId()); // Original block.
+ work_set.SetBit(iter.second->GetBlockId()); // Copy block.
+ }
+ CHECK(IsSubgraphConnected(&work_set, graph_));
+ }
+
// Recalculate dominance and backedge information which is required by the next stage.
AdjustControlFlowInfo();
// Fix data flow of the graph.
@@ -650,6 +852,10 @@ void SuperblockCloner::CleanUp() {
}
}
}
+
+ if (kSuperblockClonerVerify) {
+ VerifyGraph();
+ }
}
HBasicBlock* SuperblockCloner::CloneBasicBlock(const HBasicBlock* orig_block) {
@@ -701,4 +907,125 @@ void SuperblockCloner::CloneBasicBlocks() {
}
}
+//
+// Stand-alone methods.
+//
+
+void CollectRemappingInfoForPeelUnroll(bool to_unroll,
+ HLoopInformation* loop_info,
+ HEdgeSet* remap_orig_internal,
+ HEdgeSet* remap_copy_internal,
+ HEdgeSet* remap_incoming) {
+ DCHECK(loop_info != nullptr);
+ HBasicBlock* loop_header = loop_info->GetHeader();
+ // Set up remap_orig_internal edges set - set is empty.
+ // Set up remap_copy_internal edges set.
+ for (HBasicBlock* back_edge_block : loop_info->GetBackEdges()) {
+ HEdge e = HEdge(back_edge_block, loop_header);
+ if (to_unroll) {
+ remap_orig_internal->Insert(e);
+ remap_copy_internal->Insert(e);
+ } else {
+ if (kPeelUnrollPreserveHeader) {
+ remap_copy_internal->Insert(e);
+ } else {
+ remap_orig_internal->Insert(e);
+ }
+ }
+ }
+
+ // Set up remap_incoming edges set.
+ if (to_unroll != kPeelUnrollPreserveHeader) {
+ remap_incoming->Insert(HEdge(loop_info->GetPreHeader(), loop_header));
+ }
+}
+
+bool IsSubgraphConnected(SuperblockCloner::HBasicBlockSet* work_set, HGraph* graph) {
+ ArenaVector<HBasicBlock*> entry_blocks(
+ graph->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
+
+ // Find subgraph entry blocks.
+ for (uint32_t orig_block_id : work_set->Indexes()) {
+ HBasicBlock* block = graph->GetBlocks()[orig_block_id];
+ for (HBasicBlock* pred : block->GetPredecessors()) {
+ if (!work_set->IsBitSet(pred->GetBlockId())) {
+ entry_blocks.push_back(block);
+ break;
+ }
+ }
+ }
+
+ for (HBasicBlock* entry_block : entry_blocks) {
+ if (work_set->IsBitSet(entry_block->GetBlockId())) {
+ TraverseSubgraphForConnectivity(entry_block, work_set);
+ }
+ }
+
+ // Return whether there are unvisited - unreachable - blocks.
+ return work_set->NumSetBits() == 0;
+}
+
+HLoopInformation* FindCommonLoop(HLoopInformation* loop1, HLoopInformation* loop2) {
+ if (loop1 == nullptr || loop2 == nullptr) {
+ return nullptr;
+ }
+
+ if (loop1->IsIn(*loop2)) {
+ return loop2;
+ }
+
+ HLoopInformation* current = loop1;
+ while (current != nullptr && !loop2->IsIn(*current)) {
+ current = current->GetPreHeader()->GetLoopInformation();
+ }
+
+ return current;
+}
+
+bool PeelUnrollHelper::IsLoopClonable(HLoopInformation* loop_info) {
+ PeelUnrollHelper helper(loop_info, nullptr, nullptr);
+ return helper.IsLoopClonable();
+}
+
+HBasicBlock* PeelUnrollHelper::DoPeelUnrollImpl(bool to_unroll) {
+ // For now do peeling only for natural loops.
+ DCHECK(!loop_info_->IsIrreducible());
+
+ HBasicBlock* loop_header = loop_info_->GetHeader();
+ HGraph* graph = loop_header->GetGraph();
+ ArenaAllocator allocator(graph->GetAllocator()->GetArenaPool());
+
+ HEdgeSet remap_orig_internal(graph->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
+ HEdgeSet remap_copy_internal(graph->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
+ HEdgeSet remap_incoming(graph->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
+
+ CollectRemappingInfoForPeelUnroll(to_unroll,
+ loop_info_,
+ &remap_orig_internal,
+ &remap_copy_internal,
+ &remap_incoming);
+
+ cloner_.SetSuccessorRemappingInfo(&remap_orig_internal, &remap_copy_internal, &remap_incoming);
+ cloner_.Run();
+ cloner_.CleanUp();
+
+ return kPeelUnrollPreserveHeader ? loop_header : cloner_.GetBlockCopy(loop_header);
+}
+
+PeelUnrollSimpleHelper::PeelUnrollSimpleHelper(HLoopInformation* info)
+ : bb_map_(std::less<HBasicBlock*>(),
+ info->GetHeader()->GetGraph()->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)),
+ hir_map_(std::less<HInstruction*>(),
+ info->GetHeader()->GetGraph()->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)),
+ helper_(info, &bb_map_, &hir_map_) {}
+
} // namespace art
+
+namespace std {
+
+ostream& operator<<(ostream& os, const art::HEdge& e) {
+ e.Dump(os);
+ return os;
+}
+
+} // namespace std
diff --git a/compiler/optimizing/superblock_cloner.h b/compiler/optimizing/superblock_cloner.h
index 23de692673..19c9dd471c 100644
--- a/compiler/optimizing/superblock_cloner.h
+++ b/compiler/optimizing/superblock_cloner.h
@@ -152,6 +152,15 @@ class SuperblockCloner : public ValueObject {
// TODO: Start from small range of graph patterns then extend it.
bool IsSubgraphClonable() const;
+ // Returns whether selected subgraph satisfies the criteria for fast data flow resolution
+ // when iterative DF algorithm is not required and dominators/instructions inputs can be
+ // trivially adjusted.
+ //
+ // TODO: formally describe the criteria.
+ //
+ // Loop peeling and unrolling satisfy the criteria.
+ bool IsFastCase() const;
+
// Runs the copy algorithm according to the description.
void Run();
@@ -202,11 +211,17 @@ class SuperblockCloner : public ValueObject {
return IsInOrigBBSet(block->GetBlockId());
}
+ // Returns the area (the most outer loop) in the graph for which control flow (back edges, loops,
+ // dominators) needs to be adjusted.
+ HLoopInformation* GetRegionToBeAdjusted() const {
+ return outer_loop_;
+ }
+
private:
// Fills the 'exits' vector with the subgraph exits.
void SearchForSubgraphExits(ArenaVector<HBasicBlock*>* exits);
- // Finds and records information about the area in the graph for which control-flow (back edges,
+ // Finds and records information about the area in the graph for which control flow (back edges,
// loops, dominators) needs to be adjusted.
void FindAndSetLocalAreaForAdjustments();
@@ -217,7 +232,7 @@ class SuperblockCloner : public ValueObject {
// phis' nor instructions' inputs values are resolved.
void RemapEdgesSuccessors();
- // Adjusts control-flow (back edges, loops, dominators) for the local area defined by
+ // Adjusts control flow (back edges, loops, dominators) for the local area defined by
// FindAndSetLocalAreaForAdjustments.
void AdjustControlFlowInfo();
@@ -272,6 +287,9 @@ class SuperblockCloner : public ValueObject {
// Debug and logging methods.
//
void CheckInstructionInputsRemapping(HInstruction* orig_instr);
+ bool CheckRemappingInfoIsValid();
+ void VerifyGraph();
+ void DumpInputSets();
HBasicBlock* GetBlockById(uint32_t block_id) const {
DCHECK(block_id < graph_->GetBlocks().size());
@@ -295,15 +313,94 @@ class SuperblockCloner : public ValueObject {
HBasicBlockMap* bb_map_;
// Correspondence map for instructions: (original HInstruction, copy HInstruction).
HInstructionMap* hir_map_;
- // Area in the graph for which control-flow (back edges, loops, dominators) needs to be adjusted.
+ // Area in the graph for which control flow (back edges, loops, dominators) needs to be adjusted.
HLoopInformation* outer_loop_;
HBasicBlockSet outer_loop_bb_set_;
ART_FRIEND_TEST(SuperblockClonerTest, AdjustControlFlowInfo);
+ ART_FRIEND_TEST(SuperblockClonerTest, IsGraphConnected);
DISALLOW_COPY_AND_ASSIGN(SuperblockCloner);
};
+// Helper class to perform loop peeling/unrolling.
+//
+// This helper should be used when correspondence map between original and copied
+// basic blocks/instructions are demanded.
+class PeelUnrollHelper : public ValueObject {
+ public:
+ explicit PeelUnrollHelper(HLoopInformation* info,
+ SuperblockCloner::HBasicBlockMap* bb_map,
+ SuperblockCloner::HInstructionMap* hir_map) :
+ loop_info_(info),
+ cloner_(info->GetHeader()->GetGraph(), &info->GetBlocks(), bb_map, hir_map) {
+ // For now do peeling/unrolling only for natural loops.
+ DCHECK(!info->IsIrreducible());
+ }
+
+ // Returns whether the loop can be peeled/unrolled (static function).
+ static bool IsLoopClonable(HLoopInformation* loop_info);
+
+ // Returns whether the loop can be peeled/unrolled.
+ bool IsLoopClonable() const { return cloner_.IsSubgraphClonable(); }
+
+ HBasicBlock* DoPeeling() { return DoPeelUnrollImpl(/* to_unroll */ false); }
+ HBasicBlock* DoUnrolling() { return DoPeelUnrollImpl(/* to_unroll */ true); }
+ HLoopInformation* GetRegionToBeAdjusted() const { return cloner_.GetRegionToBeAdjusted(); }
+
+ protected:
+ // Applies loop peeling/unrolling for the loop specified by 'loop_info'.
+ //
+ // Depending on 'do_unroll' either unrolls loop by 2 or peels one iteration from it.
+ HBasicBlock* DoPeelUnrollImpl(bool to_unroll);
+
+ private:
+ HLoopInformation* loop_info_;
+ SuperblockCloner cloner_;
+
+ DISALLOW_COPY_AND_ASSIGN(PeelUnrollHelper);
+};
+
+// Helper class to perform loop peeling/unrolling.
+//
+// This helper should be used when there is no need to get correspondence information between
+// original and copied basic blocks/instructions.
+class PeelUnrollSimpleHelper : public ValueObject {
+ public:
+ explicit PeelUnrollSimpleHelper(HLoopInformation* info);
+ bool IsLoopClonable() const { return helper_.IsLoopClonable(); }
+ HBasicBlock* DoPeeling() { return helper_.DoPeeling(); }
+ HBasicBlock* DoUnrolling() { return helper_.DoUnrolling(); }
+ HLoopInformation* GetRegionToBeAdjusted() const { return helper_.GetRegionToBeAdjusted(); }
+
+ private:
+ SuperblockCloner::HBasicBlockMap bb_map_;
+ SuperblockCloner::HInstructionMap hir_map_;
+ PeelUnrollHelper helper_;
+
+ DISALLOW_COPY_AND_ASSIGN(PeelUnrollSimpleHelper);
+};
+
+// Collects edge remapping info for loop peeling/unrolling for the loop specified by loop info.
+void CollectRemappingInfoForPeelUnroll(bool to_unroll,
+ HLoopInformation* loop_info,
+ SuperblockCloner::HEdgeSet* remap_orig_internal,
+ SuperblockCloner::HEdgeSet* remap_copy_internal,
+ SuperblockCloner::HEdgeSet* remap_incoming);
+
+// Returns whether blocks from 'work_set' are reachable from the rest of the graph.
+//
+// Returns whether such a set 'outer_entries' of basic blocks exists that:
+// - each block from 'outer_entries' is not from 'work_set'.
+// - each block from 'work_set' is reachable from at least one block from 'outer_entries'.
+//
+// After the function returns work_set contains only blocks from the original 'work_set'
+// which are unreachable from the rest of the graph.
+bool IsSubgraphConnected(SuperblockCloner::HBasicBlockSet* work_set, HGraph* graph);
+
+// Returns a common predecessor of loop1 and loop2 in the loop tree or nullptr if it is the whole
+// graph.
+HLoopInformation* FindCommonLoop(HLoopInformation* loop1, HLoopInformation* loop2);
} // namespace art
namespace std {
@@ -312,11 +409,12 @@ template <>
struct hash<art::HEdge> {
size_t operator()(art::HEdge const& x) const noexcept {
// Use Cantor pairing function as the hash function.
- uint32_t a = x.GetFrom();
- uint32_t b = x.GetTo();
+ size_t a = x.GetFrom();
+ size_t b = x.GetTo();
return (a + b) * (a + b + 1) / 2 + b;
}
};
+ostream& operator<<(ostream& os, const art::HEdge& e);
} // namespace std
diff --git a/compiler/optimizing/superblock_cloner_test.cc b/compiler/optimizing/superblock_cloner_test.cc
index f1b7bffdf5..df2e517aff 100644
--- a/compiler/optimizing/superblock_cloner_test.cc
+++ b/compiler/optimizing/superblock_cloner_test.cc
@@ -25,52 +25,65 @@ namespace art {
using HBasicBlockMap = SuperblockCloner::HBasicBlockMap;
using HInstructionMap = SuperblockCloner::HInstructionMap;
+using HBasicBlockSet = SuperblockCloner::HBasicBlockSet;
+using HEdgeSet = SuperblockCloner::HEdgeSet;
// This class provides methods and helpers for testing various cloning and copying routines:
// individual instruction cloning and cloning of the more coarse-grain structures.
class SuperblockClonerTest : public OptimizingUnitTest {
public:
- SuperblockClonerTest()
- : graph_(CreateGraph()), entry_block_(nullptr), exit_block_(nullptr), parameter_(nullptr) {}
+ SuperblockClonerTest() : graph_(CreateGraph()),
+ entry_block_(nullptr),
+ return_block_(nullptr),
+ exit_block_(nullptr),
+ parameter_(nullptr) {}
- void CreateBasicLoopControlFlow(/* out */ HBasicBlock** header_p,
- /* out */ HBasicBlock** body_p) {
+ void InitGraph() {
entry_block_ = new (GetAllocator()) HBasicBlock(graph_);
graph_->AddBlock(entry_block_);
graph_->SetEntryBlock(entry_block_);
+ return_block_ = new (GetAllocator()) HBasicBlock(graph_);
+ graph_->AddBlock(return_block_);
+
+ exit_block_ = new (GetAllocator()) HBasicBlock(graph_);
+ graph_->AddBlock(exit_block_);
+ graph_->SetExitBlock(exit_block_);
+
+ entry_block_->AddSuccessor(return_block_);
+ return_block_->AddSuccessor(exit_block_);
+
+ parameter_ = new (GetAllocator()) HParameterValue(graph_->GetDexFile(),
+ dex::TypeIndex(0),
+ 0,
+ DataType::Type::kInt32);
+ entry_block_->AddInstruction(parameter_);
+ return_block_->AddInstruction(new (GetAllocator()) HReturnVoid());
+ exit_block_->AddInstruction(new (GetAllocator()) HExit());
+ }
+
+ void CreateBasicLoopControlFlow(HBasicBlock* position,
+ HBasicBlock* successor,
+ /* out */ HBasicBlock** header_p,
+ /* out */ HBasicBlock** body_p) {
HBasicBlock* loop_preheader = new (GetAllocator()) HBasicBlock(graph_);
HBasicBlock* loop_header = new (GetAllocator()) HBasicBlock(graph_);
HBasicBlock* loop_body = new (GetAllocator()) HBasicBlock(graph_);
- HBasicBlock* loop_exit = new (GetAllocator()) HBasicBlock(graph_);
graph_->AddBlock(loop_preheader);
graph_->AddBlock(loop_header);
graph_->AddBlock(loop_body);
- graph_->AddBlock(loop_exit);
- exit_block_ = new (GetAllocator()) HBasicBlock(graph_);
- graph_->AddBlock(exit_block_);
- graph_->SetExitBlock(exit_block_);
+ position->ReplaceSuccessor(successor, loop_preheader);
- entry_block_->AddSuccessor(loop_preheader);
loop_preheader->AddSuccessor(loop_header);
// Loop exit first to have a proper exit condition/target for HIf.
- loop_header->AddSuccessor(loop_exit);
+ loop_header->AddSuccessor(successor);
loop_header->AddSuccessor(loop_body);
loop_body->AddSuccessor(loop_header);
- loop_exit->AddSuccessor(exit_block_);
*header_p = loop_header;
*body_p = loop_body;
-
- parameter_ = new (GetAllocator()) HParameterValue(graph_->GetDexFile(),
- dex::TypeIndex(0),
- 0,
- DataType::Type::kInt32);
- entry_block_->AddInstruction(parameter_);
- loop_exit->AddInstruction(new (GetAllocator()) HReturnVoid());
- exit_block_->AddInstruction(new (GetAllocator()) HExit());
}
void CreateBasicLoopDataFlow(HBasicBlock* loop_header, HBasicBlock* loop_body) {
@@ -84,11 +97,12 @@ class SuperblockClonerTest : public OptimizingUnitTest {
// Header block.
HPhi* phi = new (GetAllocator()) HPhi(GetAllocator(), 0, 0, DataType::Type::kInt32);
HInstruction* suspend_check = new (GetAllocator()) HSuspendCheck();
+ HInstruction* loop_check = new (GetAllocator()) HGreaterThanOrEqual(phi, const_128);
loop_header->AddPhi(phi);
loop_header->AddInstruction(suspend_check);
- loop_header->AddInstruction(new (GetAllocator()) HGreaterThanOrEqual(phi, const_128));
- loop_header->AddInstruction(new (GetAllocator()) HIf(parameter_));
+ loop_header->AddInstruction(loop_check);
+ loop_header->AddInstruction(new (GetAllocator()) HIf(loop_check));
// Loop body block.
HInstruction* null_check = new (GetAllocator()) HNullCheck(parameter_, dex_pc);
@@ -97,8 +111,8 @@ class SuperblockClonerTest : public OptimizingUnitTest {
HInstruction* array_get =
new (GetAllocator()) HArrayGet(null_check, bounds_check, DataType::Type::kInt32, dex_pc);
HInstruction* add = new (GetAllocator()) HAdd(DataType::Type::kInt32, array_get, const_1);
- HInstruction* array_set =
- new (GetAllocator()) HArraySet(null_check, bounds_check, add, DataType::Type::kInt32, dex_pc);
+ HInstruction* array_set = new (GetAllocator()) HArraySet(
+ null_check, bounds_check, add, DataType::Type::kInt32, dex_pc);
HInstruction* induction_inc = new (GetAllocator()) HAdd(DataType::Type::kInt32, phi, const_1);
loop_body->AddInstruction(null_check);
@@ -153,6 +167,7 @@ class SuperblockClonerTest : public OptimizingUnitTest {
HGraph* graph_;
HBasicBlock* entry_block_;
+ HBasicBlock* return_block_;
HBasicBlock* exit_block_;
HInstruction* parameter_;
@@ -162,10 +177,11 @@ TEST_F(SuperblockClonerTest, IndividualInstrCloner) {
HBasicBlock* header = nullptr;
HBasicBlock* loop_body = nullptr;
- CreateBasicLoopControlFlow(&header, &loop_body);
+ InitGraph();
+ CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body);
CreateBasicLoopDataFlow(header, loop_body);
graph_->BuildDominatorTree();
- ASSERT_TRUE(CheckGraph());
+ EXPECT_TRUE(CheckGraph());
HSuspendCheck* old_suspend_check = header->GetLoopInformation()->GetSuspendCheck();
CloneAndReplaceInstructionVisitor visitor(graph_);
@@ -193,7 +209,8 @@ TEST_F(SuperblockClonerTest, CloneBasicBlocks) {
HBasicBlock* loop_body = nullptr;
ArenaAllocator* arena = graph_->GetAllocator();
- CreateBasicLoopControlFlow(&header, &loop_body);
+ InitGraph();
+ CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body);
CreateBasicLoopDataFlow(header, loop_body);
graph_->BuildDominatorTree();
ASSERT_TRUE(CheckGraph());
@@ -272,7 +289,8 @@ TEST_F(SuperblockClonerTest, AdjustControlFlowInfo) {
HBasicBlock* loop_body = nullptr;
ArenaAllocator* arena = graph_->GetAllocator();
- CreateBasicLoopControlFlow(&header, &loop_body);
+ InitGraph();
+ CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body);
CreateBasicLoopDataFlow(header, loop_body);
graph_->BuildDominatorTree();
ASSERT_TRUE(CheckGraph());
@@ -303,4 +321,487 @@ TEST_F(SuperblockClonerTest, AdjustControlFlowInfo) {
EXPECT_TRUE(loop_info->IsBackEdge(*loop_body));
}
+// Tests IsSubgraphConnected function for negative case.
+TEST_F(SuperblockClonerTest, IsGraphConnected) {
+ HBasicBlock* header = nullptr;
+ HBasicBlock* loop_body = nullptr;
+ ArenaAllocator* arena = graph_->GetAllocator();
+
+ InitGraph();
+ CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body);
+ CreateBasicLoopDataFlow(header, loop_body);
+ HBasicBlock* unreachable_block = new (GetAllocator()) HBasicBlock(graph_);
+ graph_->AddBlock(unreachable_block);
+
+ HBasicBlockSet bb_set(
+ arena, graph_->GetBlocks().size(), false, kArenaAllocSuperblockCloner);
+ bb_set.SetBit(header->GetBlockId());
+ bb_set.SetBit(loop_body->GetBlockId());
+ bb_set.SetBit(unreachable_block->GetBlockId());
+
+ EXPECT_FALSE(IsSubgraphConnected(&bb_set, graph_));
+ EXPECT_EQ(bb_set.NumSetBits(), 1u);
+ EXPECT_TRUE(bb_set.IsBitSet(unreachable_block->GetBlockId()));
+}
+
+// Tests SuperblockCloner for loop peeling case.
+//
+// Control Flow of the example (ignoring critical edges splitting).
+//
+// Before After
+//
+// |B| |B|
+// | |
+// v v
+// |1| |1|
+// | |
+// v v
+// |2|<-\ (6) |2A|
+// / \ / / \
+// v v/ / v
+// |4| |3| / |3A| (7)
+// | / /
+// v | v
+// |E| \ |2|<-\
+// \ / \ /
+// v v /
+// |4| |3|
+// |
+// v
+// |E|
+TEST_F(SuperblockClonerTest, LoopPeeling) {
+ HBasicBlock* header = nullptr;
+ HBasicBlock* loop_body = nullptr;
+
+ InitGraph();
+ CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body);
+ CreateBasicLoopDataFlow(header, loop_body);
+ graph_->BuildDominatorTree();
+ EXPECT_TRUE(CheckGraph());
+
+ HBasicBlockMap bb_map(
+ std::less<HBasicBlock*>(), graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
+ HInstructionMap hir_map(
+ std::less<HInstruction*>(), graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
+
+ HLoopInformation* loop_info = header->GetLoopInformation();
+ PeelUnrollHelper helper(loop_info, &bb_map, &hir_map);
+ EXPECT_TRUE(helper.IsLoopClonable());
+ HBasicBlock* new_header = helper.DoPeeling();
+ HLoopInformation* new_loop_info = new_header->GetLoopInformation();
+
+ EXPECT_TRUE(CheckGraph());
+
+ // Check loop body successors.
+ EXPECT_EQ(loop_body->GetSingleSuccessor(), header);
+ EXPECT_EQ(bb_map.Get(loop_body)->GetSingleSuccessor(), header);
+
+ // Check loop structure.
+ EXPECT_EQ(header, new_header);
+ EXPECT_EQ(new_loop_info->GetHeader(), header);
+ EXPECT_EQ(new_loop_info->GetBackEdges().size(), 1u);
+ EXPECT_EQ(new_loop_info->GetBackEdges()[0], loop_body);
+}
+
+// Tests SuperblockCloner for loop unrolling case.
+//
+// Control Flow of the example (ignoring critical edges splitting).
+//
+// Before After
+//
+// |B| |B|
+// | |
+// v v
+// |1| |1|
+// | |
+// v v
+// |2|<-\ (6) |2A|<-\
+// / \ / / \ \
+// v v/ / v \
+// |4| |3| /(7)|3A| |
+// | / / /
+// v | v /
+// |E| \ |2| /
+// \ / \ /
+// v v/
+// |4| |3|
+// |
+// v
+// |E|
+TEST_F(SuperblockClonerTest, LoopUnrolling) {
+ HBasicBlock* header = nullptr;
+ HBasicBlock* loop_body = nullptr;
+
+ InitGraph();
+ CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body);
+ CreateBasicLoopDataFlow(header, loop_body);
+ graph_->BuildDominatorTree();
+ EXPECT_TRUE(CheckGraph());
+
+ HBasicBlockMap bb_map(
+ std::less<HBasicBlock*>(), graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
+ HInstructionMap hir_map(
+ std::less<HInstruction*>(), graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
+
+ HLoopInformation* loop_info = header->GetLoopInformation();
+ PeelUnrollHelper helper(loop_info, &bb_map, &hir_map);
+ EXPECT_TRUE(helper.IsLoopClonable());
+ HBasicBlock* new_header = helper.DoUnrolling();
+
+ EXPECT_TRUE(CheckGraph());
+
+ // Check loop body successors.
+ EXPECT_EQ(loop_body->GetSingleSuccessor(), bb_map.Get(header));
+ EXPECT_EQ(bb_map.Get(loop_body)->GetSingleSuccessor(), header);
+
+ // Check loop structure.
+ EXPECT_EQ(header, new_header);
+ EXPECT_EQ(loop_info, new_header->GetLoopInformation());
+ EXPECT_EQ(loop_info->GetHeader(), new_header);
+ EXPECT_EQ(loop_info->GetBackEdges().size(), 1u);
+ EXPECT_EQ(loop_info->GetBackEdges()[0], bb_map.Get(loop_body));
+}
+
+// Checks that loop unrolling works fine for a loop with multiple back edges. Tests that after
+// the transformation the loop has a single preheader.
+TEST_F(SuperblockClonerTest, LoopPeelingMultipleBackEdges) {
+ HBasicBlock* header = nullptr;
+ HBasicBlock* loop_body = nullptr;
+
+ InitGraph();
+ CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body);
+ CreateBasicLoopDataFlow(header, loop_body);
+
+ // Transform a basic loop to have multiple back edges.
+ HBasicBlock* latch = header->GetSuccessors()[1];
+ HBasicBlock* if_block = new (GetAllocator()) HBasicBlock(graph_);
+ HBasicBlock* temp1 = new (GetAllocator()) HBasicBlock(graph_);
+ graph_->AddBlock(if_block);
+ graph_->AddBlock(temp1);
+ header->ReplaceSuccessor(latch, if_block);
+ if_block->AddSuccessor(latch);
+ if_block->AddSuccessor(temp1);
+ temp1->AddSuccessor(header);
+
+ if_block->AddInstruction(new (GetAllocator()) HIf(parameter_));
+
+ HInstructionIterator it(header->GetPhis());
+ DCHECK(!it.Done());
+ HPhi* loop_phi = it.Current()->AsPhi();
+ HInstruction* temp_add = new (GetAllocator()) HAdd(DataType::Type::kInt32,
+ loop_phi,
+ graph_->GetIntConstant(2));
+ temp1->AddInstruction(temp_add);
+ temp1->AddInstruction(new (GetAllocator()) HGoto());
+ loop_phi->AddInput(temp_add);
+
+ graph_->BuildDominatorTree();
+ EXPECT_TRUE(CheckGraph());
+
+ HLoopInformation* loop_info = header->GetLoopInformation();
+ PeelUnrollSimpleHelper helper(loop_info);
+ HBasicBlock* new_header = helper.DoPeeling();
+ EXPECT_EQ(header, new_header);
+
+ EXPECT_TRUE(CheckGraph());
+ EXPECT_EQ(header->GetPredecessors().size(), 3u);
+}
+
+static void CheckLoopStructureForLoopPeelingNested(HBasicBlock* loop1_header,
+ HBasicBlock* loop2_header,
+ HBasicBlock* loop3_header) {
+ EXPECT_EQ(loop1_header->GetLoopInformation()->GetHeader(), loop1_header);
+ EXPECT_EQ(loop2_header->GetLoopInformation()->GetHeader(), loop2_header);
+ EXPECT_EQ(loop3_header->GetLoopInformation()->GetHeader(), loop3_header);
+ EXPECT_EQ(loop1_header->GetLoopInformation()->GetPreHeader()->GetLoopInformation(), nullptr);
+ EXPECT_EQ(loop2_header->GetLoopInformation()->GetPreHeader()->GetLoopInformation(), nullptr);
+ EXPECT_EQ(loop3_header->GetLoopInformation()->GetPreHeader()->GetLoopInformation()->GetHeader(),
+ loop2_header);
+}
+
+TEST_F(SuperblockClonerTest, LoopPeelingNested) {
+ HBasicBlock* header = nullptr;
+ HBasicBlock* loop_body = nullptr;
+
+ InitGraph();
+
+ // Create the following nested structure of loops
+ // Headers: 1 2 3
+ // [ ], [ [ ] ]
+ CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body);
+ CreateBasicLoopDataFlow(header, loop_body);
+ HBasicBlock* loop1_header = header;
+
+ CreateBasicLoopControlFlow(header, return_block_, &header, &loop_body);
+ CreateBasicLoopDataFlow(header, loop_body);
+ HBasicBlock* loop2_header = header;
+
+ CreateBasicLoopControlFlow(header, header->GetSuccessors()[1], &header, &loop_body);
+ CreateBasicLoopDataFlow(header, loop_body);
+ HBasicBlock* loop3_header = header;
+
+ graph_->BuildDominatorTree();
+ EXPECT_TRUE(CheckGraph());
+
+ HLoopInformation* loop2_info_before = loop2_header->GetLoopInformation();
+ HLoopInformation* loop3_info_before = loop3_header->GetLoopInformation();
+
+ // Check nested loops structure.
+ CheckLoopStructureForLoopPeelingNested(loop1_header, loop2_header, loop3_header);
+ PeelUnrollSimpleHelper helper(loop1_header->GetLoopInformation());
+ helper.DoPeeling();
+ // Check that nested loops structure has not changed after the transformation.
+ CheckLoopStructureForLoopPeelingNested(loop1_header, loop2_header, loop3_header);
+
+ // Test that the loop info is preserved.
+ EXPECT_EQ(loop2_info_before, loop2_header->GetLoopInformation());
+ EXPECT_EQ(loop3_info_before, loop3_header->GetLoopInformation());
+
+ EXPECT_EQ(loop3_info_before->GetPreHeader()->GetLoopInformation(), loop2_info_before);
+ EXPECT_EQ(loop2_info_before->GetPreHeader()->GetLoopInformation(), nullptr);
+
+ EXPECT_EQ(helper.GetRegionToBeAdjusted(), nullptr);
+
+ EXPECT_TRUE(CheckGraph());
+}
+
+// Checks that the loop population is correctly propagated after an inner loop is peeled.
+TEST_F(SuperblockClonerTest, OuterLoopPopulationAfterInnerPeeled) {
+ HBasicBlock* header = nullptr;
+ HBasicBlock* loop_body = nullptr;
+
+ InitGraph();
+
+ // Create the following nested structure of loops
+ // Headers: 1 2 3 4
+ // [ [ [ ] ] ], [ ]
+ CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body);
+ CreateBasicLoopDataFlow(header, loop_body);
+ HBasicBlock* loop1_header = header;
+
+ CreateBasicLoopControlFlow(header, header->GetSuccessors()[1], &header, &loop_body);
+ CreateBasicLoopDataFlow(header, loop_body);
+ HBasicBlock* loop2_header = header;
+
+ CreateBasicLoopControlFlow(header, header->GetSuccessors()[1], &header, &loop_body);
+ CreateBasicLoopDataFlow(header, loop_body);
+ HBasicBlock* loop3_header = header;
+
+ CreateBasicLoopControlFlow(loop1_header, return_block_, &header, &loop_body);
+ CreateBasicLoopDataFlow(header, loop_body);
+ HBasicBlock* loop4_header = header;
+
+ graph_->BuildDominatorTree();
+ EXPECT_TRUE(CheckGraph());
+
+ PeelUnrollSimpleHelper helper(loop3_header->GetLoopInformation());
+ helper.DoPeeling();
+ HLoopInformation* loop1 = loop1_header->GetLoopInformation();
+ HLoopInformation* loop2 = loop2_header->GetLoopInformation();
+ HLoopInformation* loop3 = loop3_header->GetLoopInformation();
+ HLoopInformation* loop4 = loop4_header->GetLoopInformation();
+
+ EXPECT_TRUE(loop1->Contains(*loop2_header));
+ EXPECT_TRUE(loop1->Contains(*loop3_header));
+ EXPECT_TRUE(loop1->Contains(*loop3_header->GetLoopInformation()->GetPreHeader()));
+
+ // Check that loop4 info has not been touched after local run of AnalyzeLoops.
+ EXPECT_EQ(loop4, loop4_header->GetLoopInformation());
+
+ EXPECT_TRUE(loop1->IsIn(*loop1));
+ EXPECT_TRUE(loop2->IsIn(*loop1));
+ EXPECT_TRUE(loop3->IsIn(*loop1));
+ EXPECT_TRUE(loop3->IsIn(*loop2));
+ EXPECT_TRUE(!loop4->IsIn(*loop1));
+
+ EXPECT_EQ(loop4->GetPreHeader()->GetLoopInformation(), nullptr);
+
+ EXPECT_EQ(helper.GetRegionToBeAdjusted(), loop2);
+
+ EXPECT_TRUE(CheckGraph());
+}
+
+// Checks the case when inner loop have an exit not to its immediate outer_loop but some other loop
+// in the hierarchy. Loop population information must be valid after loop peeling.
+TEST_F(SuperblockClonerTest, NestedCaseExitToOutermost) {
+ HBasicBlock* header = nullptr;
+ HBasicBlock* loop_body = nullptr;
+
+ InitGraph();
+
+ // Create the following nested structure of loops then peel loop3.
+ // Headers: 1 2 3
+ // [ [ [ ] ] ]
+ CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body);
+ CreateBasicLoopDataFlow(header, loop_body);
+ HBasicBlock* loop1_header = header;
+ HBasicBlock* loop_body1 = loop_body;
+
+ CreateBasicLoopControlFlow(header, header->GetSuccessors()[1], &header, &loop_body);
+ CreateBasicLoopDataFlow(header, loop_body);
+
+ CreateBasicLoopControlFlow(header, header->GetSuccessors()[1], &header, &loop_body);
+ CreateBasicLoopDataFlow(header, loop_body);
+ HBasicBlock* loop3_header = header;
+ HBasicBlock* loop_body3 = loop_body;
+
+ // Change the loop3 - insert an exit which leads to loop1.
+ HBasicBlock* loop3_extra_if_block = new (GetAllocator()) HBasicBlock(graph_);
+ graph_->AddBlock(loop3_extra_if_block);
+ loop3_extra_if_block->AddInstruction(new (GetAllocator()) HIf(parameter_));
+
+ loop3_header->ReplaceSuccessor(loop_body3, loop3_extra_if_block);
+ loop3_extra_if_block->AddSuccessor(loop_body1); // Long exit.
+ loop3_extra_if_block->AddSuccessor(loop_body3);
+
+ graph_->BuildDominatorTree();
+ EXPECT_TRUE(CheckGraph());
+
+ HBasicBlock* loop3_long_exit = loop3_extra_if_block->GetSuccessors()[0];
+ EXPECT_TRUE(loop1_header->GetLoopInformation()->Contains(*loop3_long_exit));
+
+ PeelUnrollSimpleHelper helper(loop3_header->GetLoopInformation());
+ helper.DoPeeling();
+
+ HLoopInformation* loop1 = loop1_header->GetLoopInformation();
+ // Check that after the transformation the local area for CF adjustments has been chosen
+ // correctly and loop population has been updated.
+ loop3_long_exit = loop3_extra_if_block->GetSuccessors()[0];
+ EXPECT_TRUE(loop1->Contains(*loop3_long_exit));
+
+ EXPECT_EQ(helper.GetRegionToBeAdjusted(), loop1);
+
+ EXPECT_TRUE(loop1->Contains(*loop3_header));
+ EXPECT_TRUE(loop1->Contains(*loop3_header->GetLoopInformation()->GetPreHeader()));
+
+ EXPECT_TRUE(CheckGraph());
+}
+
+TEST_F(SuperblockClonerTest, FastCaseCheck) {
+ HBasicBlock* header = nullptr;
+ HBasicBlock* loop_body = nullptr;
+ ArenaAllocator* arena = graph_->GetAllocator();
+
+ InitGraph();
+ CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body);
+ CreateBasicLoopDataFlow(header, loop_body);
+ graph_->BuildDominatorTree();
+
+ HLoopInformation* loop_info = header->GetLoopInformation();
+
+ ArenaBitVector orig_bb_set(
+ arena, graph_->GetBlocks().size(), false, kArenaAllocSuperblockCloner);
+ orig_bb_set.Union(&loop_info->GetBlocks());
+
+ HEdgeSet remap_orig_internal(graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
+ HEdgeSet remap_copy_internal(graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
+ HEdgeSet remap_incoming(graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
+
+ CollectRemappingInfoForPeelUnroll(true,
+ loop_info,
+ &remap_orig_internal,
+ &remap_copy_internal,
+ &remap_incoming);
+
+ // Insert some extra nodes and edges.
+ HBasicBlock* preheader = loop_info->GetPreHeader();
+ orig_bb_set.SetBit(preheader->GetBlockId());
+
+ // Adjust incoming edges.
+ remap_incoming.Clear();
+ remap_incoming.Insert(HEdge(preheader->GetSinglePredecessor(), preheader));
+
+ HBasicBlockMap bb_map(std::less<HBasicBlock*>(), arena->Adapter(kArenaAllocSuperblockCloner));
+ HInstructionMap hir_map(std::less<HInstruction*>(), arena->Adapter(kArenaAllocSuperblockCloner));
+
+ SuperblockCloner cloner(graph_,
+ &orig_bb_set,
+ &bb_map,
+ &hir_map);
+ cloner.SetSuccessorRemappingInfo(&remap_orig_internal, &remap_copy_internal, &remap_incoming);
+
+ EXPECT_FALSE(cloner.IsFastCase());
+}
+
+// Helper for FindCommonLoop which also check that FindCommonLoop is symmetric.
+static HLoopInformation* FindCommonLoopCheck(HLoopInformation* loop1, HLoopInformation* loop2) {
+ HLoopInformation* common_loop12 = FindCommonLoop(loop1, loop2);
+ HLoopInformation* common_loop21 = FindCommonLoop(loop2, loop1);
+ EXPECT_EQ(common_loop21, common_loop12);
+ return common_loop12;
+}
+
+// Tests FindCommonLoop function on a loop nest.
+TEST_F(SuperblockClonerTest, FindCommonLoop) {
+ HBasicBlock* header = nullptr;
+ HBasicBlock* loop_body = nullptr;
+
+ InitGraph();
+
+ // Create the following nested structure of loops
+ // Headers: 1 2 3 4 5
+ // [ [ [ ] ], [ ] ], [ ]
+ CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body);
+ CreateBasicLoopDataFlow(header, loop_body);
+ HBasicBlock* loop1_header = header;
+
+ CreateBasicLoopControlFlow(header, header->GetSuccessors()[1], &header, &loop_body);
+ CreateBasicLoopDataFlow(header, loop_body);
+ HBasicBlock* loop2_header = header;
+
+ CreateBasicLoopControlFlow(header, header->GetSuccessors()[1], &header, &loop_body);
+ CreateBasicLoopDataFlow(header, loop_body);
+ HBasicBlock* loop3_header = header;
+
+ CreateBasicLoopControlFlow(loop2_header, loop2_header->GetSuccessors()[0], &header, &loop_body);
+ CreateBasicLoopDataFlow(header, loop_body);
+ HBasicBlock* loop4_header = header;
+
+ CreateBasicLoopControlFlow(loop1_header, return_block_, &header, &loop_body);
+ CreateBasicLoopDataFlow(header, loop_body);
+ HBasicBlock* loop5_header = header;
+
+ graph_->BuildDominatorTree();
+ EXPECT_TRUE(CheckGraph());
+
+ HLoopInformation* loop1 = loop1_header->GetLoopInformation();
+ HLoopInformation* loop2 = loop2_header->GetLoopInformation();
+ HLoopInformation* loop3 = loop3_header->GetLoopInformation();
+ HLoopInformation* loop4 = loop4_header->GetLoopInformation();
+ HLoopInformation* loop5 = loop5_header->GetLoopInformation();
+
+ EXPECT_TRUE(loop1->IsIn(*loop1));
+ EXPECT_TRUE(loop2->IsIn(*loop1));
+ EXPECT_TRUE(loop3->IsIn(*loop1));
+ EXPECT_TRUE(loop3->IsIn(*loop2));
+ EXPECT_TRUE(loop4->IsIn(*loop1));
+
+ EXPECT_FALSE(loop5->IsIn(*loop1));
+ EXPECT_FALSE(loop4->IsIn(*loop2));
+ EXPECT_FALSE(loop4->IsIn(*loop3));
+
+ EXPECT_EQ(loop1->GetPreHeader()->GetLoopInformation(), nullptr);
+ EXPECT_EQ(loop4->GetPreHeader()->GetLoopInformation(), loop1);
+
+ EXPECT_EQ(FindCommonLoopCheck(nullptr, nullptr), nullptr);
+ EXPECT_EQ(FindCommonLoopCheck(loop2, nullptr), nullptr);
+
+ EXPECT_EQ(FindCommonLoopCheck(loop1, loop1), loop1);
+ EXPECT_EQ(FindCommonLoopCheck(loop1, loop2), loop1);
+ EXPECT_EQ(FindCommonLoopCheck(loop1, loop3), loop1);
+ EXPECT_EQ(FindCommonLoopCheck(loop1, loop4), loop1);
+ EXPECT_EQ(FindCommonLoopCheck(loop1, loop5), nullptr);
+
+ EXPECT_EQ(FindCommonLoopCheck(loop2, loop3), loop2);
+ EXPECT_EQ(FindCommonLoopCheck(loop2, loop4), loop1);
+ EXPECT_EQ(FindCommonLoopCheck(loop2, loop5), nullptr);
+
+ EXPECT_EQ(FindCommonLoopCheck(loop3, loop4), loop1);
+ EXPECT_EQ(FindCommonLoopCheck(loop3, loop5), nullptr);
+
+ EXPECT_EQ(FindCommonLoopCheck(loop4, loop5), nullptr);
+
+ EXPECT_EQ(FindCommonLoopCheck(loop5, loop5), loop5);
+}
+
} // namespace art
diff --git a/compiler/utils/assembler_test_base.h b/compiler/utils/assembler_test_base.h
index 1482210ac4..778a01566c 100644
--- a/compiler/utils/assembler_test_base.h
+++ b/compiler/utils/assembler_test_base.h
@@ -25,9 +25,9 @@
#include "android-base/strings.h"
+#include "base/utils.h"
#include "common_runtime_test.h" // For ScratchFile
#include "exec_utils.h"
-#include "utils.h"
namespace art {
diff --git a/compiler/utils/atomic_dex_ref_map.h b/compiler/utils/atomic_dex_ref_map.h
index cabd79e9f1..fc2437999e 100644
--- a/compiler/utils/atomic_dex_ref_map.h
+++ b/compiler/utils/atomic_dex_ref_map.h
@@ -17,7 +17,7 @@
#ifndef ART_COMPILER_UTILS_ATOMIC_DEX_REF_MAP_H_
#define ART_COMPILER_UTILS_ATOMIC_DEX_REF_MAP_H_
-#include "atomic.h"
+#include "base/atomic.h"
#include "base/dchecked_vector.h"
#include "base/safe_map.h"
#include "dex/dex_file_reference.h"
diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc
index 2218ef9af2..b2ad490a57 100644
--- a/compiler/utils/mips/assembler_mips.cc
+++ b/compiler/utils/mips/assembler_mips.cc
@@ -2793,6 +2793,26 @@ void MipsAssembler::Hadd_uD(VectorRegister wd, VectorRegister ws, VectorRegister
DsFsmInstr(EmitMsa3R(0x5, 0x3, wt, ws, wd, 0x15)).FprOuts(wd).FprIns(ws, wt);
}
+void MipsAssembler::PcntB(VectorRegister wd, VectorRegister ws) {
+ CHECK(HasMsa());
+ DsFsmInstr(EmitMsa2R(0xc1, 0x0, ws, wd, 0x1e)).FprOuts(wd).FprIns(ws);
+}
+
+void MipsAssembler::PcntH(VectorRegister wd, VectorRegister ws) {
+ CHECK(HasMsa());
+ DsFsmInstr(EmitMsa2R(0xc1, 0x1, ws, wd, 0x1e)).FprOuts(wd).FprIns(ws);
+}
+
+void MipsAssembler::PcntW(VectorRegister wd, VectorRegister ws) {
+ CHECK(HasMsa());
+ DsFsmInstr(EmitMsa2R(0xc1, 0x2, ws, wd, 0x1e)).FprOuts(wd).FprIns(ws);
+}
+
+void MipsAssembler::PcntD(VectorRegister wd, VectorRegister ws) {
+ CHECK(HasMsa());
+ DsFsmInstr(EmitMsa2R(0xc1, 0x3, ws, wd, 0x1e)).FprOuts(wd).FprIns(ws);
+}
+
void MipsAssembler::ReplicateFPToVectorRegister(VectorRegister dst,
FRegister src,
bool is_double) {
diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h
index 7de8e2e366..c6ce62b4f4 100644
--- a/compiler/utils/mips/assembler_mips.h
+++ b/compiler/utils/mips/assembler_mips.h
@@ -756,6 +756,11 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi
void Hadd_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
void Hadd_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+ void PcntB(VectorRegister wd, VectorRegister ws);
+ void PcntH(VectorRegister wd, VectorRegister ws);
+ void PcntW(VectorRegister wd, VectorRegister ws);
+ void PcntD(VectorRegister wd, VectorRegister ws);
+
// Helper for replicating floating point value in all destination elements.
void ReplicateFPToVectorRegister(VectorRegister dst, FRegister src, bool is_double);
diff --git a/compiler/utils/mips/assembler_mips32r6_test.cc b/compiler/utils/mips/assembler_mips32r6_test.cc
index 937ee25bcb..691c33f3e7 100644
--- a/compiler/utils/mips/assembler_mips32r6_test.cc
+++ b/compiler/utils/mips/assembler_mips32r6_test.cc
@@ -2277,6 +2277,22 @@ TEST_F(AssemblerMIPS32r6Test, FillW) {
DriverStr(RepeatVR(&mips::MipsAssembler::FillW, "fill.w ${reg1}, ${reg2}"), "fill.w");
}
+TEST_F(AssemblerMIPS32r6Test, PcntB) {
+ DriverStr(RepeatVV(&mips::MipsAssembler::PcntB, "pcnt.b ${reg1}, ${reg2}"), "pcnt.b");
+}
+
+TEST_F(AssemblerMIPS32r6Test, PcntH) {
+ DriverStr(RepeatVV(&mips::MipsAssembler::PcntH, "pcnt.h ${reg1}, ${reg2}"), "pcnt.h");
+}
+
+TEST_F(AssemblerMIPS32r6Test, PcntW) {
+ DriverStr(RepeatVV(&mips::MipsAssembler::PcntW, "pcnt.w ${reg1}, ${reg2}"), "pcnt.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, PcntD) {
+ DriverStr(RepeatVV(&mips::MipsAssembler::PcntD, "pcnt.d ${reg1}, ${reg2}"), "pcnt.d");
+}
+
TEST_F(AssemblerMIPS32r6Test, LdiB) {
DriverStr(RepeatVIb(&mips::MipsAssembler::LdiB, -8, "ldi.b ${reg}, {imm}"), "ldi.b");
}
diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc
index e1b0e75108..5a817fa960 100644
--- a/compiler/utils/mips64/assembler_mips64.cc
+++ b/compiler/utils/mips64/assembler_mips64.cc
@@ -2279,6 +2279,26 @@ void Mips64Assembler::Hadd_uD(VectorRegister wd, VectorRegister ws, VectorRegist
EmitMsa3R(0x5, 0x3, wt, ws, wd, 0x15);
}
+void Mips64Assembler::PcntB(VectorRegister wd, VectorRegister ws) {
+ CHECK(HasMsa());
+ EmitMsa2R(0xc1, 0x0, ws, wd, 0x1e);
+}
+
+void Mips64Assembler::PcntH(VectorRegister wd, VectorRegister ws) {
+ CHECK(HasMsa());
+ EmitMsa2R(0xc1, 0x1, ws, wd, 0x1e);
+}
+
+void Mips64Assembler::PcntW(VectorRegister wd, VectorRegister ws) {
+ CHECK(HasMsa());
+ EmitMsa2R(0xc1, 0x2, ws, wd, 0x1e);
+}
+
+void Mips64Assembler::PcntD(VectorRegister wd, VectorRegister ws) {
+ CHECK(HasMsa());
+ EmitMsa2R(0xc1, 0x3, ws, wd, 0x1e);
+}
+
void Mips64Assembler::ReplicateFPToVectorRegister(VectorRegister dst,
FpuRegister src,
bool is_double) {
diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h
index 7a61f39e64..542dbafc87 100644
--- a/compiler/utils/mips64/assembler_mips64.h
+++ b/compiler/utils/mips64/assembler_mips64.h
@@ -863,6 +863,11 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer
void Hadd_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
void Hadd_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+ void PcntB(VectorRegister wd, VectorRegister ws);
+ void PcntH(VectorRegister wd, VectorRegister ws);
+ void PcntW(VectorRegister wd, VectorRegister ws);
+ void PcntD(VectorRegister wd, VectorRegister ws);
+
// Helper for replicating floating point value in all destination elements.
void ReplicateFPToVectorRegister(VectorRegister dst, FpuRegister src, bool is_double);
diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc
index b0e1d91c3f..fb5f12be93 100644
--- a/compiler/utils/mips64/assembler_mips64_test.cc
+++ b/compiler/utils/mips64/assembler_mips64_test.cc
@@ -3529,6 +3529,22 @@ TEST_F(AssemblerMIPS64Test, FillD) {
DriverStr(RepeatVR(&mips64::Mips64Assembler::FillD, "fill.d ${reg1}, ${reg2}"), "fill.d");
}
+TEST_F(AssemblerMIPS64Test, PcntB) {
+ DriverStr(RepeatVV(&mips64::Mips64Assembler::PcntB, "pcnt.b ${reg1}, ${reg2}"), "pcnt.b");
+}
+
+TEST_F(AssemblerMIPS64Test, PcntH) {
+ DriverStr(RepeatVV(&mips64::Mips64Assembler::PcntH, "pcnt.h ${reg1}, ${reg2}"), "pcnt.h");
+}
+
+TEST_F(AssemblerMIPS64Test, PcntW) {
+ DriverStr(RepeatVV(&mips64::Mips64Assembler::PcntW, "pcnt.w ${reg1}, ${reg2}"), "pcnt.w");
+}
+
+TEST_F(AssemblerMIPS64Test, PcntD) {
+ DriverStr(RepeatVV(&mips64::Mips64Assembler::PcntD, "pcnt.d ${reg1}, ${reg2}"), "pcnt.d");
+}
+
TEST_F(AssemblerMIPS64Test, LdiB) {
DriverStr(RepeatVIb(&mips64::Mips64Assembler::LdiB, -8, "ldi.b ${reg}, {imm}"), "ldi.b");
}
diff --git a/compiler/utils/swap_space_test.cc b/compiler/utils/swap_space_test.cc
index f4bca59cb3..1650080e66 100644
--- a/compiler/utils/swap_space_test.cc
+++ b/compiler/utils/swap_space_test.cc
@@ -24,9 +24,9 @@
#include "gtest/gtest.h"
+#include "base/os.h"
#include "base/unix_file/fd_file.h"
#include "common_runtime_test.h"
-#include "os.h"
namespace art {
diff --git a/compiler/utils/test_dex_file_builder_test.cc b/compiler/utils/test_dex_file_builder_test.cc
index 736a17edac..788afd8e1a 100644
--- a/compiler/utils/test_dex_file_builder_test.cc
+++ b/compiler/utils/test_dex_file_builder_test.cc
@@ -16,9 +16,9 @@
#include "test_dex_file_builder.h"
+#include "base/utils.h"
#include "dex/dex_file-inl.h"
#include "gtest/gtest.h"
-#include "utils.h"
namespace art {
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index ea160c8993..42c2541421 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -913,6 +913,78 @@ void X86Assembler::psubq(XmmRegister dst, XmmRegister src) {
}
+void X86Assembler::paddusb(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0xDC);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::paddsb(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0xEC);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::paddusw(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0xDD);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::paddsw(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0xED);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::psubusb(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0xD8);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::psubsb(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0xE8);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::psubusw(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0xD9);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::psubsw(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0xE9);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+
void X86Assembler::cvtsi2ss(XmmRegister dst, Register src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0xF3);
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index a085677083..22eaedce61 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -449,6 +449,15 @@ class X86Assembler FINAL : public Assembler {
void paddq(XmmRegister dst, XmmRegister src);
void psubq(XmmRegister dst, XmmRegister src);
+ void paddusb(XmmRegister dst, XmmRegister src);
+ void paddsb(XmmRegister dst, XmmRegister src);
+ void paddusw(XmmRegister dst, XmmRegister src);
+ void paddsw(XmmRegister dst, XmmRegister src);
+ void psubusb(XmmRegister dst, XmmRegister src);
+ void psubsb(XmmRegister dst, XmmRegister src);
+ void psubusw(XmmRegister dst, XmmRegister src);
+ void psubsw(XmmRegister dst, XmmRegister src);
+
void cvtsi2ss(XmmRegister dst, Register src);
void cvtsi2sd(XmmRegister dst, Register src);
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index 2fd1b27182..8f72db748b 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -600,6 +600,38 @@ TEST_F(AssemblerX86Test, PSubQ) {
DriverStr(RepeatFF(&x86::X86Assembler::psubq, "psubq %{reg2}, %{reg1}"), "psubq");
}
+TEST_F(AssemblerX86Test, PAddUSB) {
+ DriverStr(RepeatFF(&x86::X86Assembler::paddusb, "paddusb %{reg2}, %{reg1}"), "paddusb");
+}
+
+TEST_F(AssemblerX86Test, PAddSB) {
+ DriverStr(RepeatFF(&x86::X86Assembler::paddsb, "paddsb %{reg2}, %{reg1}"), "paddsb");
+}
+
+TEST_F(AssemblerX86Test, PAddUSW) {
+ DriverStr(RepeatFF(&x86::X86Assembler::paddusw, "paddusw %{reg2}, %{reg1}"), "paddusw");
+}
+
+TEST_F(AssemblerX86Test, PAddSW) {
+ DriverStr(RepeatFF(&x86::X86Assembler::psubsw, "psubsw %{reg2}, %{reg1}"), "psubsw");
+}
+
+TEST_F(AssemblerX86Test, PSubUSB) {
+ DriverStr(RepeatFF(&x86::X86Assembler::psubusb, "psubusb %{reg2}, %{reg1}"), "psubusb");
+}
+
+TEST_F(AssemblerX86Test, PSubSB) {
+ DriverStr(RepeatFF(&x86::X86Assembler::psubsb, "psubsb %{reg2}, %{reg1}"), "psubsb");
+}
+
+TEST_F(AssemblerX86Test, PSubUSW) {
+ DriverStr(RepeatFF(&x86::X86Assembler::psubusw, "psubusw %{reg2}, %{reg1}"), "psubusw");
+}
+
+TEST_F(AssemblerX86Test, PSubSW) {
+ DriverStr(RepeatFF(&x86::X86Assembler::psubsw, "psubsw %{reg2}, %{reg1}"), "psubsw");
+}
+
TEST_F(AssemblerX86Test, XorPD) {
DriverStr(RepeatFF(&x86::X86Assembler::xorpd, "xorpd %{reg2}, %{reg1}"), "xorpd");
}
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index ff5a357c5e..c6e16e754c 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -1011,6 +1011,86 @@ void X86_64Assembler::psubq(XmmRegister dst, XmmRegister src) {
}
+void X86_64Assembler::paddusb(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0xDC);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+
+void X86_64Assembler::paddsb(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0xEC);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+
+void X86_64Assembler::paddusw(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0xDD);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+
+void X86_64Assembler::paddsw(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0xED);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+
+void X86_64Assembler::psubusb(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0xD8);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+
+void X86_64Assembler::psubsb(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0xE8);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+
+void X86_64Assembler::psubusw(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0xD9);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+
+void X86_64Assembler::psubsw(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0xE9);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+
void X86_64Assembler::cvtsi2ss(XmmRegister dst, CpuRegister src) {
cvtsi2ss(dst, src, false);
}
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 7a5fdb502f..ab761fb1fc 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -485,6 +485,15 @@ class X86_64Assembler FINAL : public Assembler {
void paddq(XmmRegister dst, XmmRegister src);
void psubq(XmmRegister dst, XmmRegister src);
+ void paddusb(XmmRegister dst, XmmRegister src);
+ void paddsb(XmmRegister dst, XmmRegister src);
+ void paddusw(XmmRegister dst, XmmRegister src);
+ void paddsw(XmmRegister dst, XmmRegister src);
+ void psubusb(XmmRegister dst, XmmRegister src);
+ void psubsb(XmmRegister dst, XmmRegister src);
+ void psubusw(XmmRegister dst, XmmRegister src);
+ void psubsw(XmmRegister dst, XmmRegister src);
+
void cvtsi2ss(XmmRegister dst, CpuRegister src); // Note: this is the r/m32 version.
void cvtsi2ss(XmmRegister dst, CpuRegister src, bool is64bit);
void cvtsi2ss(XmmRegister dst, const Address& src, bool is64bit);
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index 6b1e53c35a..104e215cf5 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -1282,6 +1282,38 @@ TEST_F(AssemblerX86_64Test, Psubq) {
DriverStr(RepeatFF(&x86_64::X86_64Assembler::psubq, "psubq %{reg2}, %{reg1}"), "psubq");
}
+TEST_F(AssemblerX86_64Test, Paddusb) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::paddusb, "paddusb %{reg2}, %{reg1}"), "paddusb");
+}
+
+TEST_F(AssemblerX86_64Test, Paddsb) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::paddsb, "paddsb %{reg2}, %{reg1}"), "paddsb");
+}
+
+TEST_F(AssemblerX86_64Test, Paddusw) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::paddusw, "paddusw %{reg2}, %{reg1}"), "paddusw");
+}
+
+TEST_F(AssemblerX86_64Test, Paddsw) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::paddsw, "paddsw %{reg2}, %{reg1}"), "paddsw");
+}
+
+TEST_F(AssemblerX86_64Test, Psubusb) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::psubusb, "psubusb %{reg2}, %{reg1}"), "psubusb");
+}
+
+TEST_F(AssemblerX86_64Test, Psubsb) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::psubsb, "psubsb %{reg2}, %{reg1}"), "psubsb");
+}
+
+TEST_F(AssemblerX86_64Test, Psubusw) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::psubusw, "psubusw %{reg2}, %{reg1}"), "psubusw");
+}
+
+TEST_F(AssemblerX86_64Test, Psubsw) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::psubsw, "psubsw %{reg2}, %{reg1}"), "psubsw");
+}
+
TEST_F(AssemblerX86_64Test, Cvtsi2ss) {
DriverStr(RepeatFr(&x86_64::X86_64Assembler::cvtsi2ss, "cvtsi2ss %{reg2}, %{reg1}"), "cvtsi2ss");
}