Diffstat (limited to 'compiler')
-rw-r--r--  compiler/common_compiler_test.cc                    |  10
-rw-r--r--  compiler/compiled_method.h                          |   2
-rw-r--r--  compiler/dex/quick/x86/assemble_x86.cc              |   1
-rwxr-xr-x  compiler/dex/quick/x86/target_x86.cc                |  32
-rw-r--r--  compiler/dex/quick/x86/x86_lir.h                    |   1
-rw-r--r--  compiler/driver/compiled_method_storage.h           |   2
-rw-r--r--  compiler/driver/compiler_driver.cc                  |   5
-rw-r--r--  compiler/driver/compiler_driver_test.cc             |   5
-rw-r--r--  compiler/image_writer.cc                            |  44
-rw-r--r--  compiler/image_writer.h                             |   2
-rw-r--r--  compiler/oat_test.cc                                |  12
-rw-r--r--  compiler/optimizing/builder.h                       |   5
-rw-r--r--  compiler/optimizing/code_generator_arm.cc           |  30
-rw-r--r--  compiler/optimizing/code_generator_arm64.cc         |  35
-rw-r--r--  compiler/optimizing/code_generator_mips.cc          |  36
-rw-r--r--  compiler/optimizing/code_generator_mips64.cc        | 704
-rw-r--r--  compiler/optimizing/code_generator_mips64.h         |  10
-rw-r--r--  compiler/optimizing/code_generator_x86.cc           |  88
-rw-r--r--  compiler/optimizing/code_generator_x86.h            |  24
-rw-r--r--  compiler/optimizing/code_generator_x86_64.cc        |  56
-rw-r--r--  compiler/optimizing/code_generator_x86_64.h         |  13
-rw-r--r--  compiler/optimizing/instruction_simplifier.cc       |   7
-rw-r--r--  compiler/optimizing/intrinsics_arm.cc               |  25
-rw-r--r--  compiler/optimizing/intrinsics_arm64.cc             |  25
-rw-r--r--  compiler/optimizing/intrinsics_mips64.cc            |   2
-rw-r--r--  compiler/optimizing/intrinsics_x86.cc               |  27
-rw-r--r--  compiler/optimizing/intrinsics_x86_64.cc            |  27
-rw-r--r--  compiler/optimizing/load_store_elimination.cc       |  73
-rw-r--r--  compiler/optimizing/parallel_move_resolver.cc       |   2
-rw-r--r--  compiler/optimizing/parallel_move_test.cc           |  32
-rw-r--r--  compiler/optimizing/pc_relative_fixups_x86.cc       |   5
-rw-r--r--  compiler/utils/arm/assembler_arm.h                  |   8
-rw-r--r--  compiler/utils/arm/assembler_arm32.cc               |   1
-rw-r--r--  compiler/utils/arm/assembler_arm32.h                |   2
-rw-r--r--  compiler/utils/arm/assembler_thumb2.cc              |  64
-rw-r--r--  compiler/utils/arm/assembler_thumb2.h               |   2
-rw-r--r--  compiler/utils/assembler_thumb_test.cc              | 249
-rw-r--r--  compiler/utils/assembler_thumb_test_expected.cc.inc | 319
-rw-r--r--  compiler/utils/mips64/assembler_mips64.cc           | 114
-rw-r--r--  compiler/utils/mips64/assembler_mips64.h            |  26
-rw-r--r--  compiler/utils/mips64/assembler_mips64_test.cc      | 152
41 files changed, 1841 insertions(+), 438 deletions(-)
diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc
index 638b897eea..c7c190793c 100644
--- a/compiler/common_compiler_test.cc
+++ b/compiler/common_compiler_test.cc
@@ -142,10 +142,7 @@ void CommonCompilerTest::MakeExecutable(mirror::ClassLoader* class_loader, const
mirror::Class* klass = class_linker_->FindClass(self, class_descriptor.c_str(), loader);
CHECK(klass != nullptr) << "Class not found " << class_name;
size_t pointer_size = class_linker_->GetImagePointerSize();
- for (auto& m : klass->GetDirectMethods(pointer_size)) {
- MakeExecutable(&m);
- }
- for (auto& m : klass->GetVirtualMethods(pointer_size)) {
+ for (auto& m : klass->GetMethods(pointer_size)) {
MakeExecutable(&m);
}
}
@@ -259,10 +256,7 @@ void CommonCompilerTest::CompileClass(mirror::ClassLoader* class_loader, const c
mirror::Class* klass = class_linker_->FindClass(self, class_descriptor.c_str(), loader);
CHECK(klass != nullptr) << "Class not found " << class_name;
auto pointer_size = class_linker_->GetImagePointerSize();
- for (auto& m : klass->GetDirectMethods(pointer_size)) {
- CompileMethod(&m);
- }
- for (auto& m : klass->GetVirtualMethods(pointer_size)) {
+ for (auto& m : klass->GetMethods(pointer_size)) {
CompileMethod(&m);
}
}
diff --git a/compiler/compiled_method.h b/compiler/compiled_method.h
index 15a4ba0f6f..7a93613481 100644
--- a/compiler/compiled_method.h
+++ b/compiler/compiled_method.h
@@ -23,7 +23,7 @@
#include "arch/instruction_set.h"
#include "base/bit_utils.h"
-#include "length_prefixed_array.h"
+#include "base/length_prefixed_array.h"
#include "method_reference.h"
#include "utils/array_ref.h"
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index e5d3841b14..1c2a619020 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -508,6 +508,7 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0,
{ kX86Lfence, kReg, NO_OPERAND, { 0, 0, 0x0F, 0xAE, 0, 5, 0, 0, false }, "Lfence", "" },
{ kX86Mfence, kReg, NO_OPERAND, { 0, 0, 0x0F, 0xAE, 0, 6, 0, 0, false }, "Mfence", "" },
{ kX86Sfence, kReg, NO_OPERAND, { 0, 0, 0x0F, 0xAE, 0, 7, 0, 0, false }, "Sfence", "" },
+ { kX86LockAdd32MI8, kMemImm, IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES, { 0xF0, 0, 0x83, 0x0, 0x0, 0, 0, 1, false }, "LockAdd32MI8", "[!0r+!1d],!2d" },
EXT_0F_ENCODING_MAP(Imul16, 0x66, 0xAF, REG_USE0 | REG_DEF0 | SETS_CCODES),
EXT_0F_ENCODING_MAP(Imul32, 0x00, 0xAF, REG_USE0 | REG_DEF0 | SETS_CCODES),
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index 75f3fef599..4ff79935d7 100755
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -20,7 +20,7 @@
#include <inttypes.h>
#include <string>
-#include "arch/instruction_set_features.h"
+#include "arch/x86/instruction_set_features_x86.h"
#include "art_method.h"
#include "backend_x86.h"
#include "base/logging.h"
@@ -585,6 +585,8 @@ bool X86Mir2Lir::ProvidesFullMemoryBarrier(X86OpCode opcode) {
case kX86LockCmpxchgAR:
case kX86LockCmpxchg64M:
case kX86LockCmpxchg64A:
+ case kX86LockCmpxchg64AR:
+ case kX86LockAdd32MI8:
case kX86XchgMR:
case kX86Mfence:
// Atomic memory instructions provide full barrier.
@@ -598,7 +600,9 @@ bool X86Mir2Lir::ProvidesFullMemoryBarrier(X86OpCode opcode) {
}
bool X86Mir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
- if (!cu_->compiler_driver->GetInstructionSetFeatures()->IsSmp()) {
+ const X86InstructionSetFeatures* features =
+ cu_->compiler_driver->GetInstructionSetFeatures()->AsX86InstructionSetFeatures();
+ if (!features->IsSmp()) {
return false;
}
// Start off with using the last LIR as the barrier. If it is not enough, then we will update it.
@@ -610,20 +614,34 @@ bool X86Mir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
* All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86 memory model.
* For those cases, all we need to ensure is that there is a scheduling barrier in place.
*/
+ const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32;
+ bool use_locked_add = features->PrefersLockedAddSynchronization();
if (barrier_kind == kAnyAny) {
- // If no LIR exists already that can be used a barrier, then generate an mfence.
+ // If no LIR exists already that can be used as a barrier, then generate a barrier.
if (mem_barrier == nullptr) {
- mem_barrier = NewLIR0(kX86Mfence);
+ if (use_locked_add) {
+ mem_barrier = NewLIR3(kX86LockAdd32MI8, rs_rSP.GetReg(), 0, 0);
+ } else {
+ mem_barrier = NewLIR0(kX86Mfence);
+ }
ret = true;
}
- // If last instruction does not provide full barrier, then insert an mfence.
+ // If the last instruction does not provide a full barrier, then insert a barrier.
if (ProvidesFullMemoryBarrier(static_cast<X86OpCode>(mem_barrier->opcode)) == false) {
- mem_barrier = NewLIR0(kX86Mfence);
+ if (use_locked_add) {
+ mem_barrier = NewLIR3(kX86LockAdd32MI8, rs_rSP.GetReg(), 0, 0);
+ } else {
+ mem_barrier = NewLIR0(kX86Mfence);
+ }
ret = true;
}
} else if (barrier_kind == kNTStoreStore) {
- mem_barrier = NewLIR0(kX86Sfence);
+ if (use_locked_add) {
+ mem_barrier = NewLIR3(kX86LockAdd32MI8, rs_rSP.GetReg(), 0, 0);
+ } else {
+ mem_barrier = NewLIR0(kX86Sfence);
+ }
ret = true;
}
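
Note: the quick back end now emits a locked add to the top of the stack instead of mfence/sfence when the CPU prefers it. The LOCK prefix gives the same StoreLoad ordering as mfence, is cheaper on many x86 micro-architectures, and only touches (without changing) a stack word. A minimal standalone sketch of the idea — not ART code, assuming a GCC/Clang-style toolchain:

#include <atomic>

// Full StoreLoad fence via a locked no-op add to the stack -- the same idea
// the new kX86LockAdd32MI8 LIR encodes. LOCK orders all earlier stores before
// all later loads; adding 0 leaves the stack word unchanged.
inline void LockedAddFence() {
#if defined(__x86_64__)
  __asm__ __volatile__("lock addl $0, (%%rsp)" ::: "memory", "cc");
#elif defined(__i386__)
  __asm__ __volatile__("lock addl $0, (%%esp)" ::: "memory", "cc");
#else
  std::atomic_thread_fence(std::memory_order_seq_cst);  // portable fallback
#endif
}
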
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index d6a6a60d3d..8cd6574443 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -606,6 +606,7 @@ enum X86OpCode {
// load-from-memory and store-to-memory instructions
kX86Sfence, // memory barrier to serialize all previous
// store-to-memory instructions
+ kX86LockAdd32MI8, // locked add used to serialize memory instructions
Binary0fOpCode(kX86Imul16), // 16bit multiply
Binary0fOpCode(kX86Imul32), // 32bit multiply
Binary0fOpCode(kX86Imul64), // 64bit multiply
diff --git a/compiler/driver/compiled_method_storage.h b/compiler/driver/compiled_method_storage.h
index ef10b6768b..d6961a0876 100644
--- a/compiler/driver/compiled_method_storage.h
+++ b/compiler/driver/compiled_method_storage.h
@@ -20,8 +20,8 @@
#include <iosfwd>
#include <memory>
+#include "base/length_prefixed_array.h"
#include "base/macros.h"
-#include "length_prefixed_array.h"
#include "utils/array_ref.h"
#include "utils/dedupe_set.h"
#include "utils/swap_space.h"
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index ba8f1d0df1..56839f85f9 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -790,10 +790,7 @@ class ResolveCatchBlockExceptionsClassVisitor : public ClassVisitor {
virtual bool Visit(mirror::Class* c) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
const auto pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
- for (auto& m : c->GetVirtualMethods(pointer_size)) {
- ResolveExceptionsForMethod(&m, pointer_size);
- }
- for (auto& m : c->GetDirectMethods(pointer_size)) {
+ for (auto& m : c->GetMethods(pointer_size)) {
ResolveExceptionsForMethod(&m, pointer_size);
}
return true;
diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc
index 2e43c2c71a..462c511a83 100644
--- a/compiler/driver/compiler_driver_test.cc
+++ b/compiler/driver/compiler_driver_test.cc
@@ -86,10 +86,7 @@ class CompilerDriverTest : public CommonCompilerTest {
mirror::Class* c = class_linker->FindClass(soa.Self(), descriptor, loader);
CHECK(c != nullptr);
const auto pointer_size = class_linker->GetImagePointerSize();
- for (auto& m : c->GetDirectMethods(pointer_size)) {
- MakeExecutable(&m);
- }
- for (auto& m : c->GetVirtualMethods(pointer_size)) {
+ for (auto& m : c->GetMethods(pointer_size)) {
MakeExecutable(&m);
}
}
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index fce08ea5f0..9545c83eaf 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -1030,44 +1030,42 @@ void ImageWriter::WalkFieldsInOrder(mirror::Object* obj) {
}
}
// Visit and assign offsets for methods.
- LengthPrefixedArray<ArtMethod>* method_arrays[] = {
- as_klass->GetDirectMethodsPtr(), as_klass->GetVirtualMethodsPtr(),
- };
- for (LengthPrefixedArray<ArtMethod>* array : method_arrays) {
- if (array == nullptr) {
- continue;
- }
+ size_t num_methods = as_klass->NumMethods();
+ if (num_methods != 0) {
bool any_dirty = false;
- size_t count = 0;
- const size_t method_alignment = ArtMethod::Alignment(target_ptr_size_);
- const size_t method_size = ArtMethod::Size(target_ptr_size_);
- auto iteration_range =
- MakeIterationRangeFromLengthPrefixedArray(array, method_size, method_alignment);
- for (auto& m : iteration_range) {
- any_dirty = any_dirty || WillMethodBeDirty(&m);
- ++count;
+ for (auto& m : as_klass->GetMethods(target_ptr_size_)) {
+ if (WillMethodBeDirty(&m)) {
+ any_dirty = true;
+ break;
+ }
}
NativeObjectRelocationType type = any_dirty
? kNativeObjectRelocationTypeArtMethodDirty
: kNativeObjectRelocationTypeArtMethodClean;
Bin bin_type = BinTypeForNativeRelocationType(type);
// Forward the entire array at once, but header first.
+ const size_t method_alignment = ArtMethod::Alignment(target_ptr_size_);
+ const size_t method_size = ArtMethod::Size(target_ptr_size_);
const size_t header_size = LengthPrefixedArray<ArtMethod>::ComputeSize(0,
method_size,
method_alignment);
+ LengthPrefixedArray<ArtMethod>* array = as_klass->GetMethodsPtr();
auto it = native_object_relocations_.find(array);
- CHECK(it == native_object_relocations_.end()) << "Method array " << array
- << " already forwarded";
+ CHECK(it == native_object_relocations_.end())
+ << "Method array " << array << " already forwarded";
size_t& offset = bin_slot_sizes_[bin_type];
DCHECK(!IsInBootImage(array));
- native_object_relocations_.emplace(array, NativeObjectRelocation { offset,
- any_dirty ? kNativeObjectRelocationTypeArtMethodArrayDirty :
- kNativeObjectRelocationTypeArtMethodArrayClean });
+ native_object_relocations_.emplace(
+ array, NativeObjectRelocation {
+ offset,
+ any_dirty ? kNativeObjectRelocationTypeArtMethodArrayDirty
+ : kNativeObjectRelocationTypeArtMethodArrayClean
+ });
offset += header_size;
- for (auto& m : iteration_range) {
+ for (auto& m : as_klass->GetMethods(target_ptr_size_)) {
AssignMethodOffset(&m, type);
}
- (any_dirty ? dirty_methods_ : clean_methods_) += count;
+ (any_dirty ? dirty_methods_ : clean_methods_) += num_methods;
}
} else if (h_obj->IsObjectArray()) {
// Walk elements of an object array.
@@ -1275,6 +1273,8 @@ void ImageWriter::CreateHeader(size_t oat_loaded_size, size_t oat_data_offset) {
auto* interned_strings_section = &sections[ImageHeader::kSectionInternedStrings];
*interned_strings_section = ImageSection(cur_pos, intern_table_bytes_);
cur_pos = interned_strings_section->End();
+ // Round up to the alignment the class table expects. See HashSet::WriteToMemory.
+ cur_pos = RoundUp(cur_pos, sizeof(uint64_t));
// Calculate the size of the class table section.
auto* class_table_section = &sections[ImageHeader::kSectionClassTable];
*class_table_section = ImageSection(cur_pos, class_table_bytes_);
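
Note: the added RoundUp keeps cur_pos 8-byte aligned before the class table section, which HashSet::WriteToMemory expects. For reference, a minimal sketch of a power-of-two round-up (ART's own helper lives in base/bit_utils.h, included above); `alignment` must be a power of two:

#include <cstddef>
#include <cstdint>

// Rounds x up to the next multiple of a power-of-two alignment.
constexpr size_t RoundUpTo(size_t x, size_t alignment) {
  return (x + alignment - 1) & ~(alignment - 1);
}
static_assert(RoundUpTo(13, sizeof(uint64_t)) == 16, "rounds up");
static_assert(RoundUpTo(16, sizeof(uint64_t)) == 16, "aligned input unchanged");
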
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index 8e930f0373..f1b2965a12 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -27,11 +27,11 @@
#include <ostream>
#include "base/bit_utils.h"
+#include "base/length_prefixed_array.h"
#include "base/macros.h"
#include "driver/compiler_driver.h"
#include "gc/space/space.h"
#include "image.h"
-#include "length_prefixed_array.h"
#include "lock_word.h"
#include "mem_map.h"
#include "oat_file.h"
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index 8865ea2243..4fd73be55a 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -238,12 +238,12 @@ TEST_F(OatTest, WriteRead) {
++method_index;
}
size_t visited_virtuals = 0;
- for (auto& m : klass->GetVirtualMethods(pointer_size)) {
- if (!m.IsMiranda()) {
- CheckMethod(&m, oat_class.GetOatMethod(method_index), dex_file);
- ++method_index;
- ++visited_virtuals;
- }
+ // TODO: We should also check copied methods in this test.
+ for (auto& m : klass->GetDeclaredVirtualMethods(pointer_size)) {
+ EXPECT_FALSE(m.IsMiranda());
+ CheckMethod(&m, oat_class.GetOatMethod(method_index), dex_file);
+ ++method_index;
+ ++visited_virtuals;
}
EXPECT_EQ(visited_virtuals, num_virtual_methods);
}
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index c3979f3dd1..ca71c32802 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -90,8 +90,9 @@ class HGraphBuilder : public ValueObject {
static constexpr const char* kBuilderPassName = "builder";
- // The number of entries in a packed switch before we use a jump table.
- static constexpr uint16_t kSmallSwitchThreshold = 5;
+ // The number of entries in a packed switch before we use a jump table or a specified
+ // compare/jump series.
+ static constexpr uint16_t kSmallSwitchThreshold = 3;
private:
// Analyzes the dex instruction and adds HInstruction to the graph
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 3630dbec24..9fda83840c 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -59,7 +59,7 @@ static constexpr SRegister kFpuCalleeSaves[] =
// S registers. Therefore there is no need to block it.
static constexpr DRegister DTMP = D31;
-static constexpr uint32_t kPackedSwitchJumpTableThreshold = 6;
+static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
#define __ down_cast<ArmAssembler*>(codegen->GetAssembler())->
#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArmWordSize, x).Int32Value()
@@ -6250,7 +6250,7 @@ void LocationsBuilderARM::VisitPackedSwitch(HPackedSwitch* switch_instr) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall);
locations->SetInAt(0, Location::RequiresRegister());
- if (switch_instr->GetNumEntries() >= kPackedSwitchJumpTableThreshold &&
+ if (switch_instr->GetNumEntries() > kPackedSwitchCompareJumpThreshold &&
codegen_->GetAssembler()->IsThumb()) {
locations->AddTemp(Location::RequiresRegister()); // We need a temp for the table base.
if (switch_instr->GetStartValue() != 0) {
@@ -6266,12 +6266,30 @@ void InstructionCodeGeneratorARM::VisitPackedSwitch(HPackedSwitch* switch_instr)
Register value_reg = locations->InAt(0).AsRegister<Register>();
HBasicBlock* default_block = switch_instr->GetDefaultBlock();
- if (num_entries < kPackedSwitchJumpTableThreshold || !codegen_->GetAssembler()->IsThumb()) {
+ if (num_entries <= kPackedSwitchCompareJumpThreshold || !codegen_->GetAssembler()->IsThumb()) {
// Create a series of compare/jumps.
+ Register temp_reg = IP;
+ // Note: It is fine for the below AddConstantSetFlags() using IP register to temporarily store
+ // the immediate, because IP is used as the destination register. For the other
+ // AddConstantSetFlags() and GenerateCompareWithImmediate(), the immediate values are constant,
+ // and they can be encoded in the instruction without making use of IP register.
+ __ AddConstantSetFlags(temp_reg, value_reg, -lower_bound);
+
const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
- for (uint32_t i = 0; i < num_entries; i++) {
- GenerateCompareWithImmediate(value_reg, lower_bound + i);
- __ b(codegen_->GetLabelOf(successors[i]), EQ);
+ // Jump to successors[0] if value == lower_bound.
+ __ b(codegen_->GetLabelOf(successors[0]), EQ);
+ int32_t last_index = 0;
+ for (; num_entries - last_index > 2; last_index += 2) {
+ __ AddConstantSetFlags(temp_reg, temp_reg, -2);
+ // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
+ __ b(codegen_->GetLabelOf(successors[last_index + 1]), LO);
+ // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
+ __ b(codegen_->GetLabelOf(successors[last_index + 2]), EQ);
+ }
+ if (num_entries - last_index == 2) {
+ // The last missing case_value.
+ GenerateCompareWithImmediate(temp_reg, 1);
+ __ b(codegen_->GetLabelOf(successors[last_index + 1]), EQ);
}
// And the default for any other value.
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 451470f271..52058302be 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -71,10 +71,10 @@ using helpers::ARM64EncodableConstantOrRegister;
using helpers::ArtVixlRegCodeCoherentForRegSet;
static constexpr int kCurrentMethodStackOffset = 0;
-// The compare/jump sequence will generate about (2 * num_entries + 1) instructions. While jump
+// The compare/jump sequence will generate about (1.5 * num_entries + 3) instructions, while the jump
// table version generates 7 instructions and num_entries literals. Compare/jump sequence will
// generate less code/data with a small num_entries.
-static constexpr uint32_t kPackedSwitchJumpTableThreshold = 6;
+static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
inline Condition ARM64Condition(IfCondition cond) {
switch (cond) {
@@ -546,7 +546,7 @@ class ArraySetSlowPathARM64 : public SlowPathCodeARM64 {
void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) {
uint32_t num_entries = switch_instr_->GetNumEntries();
- DCHECK_GE(num_entries, kPackedSwitchJumpTableThreshold);
+ DCHECK_GE(num_entries, kPackedSwitchCompareJumpThreshold);
// We are about to use the assembler to place literals directly. Make sure we have enough
// underlying code buffer and we have generated the jump table with right size.
@@ -4582,20 +4582,29 @@ void InstructionCodeGeneratorARM64::VisitPackedSwitch(HPackedSwitch* switch_inst
// ranges and emit the tables only as required.
static constexpr int32_t kJumpTableInstructionThreshold = 1* MB / kMaxExpectedSizePerHInstruction;
- if (num_entries < kPackedSwitchJumpTableThreshold ||
+ if (num_entries <= kPackedSwitchCompareJumpThreshold ||
// Current instruction id is an upper bound of the number of HIRs in the graph.
GetGraph()->GetCurrentInstructionId() > kJumpTableInstructionThreshold) {
// Create a series of compare/jumps.
+ UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
+ Register temp = temps.AcquireW();
+ __ Subs(temp, value_reg, Operand(lower_bound));
+
const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
- for (uint32_t i = 0; i < num_entries; i++) {
- int32_t case_value = lower_bound + i;
- vixl::Label* succ = codegen_->GetLabelOf(successors[i]);
- if (case_value == 0) {
- __ Cbz(value_reg, succ);
- } else {
- __ Cmp(value_reg, Operand(case_value));
- __ B(eq, succ);
- }
+ // Jump to successors[0] if value == lower_bound.
+ __ B(eq, codegen_->GetLabelOf(successors[0]));
+ int32_t last_index = 0;
+ for (; num_entries - last_index > 2; last_index += 2) {
+ __ Subs(temp, temp, Operand(2));
+ // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
+ __ B(lo, codegen_->GetLabelOf(successors[last_index + 1]));
+ // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
+ __ B(eq, codegen_->GetLabelOf(successors[last_index + 2]));
+ }
+ if (num_entries - last_index == 2) {
+ // The last missing case_value.
+ __ Cmp(temp, Operand(1));
+ __ B(eq, codegen_->GetLabelOf(successors[last_index + 1]));
}
// And the default for any other value.
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 5dc101b199..ae0f2c8935 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -4897,19 +4897,31 @@ void InstructionCodeGeneratorMIPS::VisitPackedSwitch(HPackedSwitch* switch_instr
HBasicBlock* default_block = switch_instr->GetDefaultBlock();
// Create a set of compare/jumps.
- const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
- for (int32_t i = 0; i < num_entries; ++i) {
- int32_t case_value = lower_bound + i;
- MipsLabel* successor_label = codegen_->GetLabelOf(successors[i]);
- if (case_value == 0) {
- __ Beqz(value_reg, successor_label);
- } else {
- __ LoadConst32(TMP, case_value);
- __ Beq(value_reg, TMP, successor_label);
- }
- }
+ Register temp_reg = TMP;
+ __ Addiu32(temp_reg, value_reg, -lower_bound);
+ // Jump to the default block if the index is negative.
+ // Note: We don't check the case where the index is positive while value < lower_bound: in that
+ // case index >= num_entries must be true, so we still reach the default and save one branch.
+ __ Bltz(temp_reg, codegen_->GetLabelOf(default_block));
- // Insert the default branch for every other value.
+ const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
+ // Jump to successors[0] if value == lower_bound.
+ __ Beqz(temp_reg, codegen_->GetLabelOf(successors[0]));
+ int32_t last_index = 0;
+ for (; num_entries - last_index > 2; last_index += 2) {
+ __ Addiu(temp_reg, temp_reg, -2);
+ // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
+ __ Bltz(temp_reg, codegen_->GetLabelOf(successors[last_index + 1]));
+ // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
+ __ Beqz(temp_reg, codegen_->GetLabelOf(successors[last_index + 2]));
+ }
+ if (num_entries - last_index == 2) {
+ // The last missing case_value.
+ __ Addiu(temp_reg, temp_reg, -1);
+ __ Beqz(temp_reg, codegen_->GetLabelOf(successors[last_index + 1]));
+ }
+
+ // And the default for any other value.
if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
__ B(codegen_->GetLabelOf(default_block));
}
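
Note: the ARM, ARM64, MIPS, and MIPS64 hunks all switch from one compare-and-branch per case to this subtract-by-2 chain: the value is biased by lower_bound once, then each 2-unit subtraction serves both a "<" and an "==" test, roughly 1.5 instructions per entry instead of 2. At the new threshold of 7 entries the chain costs about 1.5 * 7 + 3 ≈ 14 instructions, on par with the 7-instruction jump table plus its 7 table literals. A host-side model of the MIPS shape, illustration only — 64-bit arithmetic sidesteps the signed-wrap corner the in-diff note covers, and ARM folds the below-range check into unsigned condition flags rather than an upfront Bltz:

#include <cstdint>

// Returns the successor index taken for `value`, or -1 for the default block.
int PackedSwitchTarget(int32_t value, int32_t lower_bound, int32_t num_entries) {
  int64_t temp = static_cast<int64_t>(value) - lower_bound;  // Addiu32(temp, value, -lower_bound)
  if (temp < 0) return -1;                 // Bltz -> default_block
  if (temp == 0) return 0;                 // Beqz -> successors[0]
  int32_t last_index = 0;
  for (; num_entries - last_index > 2; last_index += 2) {
    temp -= 2;                             // one Addiu serves the next two cases
    if (temp < 0) return last_index + 1;   // Bltz: value < case_value[last_index + 2]
    if (temp == 0) return last_index + 2;  // Beqz: value == case_value[last_index + 2]
  }
  if (num_entries - last_index == 2 && temp == 1) {
    return last_index + 1;                 // the one remaining case value
  }
  return -1;                               // everything else falls to the default block
}
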
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 99f58dd2c5..1e428a06e1 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -1752,11 +1752,7 @@ void InstructionCodeGeneratorMIPS64::VisitClinitCheck(HClinitCheck* check) {
void LocationsBuilderMIPS64::VisitCompare(HCompare* compare) {
Primitive::Type in_type = compare->InputAt(0)->GetType();
- LocationSummary::CallKind call_kind = Primitive::IsFloatingPointType(in_type)
- ? LocationSummary::kCall
- : LocationSummary::kNoCall;
-
- LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(compare, call_kind);
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(compare);
switch (in_type) {
case Primitive::kPrimLong:
@@ -1766,13 +1762,11 @@ void LocationsBuilderMIPS64::VisitCompare(HCompare* compare) {
break;
case Primitive::kPrimFloat:
- case Primitive::kPrimDouble: {
- InvokeRuntimeCallingConvention calling_convention;
- locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
- locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
- locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimInt));
+ case Primitive::kPrimDouble:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
break;
- }
default:
LOG(FATAL) << "Unexpected type for compare operation " << in_type;
@@ -1781,14 +1775,15 @@ void LocationsBuilderMIPS64::VisitCompare(HCompare* compare) {
void InstructionCodeGeneratorMIPS64::VisitCompare(HCompare* instruction) {
LocationSummary* locations = instruction->GetLocations();
+ GpuRegister res = locations->Out().AsRegister<GpuRegister>();
Primitive::Type in_type = instruction->InputAt(0)->GetType();
+ bool gt_bias = instruction->IsGtBias();
// 0 if: left == right
// 1 if: left > right
// -1 if: left < right
switch (in_type) {
case Primitive::kPrimLong: {
- GpuRegister dst = locations->Out().AsRegister<GpuRegister>();
GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>();
Location rhs_location = locations->InAt(1);
bool use_imm = rhs_location.IsConstant();
@@ -1803,35 +1798,52 @@ void InstructionCodeGeneratorMIPS64::VisitCompare(HCompare* instruction) {
rhs = rhs_location.AsRegister<GpuRegister>();
}
__ Slt(TMP, lhs, rhs);
- __ Slt(dst, rhs, lhs);
- __ Subu(dst, dst, TMP);
+ __ Slt(res, rhs, lhs);
+ __ Subu(res, res, TMP);
break;
}
- case Primitive::kPrimFloat:
- case Primitive::kPrimDouble: {
- int32_t entry_point_offset;
- if (in_type == Primitive::kPrimFloat) {
- entry_point_offset = instruction->IsGtBias() ? QUICK_ENTRY_POINT(pCmpgFloat)
- : QUICK_ENTRY_POINT(pCmplFloat);
+ case Primitive::kPrimFloat: {
+ FpuRegister lhs = locations->InAt(0).AsFpuRegister<FpuRegister>();
+ FpuRegister rhs = locations->InAt(1).AsFpuRegister<FpuRegister>();
+ Mips64Label done;
+ __ CmpEqS(FTMP, lhs, rhs);
+ __ LoadConst32(res, 0);
+ __ Bc1nez(FTMP, &done);
+ if (gt_bias) {
+ __ CmpLtS(FTMP, lhs, rhs);
+ __ LoadConst32(res, -1);
+ __ Bc1nez(FTMP, &done);
+ __ LoadConst32(res, 1);
} else {
- entry_point_offset = instruction->IsGtBias() ? QUICK_ENTRY_POINT(pCmpgDouble)
- : QUICK_ENTRY_POINT(pCmplDouble);
+ __ CmpLtS(FTMP, rhs, lhs);
+ __ LoadConst32(res, 1);
+ __ Bc1nez(FTMP, &done);
+ __ LoadConst32(res, -1);
}
- codegen_->InvokeRuntime(entry_point_offset, instruction, instruction->GetDexPc(), nullptr);
- if (in_type == Primitive::kPrimFloat) {
- if (instruction->IsGtBias()) {
- CheckEntrypointTypes<kQuickCmpgFloat, int32_t, float, float>();
- } else {
- CheckEntrypointTypes<kQuickCmplFloat, int32_t, float, float>();
- }
+ __ Bind(&done);
+ break;
+ }
+
+ case Primitive::kPrimDouble: {
+ FpuRegister lhs = locations->InAt(0).AsFpuRegister<FpuRegister>();
+ FpuRegister rhs = locations->InAt(1).AsFpuRegister<FpuRegister>();
+ Mips64Label done;
+ __ CmpEqD(FTMP, lhs, rhs);
+ __ LoadConst32(res, 0);
+ __ Bc1nez(FTMP, &done);
+ if (gt_bias) {
+ __ CmpLtD(FTMP, lhs, rhs);
+ __ LoadConst32(res, -1);
+ __ Bc1nez(FTMP, &done);
+ __ LoadConst32(res, 1);
} else {
- if (instruction->IsGtBias()) {
- CheckEntrypointTypes<kQuickCmpgDouble, int32_t, double, double>();
- } else {
- CheckEntrypointTypes<kQuickCmplDouble, int32_t, double, double>();
- }
+ __ CmpLtD(FTMP, rhs, lhs);
+ __ LoadConst32(res, 1);
+ __ Bc1nez(FTMP, &done);
+ __ LoadConst32(res, -1);
}
+ __ Bind(&done);
break;
}
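
Note: the HCompare lowering above replaces the pCmpl/pCmpg runtime calls with inline MIPS64R6 compares (CmpEqS/CmpLtS, CmpEqD/CmpLtD plus Bc1nez); the only behavioral subtlety is NaN. A host-side model of what the emitted sequence computes, illustration only:

#include <cmath>

// 0 if lhs == rhs, 1 if lhs > rhs, -1 if lhs < rhs. For an unordered pair
// (either side NaN) the gt_bias (cmpg) form yields +1 and the cmpl form -1,
// because CmpEq and CmpLt are both false and the default constant sticks.
int FpCompareResult(double lhs, double rhs, bool gt_bias) {
  if (lhs == rhs) return 0;                // CmpEqD taken -> res stays 0
  if (gt_bias) {
    return lhs < rhs ? -1 : 1;             // NaN falls through to +1
  }
  return rhs < lhs ? 1 : -1;               // NaN falls through to -1
}
// E.g. FpCompareResult(NAN, 0.0, /* gt_bias */ true) == 1.
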
@@ -1842,8 +1854,19 @@ void InstructionCodeGeneratorMIPS64::VisitCompare(HCompare* instruction) {
void LocationsBuilderMIPS64::VisitCondition(HCondition* instruction) {
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+ switch (instruction->InputAt(0)->GetType()) {
+ default:
+ case Primitive::kPrimLong:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+ break;
+
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ break;
+ }
if (instruction->NeedsMaterialization()) {
locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}
@@ -1854,129 +1877,42 @@ void InstructionCodeGeneratorMIPS64::VisitCondition(HCondition* instruction) {
return;
}
- // TODO: generalize to long
- DCHECK_NE(instruction->InputAt(0)->GetType(), Primitive::kPrimLong);
-
+ Primitive::Type type = instruction->InputAt(0)->GetType();
LocationSummary* locations = instruction->GetLocations();
-
GpuRegister dst = locations->Out().AsRegister<GpuRegister>();
- GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>();
- Location rhs_location = locations->InAt(1);
+ Mips64Label true_label;
- GpuRegister rhs_reg = ZERO;
- int64_t rhs_imm = 0;
- bool use_imm = rhs_location.IsConstant();
- if (use_imm) {
- rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant());
- } else {
- rhs_reg = rhs_location.AsRegister<GpuRegister>();
- }
-
- IfCondition if_cond = instruction->GetCondition();
-
- switch (if_cond) {
- case kCondEQ:
- case kCondNE:
- if (use_imm && IsUint<16>(rhs_imm)) {
- __ Xori(dst, lhs, rhs_imm);
- } else {
- if (use_imm) {
- rhs_reg = TMP;
- __ LoadConst32(rhs_reg, rhs_imm);
- }
- __ Xor(dst, lhs, rhs_reg);
- }
- if (if_cond == kCondEQ) {
- __ Sltiu(dst, dst, 1);
- } else {
- __ Sltu(dst, ZERO, dst);
- }
- break;
+ switch (type) {
+ default:
+ // Integer case.
+ GenerateIntLongCompare(instruction->GetCondition(), /* is64bit */ false, locations);
+ return;
+ case Primitive::kPrimLong:
+ GenerateIntLongCompare(instruction->GetCondition(), /* is64bit */ true, locations);
+ return;
- case kCondLT:
- case kCondGE:
- if (use_imm && IsInt<16>(rhs_imm)) {
- __ Slti(dst, lhs, rhs_imm);
- } else {
- if (use_imm) {
- rhs_reg = TMP;
- __ LoadConst32(rhs_reg, rhs_imm);
- }
- __ Slt(dst, lhs, rhs_reg);
- }
- if (if_cond == kCondGE) {
- // Simulate lhs >= rhs via !(lhs < rhs) since there's
- // only the slt instruction but no sge.
- __ Xori(dst, dst, 1);
- }
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ // TODO: don't use branches.
+ GenerateFpCompareAndBranch(instruction->GetCondition(),
+ instruction->IsGtBias(),
+ type,
+ locations,
+ &true_label);
break;
+ }
- case kCondLE:
- case kCondGT:
- if (use_imm && IsInt<16>(rhs_imm + 1)) {
- // Simulate lhs <= rhs via lhs < rhs + 1.
- __ Slti(dst, lhs, rhs_imm + 1);
- if (if_cond == kCondGT) {
- // Simulate lhs > rhs via !(lhs <= rhs) since there's
- // only the slti instruction but no sgti.
- __ Xori(dst, dst, 1);
- }
- } else {
- if (use_imm) {
- rhs_reg = TMP;
- __ LoadConst32(rhs_reg, rhs_imm);
- }
- __ Slt(dst, rhs_reg, lhs);
- if (if_cond == kCondLE) {
- // Simulate lhs <= rhs via !(rhs < lhs) since there's
- // only the slt instruction but no sle.
- __ Xori(dst, dst, 1);
- }
- }
- break;
+ // Convert the branches into the result.
+ Mips64Label done;
- case kCondB:
- case kCondAE:
- if (use_imm && 0 <= rhs_imm && rhs_imm <= 0x7fff) {
- __ Sltiu(dst, lhs, rhs_imm);
- } else {
- if (use_imm) {
- rhs_reg = TMP;
- __ LoadConst32(rhs_reg, rhs_imm);
- }
- __ Sltu(dst, lhs, rhs_reg);
- }
- if (if_cond == kCondAE) {
- // Simulate lhs >= rhs via !(lhs < rhs) since there's
- // only the sltu instruction but no sgeu.
- __ Xori(dst, dst, 1);
- }
- break;
+ // False case: result = 0.
+ __ LoadConst32(dst, 0);
+ __ Bc(&done);
- case kCondBE:
- case kCondA:
- if (use_imm && 0 <= rhs_imm && rhs_imm <= 0x7ffe) {
- // Simulate lhs <= rhs via lhs < rhs + 1.
- __ Sltiu(dst, lhs, rhs_imm + 1);
- if (if_cond == kCondA) {
- // Simulate lhs > rhs via !(lhs <= rhs) since there's
- // only the sltiu instruction but no sgtiu.
- __ Xori(dst, dst, 1);
- }
- } else {
- if (use_imm) {
- rhs_reg = TMP;
- __ LoadConst32(rhs_reg, rhs_imm);
- }
- __ Sltu(dst, rhs_reg, lhs);
- if (if_cond == kCondBE) {
- // Simulate lhs <= rhs via !(rhs < lhs) since there's
- // only the sltu instruction but no sleu.
- __ Xori(dst, dst, 1);
- }
- }
- break;
- }
+ // True case: result = 1.
+ __ Bind(&true_label);
+ __ LoadConst32(dst, 1);
+ __ Bind(&done);
}
void InstructionCodeGeneratorMIPS64::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
@@ -2375,6 +2311,329 @@ void InstructionCodeGeneratorMIPS64::VisitTryBoundary(HTryBoundary* try_boundary
}
}
+void InstructionCodeGeneratorMIPS64::GenerateIntLongCompare(IfCondition cond,
+ bool is64bit,
+ LocationSummary* locations) {
+ GpuRegister dst = locations->Out().AsRegister<GpuRegister>();
+ GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>();
+ Location rhs_location = locations->InAt(1);
+ GpuRegister rhs_reg = ZERO;
+ int64_t rhs_imm = 0;
+ bool use_imm = rhs_location.IsConstant();
+ if (use_imm) {
+ if (is64bit) {
+ rhs_imm = CodeGenerator::GetInt64ValueOf(rhs_location.GetConstant());
+ } else {
+ rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant());
+ }
+ } else {
+ rhs_reg = rhs_location.AsRegister<GpuRegister>();
+ }
+ int64_t rhs_imm_plus_one = rhs_imm + UINT64_C(1);
+
+ switch (cond) {
+ case kCondEQ:
+ case kCondNE:
+ if (use_imm && IsUint<16>(rhs_imm)) {
+ __ Xori(dst, lhs, rhs_imm);
+ } else {
+ if (use_imm) {
+ rhs_reg = TMP;
+ __ LoadConst64(rhs_reg, rhs_imm);
+ }
+ __ Xor(dst, lhs, rhs_reg);
+ }
+ if (cond == kCondEQ) {
+ __ Sltiu(dst, dst, 1);
+ } else {
+ __ Sltu(dst, ZERO, dst);
+ }
+ break;
+
+ case kCondLT:
+ case kCondGE:
+ if (use_imm && IsInt<16>(rhs_imm)) {
+ __ Slti(dst, lhs, rhs_imm);
+ } else {
+ if (use_imm) {
+ rhs_reg = TMP;
+ __ LoadConst64(rhs_reg, rhs_imm);
+ }
+ __ Slt(dst, lhs, rhs_reg);
+ }
+ if (cond == kCondGE) {
+ // Simulate lhs >= rhs via !(lhs < rhs) since there's
+ // only the slt instruction but no sge.
+ __ Xori(dst, dst, 1);
+ }
+ break;
+
+ case kCondLE:
+ case kCondGT:
+ if (use_imm && IsInt<16>(rhs_imm_plus_one)) {
+ // Simulate lhs <= rhs via lhs < rhs + 1.
+ __ Slti(dst, lhs, rhs_imm_plus_one);
+ if (cond == kCondGT) {
+ // Simulate lhs > rhs via !(lhs <= rhs) since there's
+ // only the slti instruction but no sgti.
+ __ Xori(dst, dst, 1);
+ }
+ } else {
+ if (use_imm) {
+ rhs_reg = TMP;
+ __ LoadConst64(rhs_reg, rhs_imm);
+ }
+ __ Slt(dst, rhs_reg, lhs);
+ if (cond == kCondLE) {
+ // Simulate lhs <= rhs via !(rhs < lhs) since there's
+ // only the slt instruction but no sle.
+ __ Xori(dst, dst, 1);
+ }
+ }
+ break;
+
+ case kCondB:
+ case kCondAE:
+ if (use_imm && IsInt<16>(rhs_imm)) {
+ // Sltiu sign-extends its 16-bit immediate operand before
+ // the comparison and thus lets us compare directly with
+ // unsigned values in the ranges [0, 0x7fff] and
+ // [0x[ffffffff]ffff8000, 0x[ffffffff]ffffffff].
+ __ Sltiu(dst, lhs, rhs_imm);
+ } else {
+ if (use_imm) {
+ rhs_reg = TMP;
+ __ LoadConst64(rhs_reg, rhs_imm);
+ }
+ __ Sltu(dst, lhs, rhs_reg);
+ }
+ if (cond == kCondAE) {
+ // Simulate lhs >= rhs via !(lhs < rhs) since there's
+ // only the sltu instruction but no sgeu.
+ __ Xori(dst, dst, 1);
+ }
+ break;
+
+ case kCondBE:
+ case kCondA:
+ if (use_imm && (rhs_imm_plus_one != 0) && IsInt<16>(rhs_imm_plus_one)) {
+ // Simulate lhs <= rhs via lhs < rhs + 1.
+ // Note that this only works if rhs + 1 does not overflow
+ // to 0, hence the check above.
+ // Sltiu sign-extends its 16-bit immediate operand before
+ // the comparison and thus lets us compare directly with
+ // unsigned values in the ranges [0, 0x7fff] and
+ // [0x[ffffffff]ffff8000, 0x[ffffffff]ffffffff].
+ __ Sltiu(dst, lhs, rhs_imm_plus_one);
+ if (cond == kCondA) {
+ // Simulate lhs > rhs via !(lhs <= rhs) since there's
+ // only the sltiu instruction but no sgtiu.
+ __ Xori(dst, dst, 1);
+ }
+ } else {
+ if (use_imm) {
+ rhs_reg = TMP;
+ __ LoadConst64(rhs_reg, rhs_imm);
+ }
+ __ Sltu(dst, rhs_reg, lhs);
+ if (cond == kCondBE) {
+ // Simulate lhs <= rhs via !(rhs < lhs) since there's
+ // only the sltu instruction but no sleu.
+ __ Xori(dst, dst, 1);
+ }
+ }
+ break;
+ }
+}
+
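
Note: MIPS64 only has slt/sltu (and the 16-bit-immediate slti/sltiu), so GenerateIntLongCompare synthesizes the remaining conditions: >= as the xori-negation of <, and <= against an immediate as < imm + 1, with the unsigned form guarded against imm + 1 wrapping to 0. A few host-side checks of those identities, illustration only:

#include <cassert>
#include <cstdint>

void CheckSltIdentities(int64_t lhs, int64_t imm, uint64_t u_lhs, uint64_t u_imm) {
  assert((lhs >= imm) == !(lhs < imm));               // kCondGE: Slt then Xori dst, dst, 1
  if (imm != INT64_MAX) {
    assert((lhs <= imm) == (lhs < imm + 1));          // kCondLE: Slti with imm + 1
  }
  if (u_imm + 1 != 0) {                               // the rhs_imm_plus_one != 0 guard
    assert((u_lhs <= u_imm) == (u_lhs < u_imm + 1));  // kCondBE: Sltiu with imm + 1
  }
}
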
+void InstructionCodeGeneratorMIPS64::GenerateIntLongCompareAndBranch(IfCondition cond,
+ bool is64bit,
+ LocationSummary* locations,
+ Mips64Label* label) {
+ GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>();
+ Location rhs_location = locations->InAt(1);
+ GpuRegister rhs_reg = ZERO;
+ int64_t rhs_imm = 0;
+ bool use_imm = rhs_location.IsConstant();
+ if (use_imm) {
+ if (is64bit) {
+ rhs_imm = CodeGenerator::GetInt64ValueOf(rhs_location.GetConstant());
+ } else {
+ rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant());
+ }
+ } else {
+ rhs_reg = rhs_location.AsRegister<GpuRegister>();
+ }
+
+ if (use_imm && rhs_imm == 0) {
+ switch (cond) {
+ case kCondEQ:
+ case kCondBE: // <= 0 if zero
+ __ Beqzc(lhs, label);
+ break;
+ case kCondNE:
+ case kCondA: // > 0 if non-zero
+ __ Bnezc(lhs, label);
+ break;
+ case kCondLT:
+ __ Bltzc(lhs, label);
+ break;
+ case kCondGE:
+ __ Bgezc(lhs, label);
+ break;
+ case kCondLE:
+ __ Blezc(lhs, label);
+ break;
+ case kCondGT:
+ __ Bgtzc(lhs, label);
+ break;
+ case kCondB: // always false
+ break;
+ case kCondAE: // always true
+ __ Bc(label);
+ break;
+ }
+ } else {
+ if (use_imm) {
+ rhs_reg = TMP;
+ __ LoadConst64(rhs_reg, rhs_imm);
+ }
+ switch (cond) {
+ case kCondEQ:
+ __ Beqc(lhs, rhs_reg, label);
+ break;
+ case kCondNE:
+ __ Bnec(lhs, rhs_reg, label);
+ break;
+ case kCondLT:
+ __ Bltc(lhs, rhs_reg, label);
+ break;
+ case kCondGE:
+ __ Bgec(lhs, rhs_reg, label);
+ break;
+ case kCondLE:
+ __ Bgec(rhs_reg, lhs, label);
+ break;
+ case kCondGT:
+ __ Bltc(rhs_reg, lhs, label);
+ break;
+ case kCondB:
+ __ Bltuc(lhs, rhs_reg, label);
+ break;
+ case kCondAE:
+ __ Bgeuc(lhs, rhs_reg, label);
+ break;
+ case kCondBE:
+ __ Bgeuc(rhs_reg, lhs, label);
+ break;
+ case kCondA:
+ __ Bltuc(rhs_reg, lhs, label);
+ break;
+ }
+ }
+}
+
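
Note: the zero-immediate branch cases above collapse because unsigned comparisons against zero degenerate: u < 0 is never true, u >= 0 always is, u <= 0 holds exactly when u == 0, and u > 0 exactly when u != 0. That is why kCondB emits nothing, kCondAE an unconditional Bc, and kCondBE/kCondA reuse Beqzc/Bnezc. Spelled out as (deliberately tautological) host-side checks:

#include <cassert>
#include <cstdint>

void CheckUnsignedZeroIdentities(uint64_t u) {
  assert(!(u < 0u));               // kCondB: always false, no branch emitted
  assert(u >= 0u);                 // kCondAE: always true, unconditional Bc
  assert((u <= 0u) == (u == 0u));  // kCondBE -> Beqzc
  assert((u > 0u) == (u != 0u));   // kCondA  -> Bnezc
}
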
+void InstructionCodeGeneratorMIPS64::GenerateFpCompareAndBranch(IfCondition cond,
+ bool gt_bias,
+ Primitive::Type type,
+ LocationSummary* locations,
+ Mips64Label* label) {
+ FpuRegister lhs = locations->InAt(0).AsFpuRegister<FpuRegister>();
+ FpuRegister rhs = locations->InAt(1).AsFpuRegister<FpuRegister>();
+ if (type == Primitive::kPrimFloat) {
+ switch (cond) {
+ case kCondEQ:
+ __ CmpEqS(FTMP, lhs, rhs);
+ __ Bc1nez(FTMP, label);
+ break;
+ case kCondNE:
+ __ CmpEqS(FTMP, lhs, rhs);
+ __ Bc1eqz(FTMP, label);
+ break;
+ case kCondLT:
+ if (gt_bias) {
+ __ CmpLtS(FTMP, lhs, rhs);
+ } else {
+ __ CmpUltS(FTMP, lhs, rhs);
+ }
+ __ Bc1nez(FTMP, label);
+ break;
+ case kCondLE:
+ if (gt_bias) {
+ __ CmpLeS(FTMP, lhs, rhs);
+ } else {
+ __ CmpUleS(FTMP, lhs, rhs);
+ }
+ __ Bc1nez(FTMP, label);
+ break;
+ case kCondGT:
+ if (gt_bias) {
+ __ CmpUltS(FTMP, rhs, lhs);
+ } else {
+ __ CmpLtS(FTMP, rhs, lhs);
+ }
+ __ Bc1nez(FTMP, label);
+ break;
+ case kCondGE:
+ if (gt_bias) {
+ __ CmpUleS(FTMP, rhs, lhs);
+ } else {
+ __ CmpLeS(FTMP, rhs, lhs);
+ }
+ __ Bc1nez(FTMP, label);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected non-floating-point condition";
+ }
+ } else {
+ DCHECK_EQ(type, Primitive::kPrimDouble);
+ switch (cond) {
+ case kCondEQ:
+ __ CmpEqD(FTMP, lhs, rhs);
+ __ Bc1nez(FTMP, label);
+ break;
+ case kCondNE:
+ __ CmpEqD(FTMP, lhs, rhs);
+ __ Bc1eqz(FTMP, label);
+ break;
+ case kCondLT:
+ if (gt_bias) {
+ __ CmpLtD(FTMP, lhs, rhs);
+ } else {
+ __ CmpUltD(FTMP, lhs, rhs);
+ }
+ __ Bc1nez(FTMP, label);
+ break;
+ case kCondLE:
+ if (gt_bias) {
+ __ CmpLeD(FTMP, lhs, rhs);
+ } else {
+ __ CmpUleD(FTMP, lhs, rhs);
+ }
+ __ Bc1nez(FTMP, label);
+ break;
+ case kCondGT:
+ if (gt_bias) {
+ __ CmpUltD(FTMP, rhs, lhs);
+ } else {
+ __ CmpLtD(FTMP, rhs, lhs);
+ }
+ __ Bc1nez(FTMP, label);
+ break;
+ case kCondGE:
+ if (gt_bias) {
+ __ CmpUleD(FTMP, rhs, lhs);
+ } else {
+ __ CmpLeD(FTMP, rhs, lhs);
+ }
+ __ Bc1nez(FTMP, label);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected non-floating-point condition";
+ }
+ }
+}
+
void InstructionCodeGeneratorMIPS64::GenerateTestAndBranch(HInstruction* instruction,
size_t condition_input_index,
Mips64Label* true_target,
@@ -2420,97 +2679,27 @@ void InstructionCodeGeneratorMIPS64::GenerateTestAndBranch(HInstruction* instruc
// The condition instruction has not been materialized, use its inputs as
// the comparison and its condition as the branch condition.
HCondition* condition = cond->AsCondition();
+ Primitive::Type type = condition->InputAt(0)->GetType();
+ LocationSummary* locations = cond->GetLocations();
+ IfCondition if_cond = condition->GetCondition();
+ Mips64Label* branch_target = true_target;
- GpuRegister lhs = condition->GetLocations()->InAt(0).AsRegister<GpuRegister>();
- Location rhs_location = condition->GetLocations()->InAt(1);
- GpuRegister rhs_reg = ZERO;
- int32_t rhs_imm = 0;
- bool use_imm = rhs_location.IsConstant();
- if (use_imm) {
- rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant());
- } else {
- rhs_reg = rhs_location.AsRegister<GpuRegister>();
- }
-
- IfCondition if_cond;
- Mips64Label* non_fallthrough_target;
if (true_target == nullptr) {
if_cond = condition->GetOppositeCondition();
- non_fallthrough_target = false_target;
- } else {
- if_cond = condition->GetCondition();
- non_fallthrough_target = true_target;
- }
-
- if (use_imm && rhs_imm == 0) {
- switch (if_cond) {
- case kCondEQ:
- __ Beqzc(lhs, non_fallthrough_target);
- break;
- case kCondNE:
- __ Bnezc(lhs, non_fallthrough_target);
- break;
- case kCondLT:
- __ Bltzc(lhs, non_fallthrough_target);
- break;
- case kCondGE:
- __ Bgezc(lhs, non_fallthrough_target);
- break;
- case kCondLE:
- __ Blezc(lhs, non_fallthrough_target);
- break;
- case kCondGT:
- __ Bgtzc(lhs, non_fallthrough_target);
- break;
- case kCondB:
- break; // always false
- case kCondBE:
- __ Beqzc(lhs, non_fallthrough_target); // <= 0 if zero
- break;
- case kCondA:
- __ Bnezc(lhs, non_fallthrough_target); // > 0 if non-zero
- break;
- case kCondAE:
- __ Bc(non_fallthrough_target); // always true
- break;
- }
- } else {
- if (use_imm) {
- rhs_reg = TMP;
- __ LoadConst32(rhs_reg, rhs_imm);
- }
- switch (if_cond) {
- case kCondEQ:
- __ Beqc(lhs, rhs_reg, non_fallthrough_target);
- break;
- case kCondNE:
- __ Bnec(lhs, rhs_reg, non_fallthrough_target);
- break;
- case kCondLT:
- __ Bltc(lhs, rhs_reg, non_fallthrough_target);
- break;
- case kCondGE:
- __ Bgec(lhs, rhs_reg, non_fallthrough_target);
- break;
- case kCondLE:
- __ Bgec(rhs_reg, lhs, non_fallthrough_target);
- break;
- case kCondGT:
- __ Bltc(rhs_reg, lhs, non_fallthrough_target);
- break;
- case kCondB:
- __ Bltuc(lhs, rhs_reg, non_fallthrough_target);
- break;
- case kCondAE:
- __ Bgeuc(lhs, rhs_reg, non_fallthrough_target);
- break;
- case kCondBE:
- __ Bgeuc(rhs_reg, lhs, non_fallthrough_target);
- break;
- case kCondA:
- __ Bltuc(rhs_reg, lhs, non_fallthrough_target);
- break;
- }
+ branch_target = false_target;
+ }
+
+ switch (type) {
+ default:
+ GenerateIntLongCompareAndBranch(if_cond, /* is64bit */ false, locations, branch_target);
+ break;
+ case Primitive::kPrimLong:
+ GenerateIntLongCompareAndBranch(if_cond, /* is64bit */ true, locations, branch_target);
+ break;
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ GenerateFpCompareAndBranch(if_cond, condition->IsGtBias(), type, locations, branch_target);
+ break;
}
}
@@ -3991,17 +4180,34 @@ void InstructionCodeGeneratorMIPS64::VisitPackedSwitch(HPackedSwitch* switch_ins
GpuRegister value_reg = locations->InAt(0).AsRegister<GpuRegister>();
HBasicBlock* default_block = switch_instr->GetDefaultBlock();
- // Create a series of compare/jumps.
+ // Create a set of compare/jumps.
+ GpuRegister temp_reg = TMP;
+ if (IsInt<16>(-lower_bound)) {
+ __ Addiu(temp_reg, value_reg, -lower_bound);
+ } else {
+ __ LoadConst32(AT, -lower_bound);
+ __ Addu(temp_reg, value_reg, AT);
+ }
+ // Jump to the default block if the index is negative.
+ // Note: We don't check the case where the index is positive while value < lower_bound: in that
+ // case index >= num_entries must be true, so we still reach the default and save one branch.
+ __ Bltzc(temp_reg, codegen_->GetLabelOf(default_block));
+
const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
- for (int32_t i = 0; i < num_entries; i++) {
- int32_t case_value = lower_bound + i;
- Mips64Label* succ = codegen_->GetLabelOf(successors[i]);
- if (case_value == 0) {
- __ Beqzc(value_reg, succ);
- } else {
- __ LoadConst32(TMP, case_value);
- __ Beqc(value_reg, TMP, succ);
- }
+ // Jump to successors[0] if value == lower_bound.
+ __ Beqzc(temp_reg, codegen_->GetLabelOf(successors[0]));
+ int32_t last_index = 0;
+ for (; num_entries - last_index > 2; last_index += 2) {
+ __ Addiu(temp_reg, temp_reg, -2);
+ // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
+ __ Bltzc(temp_reg, codegen_->GetLabelOf(successors[last_index + 1]));
+ // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
+ __ Beqzc(temp_reg, codegen_->GetLabelOf(successors[last_index + 2]));
+ }
+ if (num_entries - last_index == 2) {
+ // The last missing case_value.
+ __ Addiu(temp_reg, temp_reg, -1);
+ __ Beqzc(temp_reg, codegen_->GetLabelOf(successors[last_index + 1]));
}
// And the default for any other value.
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index 85e3a4a3ce..1593cec2a6 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -237,6 +237,16 @@ class InstructionCodeGeneratorMIPS64 : public HGraphVisitor {
void DivRemByPowerOfTwo(HBinaryOperation* instruction);
void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
void GenerateDivRemIntegral(HBinaryOperation* instruction);
+ void GenerateIntLongCompare(IfCondition cond, bool is64bit, LocationSummary* locations);
+ void GenerateIntLongCompareAndBranch(IfCondition cond,
+ bool is64bit,
+ LocationSummary* locations,
+ Mips64Label* label);
+ void GenerateFpCompareAndBranch(IfCondition cond,
+ bool gt_bias,
+ Primitive::Type type,
+ LocationSummary* locations,
+ Mips64Label* label);
void HandleGoto(HInstruction* got, HBasicBlock* successor);
Mips64Assembler* const assembler_;
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index bc3256ec8c..7a5b8dbe46 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -42,7 +42,6 @@ namespace x86 {
static constexpr int kCurrentMethodStackOffset = 0;
static constexpr Register kMethodRegisterArgument = EAX;
-
static constexpr Register kCoreCalleeSaves[] = { EBP, ESI, EDI };
static constexpr int kC2ConditionMask = 0x400;
@@ -4157,7 +4156,7 @@ void CodeGeneratorX86::GenerateMemoryBarrier(MemBarrierKind kind) {
*/
switch (kind) {
case MemBarrierKind::kAnyAny: {
- __ mfence();
+ MemoryFence();
break;
}
case MemBarrierKind::kAnyStore:
@@ -6752,31 +6751,67 @@ void LocationsBuilderX86::VisitPackedSwitch(HPackedSwitch* switch_instr) {
locations->SetInAt(0, Location::RequiresRegister());
}
-void InstructionCodeGeneratorX86::VisitPackedSwitch(HPackedSwitch* switch_instr) {
- int32_t lower_bound = switch_instr->GetStartValue();
- int32_t num_entries = switch_instr->GetNumEntries();
- LocationSummary* locations = switch_instr->GetLocations();
- Register value_reg = locations->InAt(0).AsRegister<Register>();
- HBasicBlock* default_block = switch_instr->GetDefaultBlock();
+void InstructionCodeGeneratorX86::GenPackedSwitchWithCompares(Register value_reg,
+ int32_t lower_bound,
+ uint32_t num_entries,
+ HBasicBlock* switch_block,
+ HBasicBlock* default_block) {
+ // Figure out the correct compare values and jump conditions.
+ // Handle the first compare/branch as a special case because it might
+ // jump to the default case.
+ DCHECK_GT(num_entries, 2u);
+ Condition first_condition;
+ uint32_t index;
+ const ArenaVector<HBasicBlock*>& successors = switch_block->GetSuccessors();
+ if (lower_bound != 0) {
+ first_condition = kLess;
+ __ cmpl(value_reg, Immediate(lower_bound));
+ __ j(first_condition, codegen_->GetLabelOf(default_block));
+ __ j(kEqual, codegen_->GetLabelOf(successors[0]));
- // Create a series of compare/jumps.
- const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
- for (int i = 0; i < num_entries; i++) {
- int32_t case_value = lower_bound + i;
- if (case_value == 0) {
- __ testl(value_reg, value_reg);
- } else {
- __ cmpl(value_reg, Immediate(case_value));
- }
- __ j(kEqual, codegen_->GetLabelOf(successors[i]));
+ index = 1;
+ } else {
+ // Handle all the compare/jumps below.
+ first_condition = kBelow;
+ index = 0;
+ }
+
+ // Handle the rest of the compare/jumps.
+ for (; index + 1 < num_entries; index += 2) {
+ int32_t compare_to_value = lower_bound + index + 1;
+ __ cmpl(value_reg, Immediate(compare_to_value));
+ // Jump to successors[index] if value < case_value[index].
+ __ j(first_condition, codegen_->GetLabelOf(successors[index]));
+ // Jump to successors[index + 1] if value == case_value[index + 1].
+ __ j(kEqual, codegen_->GetLabelOf(successors[index + 1]));
+ }
+
+ if (index != num_entries) {
+ // There is an odd number of entries. Handle the last one.
+ DCHECK_EQ(index + 1, num_entries);
+ __ cmpl(value_reg, Immediate(lower_bound + index));
+ __ j(kEqual, codegen_->GetLabelOf(successors[index]));
}
// And the default for any other value.
- if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
- __ jmp(codegen_->GetLabelOf(default_block));
+ if (!codegen_->GoesToNextBlock(switch_block, default_block)) {
+ __ jmp(codegen_->GetLabelOf(default_block));
}
}
+void InstructionCodeGeneratorX86::VisitPackedSwitch(HPackedSwitch* switch_instr) {
+ int32_t lower_bound = switch_instr->GetStartValue();
+ uint32_t num_entries = switch_instr->GetNumEntries();
+ LocationSummary* locations = switch_instr->GetLocations();
+ Register value_reg = locations->InAt(0).AsRegister<Register>();
+
+ GenPackedSwitchWithCompares(value_reg,
+ lower_bound,
+ num_entries,
+ switch_instr->GetBlock(),
+ switch_instr->GetDefaultBlock());
+}
+
void LocationsBuilderX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall);
@@ -6791,11 +6826,20 @@ void LocationsBuilderX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) {
void InstructionCodeGeneratorX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) {
int32_t lower_bound = switch_instr->GetStartValue();
- int32_t num_entries = switch_instr->GetNumEntries();
+ uint32_t num_entries = switch_instr->GetNumEntries();
LocationSummary* locations = switch_instr->GetLocations();
Register value_reg = locations->InAt(0).AsRegister<Register>();
HBasicBlock* default_block = switch_instr->GetDefaultBlock();
+ if (num_entries <= kPackedSwitchJumpTableThreshold) {
+ GenPackedSwitchWithCompares(value_reg,
+ lower_bound,
+ num_entries,
+ switch_instr->GetBlock(),
+ default_block);
+ return;
+ }
+
// Optimizing has a jump area.
Register temp_reg = locations->GetTemp(0).AsRegister<Register>();
Register constant_area = locations->InAt(1).AsRegister<Register>();
@@ -6807,7 +6851,7 @@ void InstructionCodeGeneratorX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_
}
// Is the value in range?
- DCHECK_GE(num_entries, 1);
+ DCHECK_GE(num_entries, 1u);
__ cmpl(value_reg, Immediate(num_entries - 1));
__ j(kAbove, codegen_->GetLabelOf(default_block));
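
Note: the x86 chain differs slightly from the ARM/MIPS one above: it keeps the value unbiased and pairs a "<" and an "==" test around each odd case value, with the first compare doubling as the below-range check (signed kLess when lower_bound != 0, unsigned kBelow on the raw value when it is 0). A host-side model, illustration only:

#include <cstdint>

// Returns the successor index taken for `value`, or -1 for the default block.
int X86PackedSwitchTarget(int32_t value, int32_t lower_bound, uint32_t num_entries) {
  uint32_t index;
  if (lower_bound != 0) {
    if (value < lower_bound) return -1;   // j kLess -> default_block
    if (value == lower_bound) return 0;   // j kEqual -> successors[0]
    index = 1;
  } else {
    index = 0;  // kBelow on the unbiased value also rejects negatives
  }
  for (; index + 1 < num_entries; index += 2) {
    int32_t compare_to_value = lower_bound + static_cast<int32_t>(index) + 1;
    bool below = (lower_bound != 0)
        ? value < compare_to_value
        : static_cast<uint32_t>(value) < static_cast<uint32_t>(compare_to_value);
    if (below) return index;                          // j first_condition
    if (value == compare_to_value) return index + 1;  // j kEqual
  }
  if (index != num_entries) {  // odd number of entries: one last kEqual test
    if (value == lower_bound + static_cast<int32_t>(index)) return index;
  }
  return -1;
}
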
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 7c292fa103..f0ead0356d 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -17,6 +17,7 @@
#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_H_
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_H_
+#include "arch/x86/instruction_set_features_x86.h"
#include "code_generator.h"
#include "dex/compiler_enums.h"
#include "driver/compiler_options.h"
@@ -195,6 +196,11 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor {
X86Assembler* GetAssembler() const { return assembler_; }
+ // The compare/jump sequence will generate about (1.5 * num_entries) instructions. A jump
+ // table version generates 7 instructions and num_entries literals. Compare/jump sequence will
+ // generate less code/data with a small num_entries.
+ static constexpr uint32_t kPackedSwitchJumpTableThreshold = 5;
+
private:
// Generate code for the given suspend check. If not null, `successor`
// is the block to branch to if the suspend check is not needed, and after
@@ -269,6 +275,11 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor {
void GenerateFPJumps(HCondition* cond, Label* true_label, Label* false_label);
void GenerateLongComparesAndJumps(HCondition* cond, Label* true_label, Label* false_label);
void HandleGoto(HInstruction* got, HBasicBlock* successor);
+ void GenPackedSwitchWithCompares(Register value_reg,
+ int32_t lower_bound,
+ uint32_t num_entries,
+ HBasicBlock* switch_block,
+ HBasicBlock* default_block);
X86Assembler* const assembler_;
CodeGeneratorX86* const codegen_;
@@ -496,6 +507,19 @@ class CodeGeneratorX86 : public CodeGenerator {
// artReadBarrierForRootSlow.
void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root);
+ // Ensure that prior stores complete to memory before subsequent loads.
+ // The locked add implementation will avoid serializing device memory, but will
+ // touch (but not change) the top of the stack.
+ // The 'non_temporal' parameter should be used to ensure ordering of non-temporal stores.
+ void MemoryFence(bool non_temporal = false) {
+ if (!non_temporal && isa_features_.PrefersLockedAddSynchronization()) {
+ assembler_.lock()->addl(Address(ESP, 0), Immediate(0));
+ } else {
+ assembler_.mfence();
+ }
+ }
+
private:
// Factored implementation of GenerateFieldLoadWithBakerReadBarrier
// and GenerateArrayLoadWithBakerReadBarrier.
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 92cef5f226..1e6d50610b 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -41,6 +41,10 @@ namespace x86_64 {
static constexpr int kCurrentMethodStackOffset = 0;
static constexpr Register kMethodRegisterArgument = RDI;
+// The compare/jump sequence will generate about (1.5 * num_entries) instructions. A jump
+// table version generates 7 instructions and num_entries literals. Compare/jump sequence will
+// generate less code/data with a small num_entries.
+static constexpr uint32_t kPackedSwitchJumpTableThreshold = 5;
static constexpr Register kCoreCalleeSaves[] = { RBX, RBP, R12, R13, R14, R15 };
static constexpr FloatRegister kFpuCalleeSaves[] = { XMM12, XMM13, XMM14, XMM15 };
@@ -4029,7 +4033,7 @@ void CodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind) {
*/
switch (kind) {
case MemBarrierKind::kAnyAny: {
- __ mfence();
+ MemoryFence();
break;
}
case MemBarrierKind::kAnyStore:
@@ -6331,11 +6335,58 @@ void LocationsBuilderX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
void InstructionCodeGeneratorX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
int32_t lower_bound = switch_instr->GetStartValue();
- int32_t num_entries = switch_instr->GetNumEntries();
+ uint32_t num_entries = switch_instr->GetNumEntries();
LocationSummary* locations = switch_instr->GetLocations();
CpuRegister value_reg_in = locations->InAt(0).AsRegister<CpuRegister>();
CpuRegister temp_reg = locations->GetTemp(0).AsRegister<CpuRegister>();
CpuRegister base_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
+ HBasicBlock* default_block = switch_instr->GetDefaultBlock();
+
+ // Should we generate smaller inline compare/jumps?
+ if (num_entries <= kPackedSwitchJumpTableThreshold) {
+ // Figure out the correct compare values and jump conditions.
+ // Handle the first compare/branch as a special case because it might
+ // jump to the default case.
+ DCHECK_GT(num_entries, 2u);
+ Condition first_condition;
+ uint32_t index;
+ const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
+ if (lower_bound != 0) {
+ first_condition = kLess;
+ __ cmpl(value_reg_in, Immediate(lower_bound));
+ __ j(first_condition, codegen_->GetLabelOf(default_block));
+ __ j(kEqual, codegen_->GetLabelOf(successors[0]));
+
+ index = 1;
+ } else {
+ // Handle all the compare/jumps below.
+ first_condition = kBelow;
+ index = 0;
+ }
+
+ // Handle the rest of the compare/jumps.
+ for (; index + 1 < num_entries; index += 2) {
+ int32_t compare_to_value = lower_bound + index + 1;
+ __ cmpl(value_reg_in, Immediate(compare_to_value));
+ // Jump to successors[index] if value < case_value[index].
+ __ j(first_condition, codegen_->GetLabelOf(successors[index]));
+ // Jump to successors[index + 1] if value == case_value[index + 1].
+ __ j(kEqual, codegen_->GetLabelOf(successors[index + 1]));
+ }
+
+ if (index != num_entries) {
+ // There is an odd number of entries. Handle the last one.
+ DCHECK_EQ(index + 1, num_entries);
+ __ cmpl(value_reg_in, Immediate(lower_bound + index));
+ __ j(kEqual, codegen_->GetLabelOf(successors[index]));
+ }
+
+ // And the default for any other value.
+ if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
+ __ jmp(codegen_->GetLabelOf(default_block));
+ }
+ return;
+ }
// Remove the bias, if needed.
Register value_reg_out = value_reg_in.AsRegister();
@@ -6346,7 +6397,6 @@ void InstructionCodeGeneratorX86_64::VisitPackedSwitch(HPackedSwitch* switch_ins
CpuRegister value_reg(value_reg_out);
// Is the value in range?
- HBasicBlock* default_block = switch_instr->GetDefaultBlock();
__ cmpl(value_reg, Immediate(num_entries - 1));
__ j(kAbove, codegen_->GetLabelOf(default_block));
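To make the two strategies concrete: a hypothetical packed switch with lower_bound = 10 and num_entries = 3 (cases 10, 11, 12, value in %edi) takes the compare/jump path above and lowers to roughly

    cmpl $10, %edi        ; below the lower bound -> default
    jl   default
    je   case_10          ; value == 10
    cmpl $12, %edi
    jl   case_11          ; 10 < value < 12, i.e. value == 11
    je   case_12          ; value == 12
    jmp  default          ; elided when default is the next block

whereas the jump-table path below would spend about 7 instructions plus three jump-table entries.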
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index dda9ea22d9..e5a487c761 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -17,6 +17,7 @@
#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_
+#include "arch/x86_64/instruction_set_features_x86_64.h"
#include "code_generator.h"
#include "dex/compiler_enums.h"
#include "driver/compiler_options.h"
@@ -479,6 +480,18 @@ class CodeGeneratorX86_64 : public CodeGenerator {
int64_t v,
HInstruction* instruction);
+ // Ensure that prior stores complete to memory before subsequent loads.
+ // The locked add implementation will avoid serializing device memory, but will
+ // touch (but not change) the top of the stack. The locked add should not be used for
+ // ordering non-temporal stores.
+ void MemoryFence(bool force_mfence = false) {
+ if (!force_mfence && isa_features_.PrefersLockedAddSynchronization()) {
+ assembler_.lock()->addl(Address(CpuRegister(RSP), 0), Immediate(0));
+ } else {
+ assembler_.mfence();
+ }
+ }
+
private:
// Factored implementation of GenerateFieldLoadWithBakerReadBarrier
// and GenerateArrayLoadWithBakerReadBarrier.
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index 67097deaeb..c504ded54c 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -777,13 +777,6 @@ void InstructionSimplifierVisitor::VisitLessThanOrEqual(HLessThanOrEqual* condit
void InstructionSimplifierVisitor::VisitCondition(HCondition* condition) {
// Try to fold an HCompare into this HCondition.
- // This simplification is currently supported on x86, x86_64, ARM and ARM64.
- // TODO: Implement it for MIPS64.
- InstructionSet instruction_set = GetGraph()->GetInstructionSet();
- if (instruction_set == kMips64) {
- return;
- }
-
HInstruction* left = condition->GetLeft();
HInstruction* right = condition->GetRight();
// We can only replace an HCondition which compares a Compare to 0.
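With the MIPS64 early return gone, the fold now applies on every back end. Schematically (simplified IR, not exact ART syntax), a condition on the result of an HCompare against zero becomes a direct condition on the compare's operands:

    before:  c    = HCompare(a, b)       // materializes -1 / 0 / 1
             cond = HGreaterThan(c, 0)
             If(cond)

    after:   cond = HGreaterThan(a, b)   // HCompare folded away
             If(cond)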
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index e8181bbb06..4683aee603 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -825,8 +825,15 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat
Label loop_head;
__ Bind(&loop_head);
+ // TODO: When `type == Primitive::kPrimNot`, add a read barrier for
+ // the reference stored in the object before attempting the CAS,
+ // similar to the one in the art::Unsafe_compareAndSwapObject JNI
+ // implementation.
+ //
+ // Note that this code is not (yet) used when read barriers are
+ // enabled (see IntrinsicLocationsBuilderARM::VisitUnsafeCASObject).
+ DCHECK(!(type == Primitive::kPrimNot && kEmitCompilerReadBarrier));
__ ldrex(tmp_lo, tmp_ptr);
- // TODO: Do we need a read barrier here when `type == Primitive::kPrimNot`?
__ subs(tmp_lo, tmp_lo, ShifterOperand(expected_lo));
@@ -852,15 +859,17 @@ void IntrinsicLocationsBuilderARM::VisitUnsafeCASInt(HInvoke* invoke) {
CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke);
}
void IntrinsicLocationsBuilderARM::VisitUnsafeCASObject(HInvoke* invoke) {
- // The UnsafeCASObject intrinsic does not always work when heap
- // poisoning is enabled (it breaks run-test 004-UnsafeTest); turn it
- // off temporarily as a quick fix.
+ // The UnsafeCASObject intrinsic is missing a read barrier, and
+ // therefore sometimes does not work as expected (b/25883050).
+ // Turn it off temporarily as a quick fix, until the read barrier is
+ // implemented (see TODO in GenCAS below).
//
- // TODO(rpl): Fix it and turn it back on.
+ // Also, the UnsafeCASObject intrinsic does not always work when heap
+ // poisoning is enabled (it breaks run-test 004-UnsafeTest); turn it
+ // off temporarily as a quick fix (b/26204023).
//
- // TODO(rpl): Also, we should investigate whether we need a read
- // barrier in the generated code.
- if (kPoisonHeapReferences) {
+ // TODO(rpl): Fix these two issues and re-enable this intrinsic.
+ if (kEmitCompilerReadBarrier || kPoisonHeapReferences) {
return;
}
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 6b34daadf0..9f6863cf6e 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -1031,10 +1031,15 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat
} else {
__ Dmb(InnerShareable, BarrierWrites);
__ Bind(&loop_head);
- __ Ldxr(tmp_value, MemOperand(tmp_ptr));
- // TODO: Do we need a read barrier here when `type == Primitive::kPrimNot`?
+ // TODO: When `type == Primitive::kPrimNot`, add a read barrier for
+ // the reference stored in the object before attempting the CAS,
+ // similar to the one in the art::Unsafe_compareAndSwapObject JNI
+ // implementation.
+ //
// Note that this code is not (yet) used when read barriers are
// enabled (see IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject).
+ DCHECK(!(type == Primitive::kPrimNot && kEmitCompilerReadBarrier));
+ __ Ldxr(tmp_value, MemOperand(tmp_ptr));
__ Cmp(tmp_value, expected);
__ B(&exit_loop, ne);
__ Stxr(tmp_32, value, MemOperand(tmp_ptr));
@@ -1057,15 +1062,17 @@ void IntrinsicLocationsBuilderARM64::VisitUnsafeCASLong(HInvoke* invoke) {
CreateIntIntIntIntIntToInt(arena_, invoke);
}
void IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject(HInvoke* invoke) {
- // The UnsafeCASObject intrinsic does not always work when heap
- // poisoning is enabled (it breaks run-test 004-UnsafeTest); turn it
- // off temporarily as a quick fix.
+ // The UnsafeCASObject intrinsic is missing a read barrier, and
+ // therefore sometimes does not work as expected (b/25883050).
+ // Turn it off temporarily as a quick fix, until the read barrier is
+ // implemented (see TODO in GenCAS below).
//
- // TODO(rpl): Fix it and turn it back on.
+ // Also, the UnsafeCASObject intrinsic does not always work when heap
+ // poisoning is enabled (it breaks run-test 004-UnsafeTest); turn it
+ // off temporarily as a quick fix (b/26204023).
//
- // TODO(rpl): Also, we should investigate whether we need a read
- // barrier in the generated code.
- if (kPoisonHeapReferences) {
+ // TODO(rpl): Fix these two issues and re-enable this intrinsic.
+ if (kEmitCompilerReadBarrier || kPoisonHeapReferences) {
return;
}
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index 8aa7d9ff6f..8b45ea7c4f 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -1299,6 +1299,8 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat
if (type == Primitive::kPrimLong) {
__ Lld(out, TMP);
} else {
+ // Note: We will need a read barrier here, when read barrier
+ // support is added to the MIPS64 back end.
__ Ll(out, TMP);
}
__ Dsubu(out, out, expected); // If we didn't get the 'expected'
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index fd454d8322..80190629ee 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -2005,7 +2005,7 @@ static void GenUnsafePut(LocationSummary* locations,
}
if (is_volatile) {
- __ mfence();
+ codegen->MemoryFence();
}
if (type == Primitive::kPrimNot) {
@@ -2085,6 +2085,17 @@ void IntrinsicLocationsBuilderX86::VisitUnsafeCASLong(HInvoke* invoke) {
}
void IntrinsicLocationsBuilderX86::VisitUnsafeCASObject(HInvoke* invoke) {
+ // The UnsafeCASObject intrinsic is missing a read barrier, and
+ // therefore sometimes does not work as expected (b/25883050).
+ // Turn it off temporarily as a quick fix, until the read barrier is
+ // implemented.
+ //
+ // TODO(rpl): Implement a read barrier in GenCAS below and re-enable
+ // this intrinsic.
+ if (kEmitCompilerReadBarrier) {
+ return;
+ }
+
CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke);
}
@@ -2136,6 +2147,13 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* code
__ PoisonHeapReference(value);
}
+ // TODO: Add a read barrier for the reference stored in the object
+ // before attempting the CAS, similar to the one in the
+ // art::Unsafe_compareAndSwapObject JNI implementation.
+ //
+ // Note that this code is not (yet) used when read barriers are
+ // enabled (see IntrinsicLocationsBuilderX86::VisitUnsafeCASObject).
+ DCHECK(!kEmitCompilerReadBarrier);
__ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value);
// LOCK CMPXCHG has full barrier semantics, and we don't need
@@ -2145,11 +2163,8 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* code
__ setb(kZero, out.AsRegister<Register>());
__ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
- // In the case of the `UnsafeCASObject` intrinsic, accessing an
- // object in the heap with LOCK CMPXCHG does not require a read
- // barrier, as we do not keep a reference to this heap location.
- // However, if heap poisoning is enabled, we need to unpoison the
- // values that were poisoned earlier.
+ // If heap poisoning is enabled, we need to unpoison the values
+ // that were poisoned earlier.
if (kPoisonHeapReferences) {
if (base_equals_value) {
// `value` has been moved to a temporary register, no need to
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index ce737e3f7e..aa1c109738 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -2080,7 +2080,7 @@ static void GenUnsafePut(LocationSummary* locations, Primitive::Type type, bool
}
if (is_volatile) {
- __ mfence();
+ codegen->MemoryFence();
}
if (type == Primitive::kPrimNot) {
@@ -2150,6 +2150,17 @@ void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
+ // The UnsafeCASObject intrinsic is missing a read barrier, and
+ // therefore sometimes does not work as expected (b/25883050).
+ // Turn it off temporarily as a quick fix, until the read barrier is
+ // implemented.
+ //
+ // TODO(rpl): Implement a read barrier in GenCAS below and re-enable
+ // this intrinsic.
+ if (kEmitCompilerReadBarrier) {
+ return;
+ }
+
CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke);
}
@@ -2200,6 +2211,13 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* c
__ PoisonHeapReference(CpuRegister(value_reg));
}
+ // TODO: Add a read barrier for the reference stored in the object
+ // before attempting the CAS, similar to the one in the
+ // art::Unsafe_compareAndSwapObject JNI implementation.
+ //
+ // Note that this code is not (yet) used when read barriers are
+ // enabled (see IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject).
+ DCHECK(!kEmitCompilerReadBarrier);
__ LockCmpxchgl(Address(base, offset, TIMES_1, 0), CpuRegister(value_reg));
// LOCK CMPXCHG has full barrier semantics, and we don't need
@@ -2209,11 +2227,8 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* c
__ setcc(kZero, out);
__ movzxb(out, out);
- // In the case of the `UnsafeCASObject` intrinsic, accessing an
- // object in the heap with LOCK CMPXCHG does not require a read
- // barrier, as we do not keep a reference to this heap location.
- // However, if heap poisoning is enabled, we need to unpoison the
- // values that were poisoned earlier.
+ // If heap poisoning is enabled, we need to unpoison the values
+ // that were poisoned earlier.
if (kPoisonHeapReferences) {
if (base_equals_value) {
// `value_reg` has been moved to a temporary register, no need
diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc
index adde00464b..727f2bb717 100644
--- a/compiler/optimizing/load_store_elimination.cc
+++ b/compiler/optimizing/load_store_elimination.cc
@@ -119,10 +119,16 @@ class HeapLocation : public ArenaObject<kArenaAllocMisc> {
: ref_info_(ref_info),
offset_(offset),
index_(index),
- declaring_class_def_index_(declaring_class_def_index) {
+ declaring_class_def_index_(declaring_class_def_index),
+ value_killed_by_loop_side_effects_(true) {
DCHECK(ref_info != nullptr);
DCHECK((offset == kInvalidFieldOffset && index != nullptr) ||
(offset != kInvalidFieldOffset && index == nullptr));
+ if (ref_info->IsSingleton() && !IsArrayElement()) {
+ // Assume this location's value cannot be killed by loop side effects
+ // until proven otherwise.
+ value_killed_by_loop_side_effects_ = false;
+ }
}
ReferenceInfo* GetReferenceInfo() const { return ref_info_; }
@@ -139,11 +145,22 @@ class HeapLocation : public ArenaObject<kArenaAllocMisc> {
return index_ != nullptr;
}
+ bool IsValueKilledByLoopSideEffects() const {
+ return value_killed_by_loop_side_effects_;
+ }
+
+ void SetValueKilledByLoopSideEffects(bool val) {
+ value_killed_by_loop_side_effects_ = val;
+ }
+
private:
ReferenceInfo* const ref_info_; // reference for instance/static field or array access.
const size_t offset_; // offset of static/instance field.
HInstruction* const index_; // index of an array element.
const int16_t declaring_class_def_index_; // declaring class's def's dex index.
+ bool value_killed_by_loop_side_effects_; // value of this location may be killed by loop
+ // side effects because this location is stored
+ // into inside a loop.
DISALLOW_COPY_AND_ASSIGN(HeapLocation);
};
@@ -370,13 +387,13 @@ class HeapLocationCollector : public HGraphVisitor {
return heap_locations_[heap_location_idx];
}
- void VisitFieldAccess(HInstruction* ref, const FieldInfo& field_info) {
+ HeapLocation* VisitFieldAccess(HInstruction* ref, const FieldInfo& field_info) {
if (field_info.IsVolatile()) {
has_volatile_ = true;
}
const uint16_t declaring_class_def_index = field_info.GetDeclaringClassDefIndex();
const size_t offset = field_info.GetFieldOffset().SizeValue();
- GetOrCreateHeapLocation(ref, offset, nullptr, declaring_class_def_index);
+ return GetOrCreateHeapLocation(ref, offset, nullptr, declaring_class_def_index);
}
void VisitArrayAccess(HInstruction* array, HInstruction* index) {
@@ -390,8 +407,11 @@ class HeapLocationCollector : public HGraphVisitor {
}
void VisitInstanceFieldSet(HInstanceFieldSet* instruction) OVERRIDE {
- VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo());
+ HeapLocation* location = VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo());
has_heap_stores_ = true;
+ if (instruction->GetBlock()->GetLoopInformation() != nullptr) {
+ location->SetValueKilledByLoopSideEffects(true);
+ }
}
void VisitStaticFieldGet(HStaticFieldGet* instruction) OVERRIDE {
@@ -565,23 +585,26 @@ class LSEVisitor : public HGraphVisitor {
HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader();
ArenaVector<HInstruction*>& pre_header_heap_values =
heap_values_for_[pre_header->GetBlockId()];
+ // Inherit the values from pre-header.
+ for (size_t i = 0; i < heap_values.size(); i++) {
+ heap_values[i] = pre_header_heap_values[i];
+ }
+
// We do a single pass in reverse post order. For loops, use the side effects as a hint
// to see if the heap values should be killed.
if (side_effects_.GetLoopEffects(block).DoesAnyWrite()) {
- for (size_t i = 0; i < pre_header_heap_values.size(); i++) {
- // heap value is killed by loop side effects, need to keep the last store.
- KeepIfIsStore(pre_header_heap_values[i]);
- }
- if (kIsDebugBuild) {
- // heap_values should all be kUnknownHeapValue that it is inited with.
- for (size_t i = 0; i < heap_values.size(); i++) {
- DCHECK_EQ(heap_values[i], kUnknownHeapValue);
- }
- }
- } else {
- // Inherit the values from pre-header.
for (size_t i = 0; i < heap_values.size(); i++) {
- heap_values[i] = pre_header_heap_values[i];
+ HeapLocation* location = heap_location_collector_.GetHeapLocation(i);
+ ReferenceInfo* ref_info = location->GetReferenceInfo();
+ if (!ref_info->IsSingleton() || location->IsValueKilledByLoopSideEffects()) {
+ // heap value is killed by loop side effects (stored into directly, or due to
+ // aliasing).
+ KeepIfIsStore(pre_header_heap_values[i]);
+ heap_values[i] = kUnknownHeapValue;
+ } else {
+ // A singleton's field that's not stored into inside a loop is invariant throughout
+ // the loop.
+ }
}
}
}
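The net effect of the new flag: heap values are now always inherited from the pre-header, and only locations that the loop can really clobber (non-singletons, array elements, or singleton fields stored into inside a loop) fall back to kUnknownHeapValue. A schematic example (hypothetical fields on a non-escaping singleton s; pseudocode, not ART IR):

    s.inv  = 1;              // pre-header; never stored in the loop body
    s.kill = 0;              // pre-header; stored below
    loop {
      use(s.inv);            // value 1 survives the merge: load eliminated
      s.kill = s.kill + 1;   // store inside a loop marks the location killed
    }

Note that VisitInstanceFieldSet sets the flag for a store inside any loop, not just the loop being merged, so the analysis stays conservative.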
@@ -655,6 +678,16 @@ class LSEVisitor : public HGraphVisitor {
}
}
+ static bool IsIntFloatAlias(Primitive::Type type1, Primitive::Type type2) {
+ return (type1 == Primitive::kPrimFloat && type2 == Primitive::kPrimInt) ||
+ (type2 == Primitive::kPrimFloat && type1 == Primitive::kPrimInt);
+ }
+
+ static bool IsLongDoubleAlias(Primitive::Type type1, Primitive::Type type2) {
+ return (type1 == Primitive::kPrimDouble && type2 == Primitive::kPrimLong) ||
+ (type2 == Primitive::kPrimDouble && type1 == Primitive::kPrimLong);
+ }
+
void VisitGetLocation(HInstruction* instruction,
HInstruction* ref,
size_t offset,
@@ -686,7 +719,8 @@ class LSEVisitor : public HGraphVisitor {
if ((heap_value != kUnknownHeapValue) &&
// Keep the load due to possible I/F, J/D array aliasing.
// See b/22538329 for details.
- (heap_value->GetType() == instruction->GetType())) {
+ !IsIntFloatAlias(heap_value->GetType(), instruction->GetType()) &&
+ !IsLongDoubleAlias(heap_value->GetType(), instruction->GetType())) {
removed_loads_.push_back(instruction);
substitute_instructions_for_loads_.push_back(heap_value);
TryRemovingNullCheck(instruction);
@@ -751,6 +785,9 @@ class LSEVisitor : public HGraphVisitor {
if (loop_info != nullptr) {
// instruction is a store in the loop so the loop must do writes.
DCHECK(side_effects_.GetLoopEffects(loop_info->GetHeader()).DoesAnyWrite());
+ // If it's a singleton, IsValueKilledByLoopSideEffects() must be true.
+ DCHECK(!ref_info->IsSingleton() ||
+ heap_location_collector_.GetHeapLocation(idx)->IsValueKilledByLoopSideEffects());
if (loop_info->IsDefinedOutOfTheLoop(original_ref)) {
DCHECK(original_ref->GetBlock()->Dominates(loop_info->GetPreHeader()));
diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc
index 30bcf19c64..176c50ce21 100644
--- a/compiler/optimizing/parallel_move_resolver.cc
+++ b/compiler/optimizing/parallel_move_resolver.cc
@@ -169,7 +169,7 @@ MoveOperands* ParallelMoveResolverWithSwap::PerformMove(size_t index) {
// If `other_move` was swapped, we iterate again to find a new
// potential cycle.
required_swap = nullptr;
- i = 0;
+ i = -1;
} else if (required_swap != nullptr) {
// A move is required to swap. We walk back the cycle to find the
// move by just returning from this `PerformMove`.
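The `i = -1` fix compensates for the `++i` of the enclosing scan loop: the old `i = 0` meant the rescan after a swap actually resumed at index 1 and never revisited the move at index 0. Schematically (the real loop in PerformMove differs in detail):

    for (size_t i = 0; i < moves_.size(); ++i) {
      // ... resolve moves_[i], possibly performing a swap ...
      if (other_move_was_swapped) {
        required_swap = nullptr;
        i = -1;  // wraps; ++i restarts the scan at index 0
      }
    }

The CyclesWith64BitsMoves2 test added below exercises a cycle of exactly this shape.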
diff --git a/compiler/optimizing/parallel_move_test.cc b/compiler/optimizing/parallel_move_test.cc
index 46e6f3e5d0..5e8fe37669 100644
--- a/compiler/optimizing/parallel_move_test.cc
+++ b/compiler/optimizing/parallel_move_test.cc
@@ -609,4 +609,36 @@ TYPED_TEST(ParallelMoveTest, CyclesWith64BitsMoves) {
}
}
+TYPED_TEST(ParallelMoveTest, CyclesWith64BitsMoves2) {
+ ArenaPool pool;
+ ArenaAllocator allocator(&pool);
+
+ {
+ TypeParam resolver(&allocator);
+ HParallelMove* moves = new (&allocator) HParallelMove(&allocator);
+ moves->AddMove(
+ Location::RegisterLocation(0),
+ Location::RegisterLocation(3),
+ Primitive::kPrimInt,
+ nullptr);
+ moves->AddMove(
+ Location::RegisterPairLocation(2, 3),
+ Location::RegisterPairLocation(0, 1),
+ Primitive::kPrimLong,
+ nullptr);
+ moves->AddMove(
+ Location::RegisterLocation(7),
+ Location::RegisterLocation(2),
+ Primitive::kPrimInt,
+ nullptr);
+ resolver.EmitNativeCode(moves);
+ if (TestFixture::has_swap) {
+ ASSERT_STREQ("(2,3 <-> 0,1) (2 -> 3) (7 -> 2)", resolver.GetMessage().c_str());
+ } else {
+ ASSERT_STREQ("(2,3 -> T0,T1) (0 -> 3) (T0,T1 -> 0,1) (7 -> 2)",
+ resolver.GetMessage().c_str());
+ }
+ }
+}
+
} // namespace art
diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc
index b383f1e1ad..a385448104 100644
--- a/compiler/optimizing/pc_relative_fixups_x86.cc
+++ b/compiler/optimizing/pc_relative_fixups_x86.cc
@@ -15,6 +15,7 @@
*/
#include "pc_relative_fixups_x86.h"
+#include "code_generator_x86.h"
namespace art {
namespace x86 {
@@ -79,6 +80,10 @@ class PCRelativeHandlerVisitor : public HGraphVisitor {
}
void VisitPackedSwitch(HPackedSwitch* switch_insn) OVERRIDE {
+ if (switch_insn->GetNumEntries() <=
+ InstructionCodeGeneratorX86::kPackedSwitchJumpTableThreshold) {
+ return;
+ }
// We need to replace the HPackedSwitch with a HX86PackedSwitch in order to
// address the constant area.
InitializePCRelativeBasePointer();
diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h
index 98a1a8f9a1..b79c2f0f4e 100644
--- a/compiler/utils/arm/assembler_arm.h
+++ b/compiler/utils/arm/assembler_arm.h
@@ -878,7 +878,15 @@ class ArmAssembler : public Assembler {
Register rn,
Opcode opcode,
uint32_t immediate,
+ SetCc set_cc,
ShifterOperand* shifter_op) = 0;
+ bool ShifterOperandCanHold(Register rd,
+ Register rn,
+ Opcode opcode,
+ uint32_t immediate,
+ ShifterOperand* shifter_op) {
+ return ShifterOperandCanHold(rd, rn, opcode, immediate, kCcDontCare, shifter_op);
+ }
virtual bool ShifterOperandCanAlwaysHold(uint32_t immediate) = 0;
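The non-virtual overload keeps old call sites working by delegating with kCcDontCare; the `using ArmAssembler::ShifterOperandCanHold;` lines added to the subclasses below are needed because the subclass's five-argument override would otherwise hide the inherited four-argument overload. A minimal reproduction of that C++ name-hiding rule (hypothetical names):

    struct Base {
      virtual bool CanHold(unsigned imm, int set_cc) = 0;
      bool CanHold(unsigned imm) { return CanHold(imm, /*kCcDontCare*/ 0); }
    };
    struct Derived : Base {
      bool CanHold(unsigned imm, int set_cc) override { return imm < 256u || set_cc == 0; }
      using Base::CanHold;  // without this, Derived{}.CanHold(1u) would not compile
    };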
diff --git a/compiler/utils/arm/assembler_arm32.cc b/compiler/utils/arm/assembler_arm32.cc
index a7dbacd3a9..ebca25bbf9 100644
--- a/compiler/utils/arm/assembler_arm32.cc
+++ b/compiler/utils/arm/assembler_arm32.cc
@@ -57,6 +57,7 @@ bool Arm32Assembler::ShifterOperandCanHold(Register rd ATTRIBUTE_UNUSED,
Register rn ATTRIBUTE_UNUSED,
Opcode opcode ATTRIBUTE_UNUSED,
uint32_t immediate,
+ SetCc set_cc ATTRIBUTE_UNUSED,
ShifterOperand* shifter_op) {
return ShifterOperandCanHoldArm32(immediate, shifter_op);
}
diff --git a/compiler/utils/arm/assembler_arm32.h b/compiler/utils/arm/assembler_arm32.h
index ce3a87275d..bf332feb62 100644
--- a/compiler/utils/arm/assembler_arm32.h
+++ b/compiler/utils/arm/assembler_arm32.h
@@ -297,7 +297,9 @@ class Arm32Assembler FINAL : public ArmAssembler {
Register rn,
Opcode opcode,
uint32_t immediate,
+ SetCc set_cc,
ShifterOperand* shifter_op) OVERRIDE;
+ using ArmAssembler::ShifterOperandCanHold; // Don't hide the non-virtual override.
bool ShifterOperandCanAlwaysHold(uint32_t immediate) OVERRIDE;
diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc
index cdeb443977..f341030c15 100644
--- a/compiler/utils/arm/assembler_thumb2.cc
+++ b/compiler/utils/arm/assembler_thumb2.cc
@@ -500,6 +500,7 @@ bool Thumb2Assembler::ShifterOperandCanHold(Register rd ATTRIBUTE_UNUSED,
Register rn ATTRIBUTE_UNUSED,
Opcode opcode,
uint32_t immediate,
+ SetCc set_cc,
ShifterOperand* shifter_op) {
shifter_op->type_ = ShifterOperand::kImmediate;
shifter_op->immed_ = immediate;
@@ -508,7 +509,8 @@ bool Thumb2Assembler::ShifterOperandCanHold(Register rd ATTRIBUTE_UNUSED,
switch (opcode) {
case ADD:
case SUB:
- if (immediate < (1 << 12)) { // Less than (or equal to) 12 bits can always be done.
+ // Up to 12 bits can be done if we don't need to set condition codes: the 12-bit
+ // ADDW/SUBW encodings have no flag-setting variant.
+ if (immediate < (1 << 12) && set_cc != kCcSet) {
return true;
}
return ArmAssembler::ModifiedImmediate(immediate) != kInvalidModifiedImmediate;
@@ -1239,7 +1241,10 @@ bool Thumb2Assembler::Is32BitDataProcessing(Condition cond,
// The only thumb1 instructions with a register and an immediate are ADD and SUB
// with a 3-bit immediate, and RSB with zero immediate.
if (opcode == ADD || opcode == SUB) {
- if (!IsUint<3>(so.GetImmediate())) {
+ if ((cond == AL) ? set_cc == kCcKeep : set_cc == kCcSet) {
+ return true; // Cannot match "setflags".
+ }
+ if (!IsUint<3>(so.GetImmediate()) && !IsUint<3>(-so.GetImmediate())) {
return true;
}
} else {
@@ -1249,8 +1254,12 @@ bool Thumb2Assembler::Is32BitDataProcessing(Condition cond,
// ADD, SUB, CMP and MOV may be thumb1 only if the immediate is 8 bits.
if (!(opcode == ADD || opcode == SUB || opcode == MOV || opcode == CMP)) {
return true;
+ } else if (opcode != CMP && ((cond == AL) ? set_cc == kCcKeep : set_cc == kCcSet)) {
+ return true; // Cannot match "setflags" for ADD, SUB or MOV.
} else {
- if (!IsUint<8>(so.GetImmediate())) {
+ // For ADD and SUB, also allow a negative 8-bit immediate, as we will emit the opposite opcode.
+ if (!IsUint<8>(so.GetImmediate()) &&
+ (opcode == MOV || opcode == CMP || !IsUint<8>(-so.GetImmediate()))) {
return true;
}
}
@@ -1602,12 +1611,18 @@ void Thumb2Assembler::Emit16BitAddSub(Condition cond,
uint8_t rn_shift = 3;
uint8_t immediate_shift = 0;
bool use_immediate = false;
- uint32_t immediate = 0; // Should be at most 9 bits but keep the full immediate for CHECKs.
+ uint32_t immediate = 0; // Should be at most 10 bits but keep the full immediate for CHECKs.
uint8_t thumb_opcode;
if (so.IsImmediate()) {
use_immediate = true;
immediate = so.GetImmediate();
+ if (!IsUint<10>(immediate)) {
+ // Flip ADD/SUB.
+ opcode = (opcode == ADD) ? SUB : ADD;
+ immediate = -immediate;
+ DCHECK(IsUint<10>(immediate)); // More stringent checks below.
+ }
}
switch (opcode) {
@@ -1644,7 +1659,7 @@ void Thumb2Assembler::Emit16BitAddSub(Condition cond,
dp_opcode = 2U /* 0b10 */;
thumb_opcode = 3U /* 0b11 */;
opcode_shift = 12;
- CHECK_LT(immediate, (1u << 9));
+ CHECK(IsUint<9>(immediate));
CHECK_ALIGNED(immediate, 4);
// Remove rd and rn from instruction by orring it with immed and clearing bits.
@@ -1658,7 +1673,7 @@ void Thumb2Assembler::Emit16BitAddSub(Condition cond,
dp_opcode = 2U /* 0b10 */;
thumb_opcode = 5U /* 0b101 */;
opcode_shift = 11;
- CHECK_LT(immediate, (1u << 10));
+ CHECK(IsUint<10>(immediate));
CHECK_ALIGNED(immediate, 4);
// Remove rn from instruction.
@@ -1668,11 +1683,13 @@ void Thumb2Assembler::Emit16BitAddSub(Condition cond,
immediate >>= 2;
} else if (rn != rd) {
// Must use T1.
+ CHECK(IsUint<3>(immediate));
opcode_shift = 9;
thumb_opcode = 14U /* 0b01110 */;
immediate_shift = 6;
} else {
// T2 encoding.
+ CHECK(IsUint<8>(immediate));
opcode_shift = 11;
thumb_opcode = 6U /* 0b110 */;
rd_shift = 8;
@@ -1702,7 +1719,7 @@ void Thumb2Assembler::Emit16BitAddSub(Condition cond,
dp_opcode = 2U /* 0b10 */;
thumb_opcode = 0x61 /* 0b1100001 */;
opcode_shift = 7;
- CHECK_LT(immediate, (1u << 9));
+ CHECK(IsUint<9>(immediate));
CHECK_ALIGNED(immediate, 4);
// Remove rd and rn from instruction by orring it with immed and clearing bits.
@@ -1713,11 +1730,13 @@ void Thumb2Assembler::Emit16BitAddSub(Condition cond,
immediate >>= 2;
} else if (rn != rd) {
// Must use T1.
+ CHECK(IsUint<3>(immediate));
opcode_shift = 9;
thumb_opcode = 15U /* 0b01111 */;
immediate_shift = 6;
} else {
// T2 encoding.
+ CHECK(IsUint<8>(immediate));
opcode_shift = 11;
thumb_opcode = 7U /* 0b111 */;
rd_shift = 8;
@@ -3401,25 +3420,30 @@ void Thumb2Assembler::AddConstant(Register rd, Register rn, int32_t value,
// positive values and sub for negative ones, which would slightly improve
// the readability of generated code for some constants.
ShifterOperand shifter_op;
- if (ShifterOperandCanHold(rd, rn, ADD, value, &shifter_op)) {
+ if (ShifterOperandCanHold(rd, rn, ADD, value, set_cc, &shifter_op)) {
add(rd, rn, shifter_op, cond, set_cc);
- } else if (ShifterOperandCanHold(rd, rn, SUB, -value, &shifter_op)) {
+ } else if (ShifterOperandCanHold(rd, rn, SUB, -value, set_cc, &shifter_op)) {
sub(rd, rn, shifter_op, cond, set_cc);
} else {
CHECK(rn != IP);
- if (ShifterOperandCanHold(rd, rn, MVN, ~value, &shifter_op)) {
- mvn(IP, shifter_op, cond, kCcKeep);
- add(rd, rn, ShifterOperand(IP), cond, set_cc);
- } else if (ShifterOperandCanHold(rd, rn, MVN, ~(-value), &shifter_op)) {
- mvn(IP, shifter_op, cond, kCcKeep);
- sub(rd, rn, ShifterOperand(IP), cond, set_cc);
+ // If rd != rn, use rd as temp. This allows 16-bit ADD/SUB in more situations than using IP.
+ Register temp = (rd != rn) ? rd : IP;
+ if (ShifterOperandCanHold(temp, kNoRegister, MVN, ~value, set_cc, &shifter_op)) {
+ mvn(temp, shifter_op, cond, kCcKeep);
+ add(rd, rn, ShifterOperand(temp), cond, set_cc);
+ } else if (ShifterOperandCanHold(temp, kNoRegister, MVN, ~(-value), set_cc, &shifter_op)) {
+ mvn(temp, shifter_op, cond, kCcKeep);
+ sub(rd, rn, ShifterOperand(temp), cond, set_cc);
+ } else if (High16Bits(-value) == 0) {
+ movw(temp, Low16Bits(-value), cond);
+ sub(rd, rn, ShifterOperand(temp), cond, set_cc);
} else {
- movw(IP, Low16Bits(value), cond);
+ movw(temp, Low16Bits(value), cond);
uint16_t value_high = High16Bits(value);
if (value_high != 0) {
- movt(IP, value_high, cond);
+ movt(temp, value_high, cond);
}
- add(rd, rn, ShifterOperand(IP), cond, set_cc);
+ add(rd, rn, ShifterOperand(temp), cond, set_cc);
}
}
}
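The MVN fallbacks work because mvn materializes a bitwise complement: for example AddConstant(R0, R1, 0x1001), where neither 0x1001 nor -0x1001 has a valid immediate encoding but ~(-0x1001) == 0x1000 does, comes out as (cf. the AddConstant test expectations added below):

    mvn  r0, #0x1000    @ r0 = ~0x1000 = 0xffffefff = -0x1001
    subs r0, r1, r0     @ r0 = r1 - (-0x1001) = r1 + 0x1001

Using rd instead of IP as the temporary whenever rd != rn is what lets the final add/sub pick a 16-bit encoding, as here.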
@@ -3429,9 +3453,9 @@ void Thumb2Assembler::CmpConstant(Register rn, int32_t value, Condition cond) {
// positive values and sub for negative ones, which would slightly improve
// the readability of generated code for some constants.
ShifterOperand shifter_op;
- if (ShifterOperandCanHold(kNoRegister, rn, CMP, value, &shifter_op)) {
+ if (ShifterOperandCanHold(kNoRegister, rn, CMP, value, kCcSet, &shifter_op)) {
cmp(rn, shifter_op, cond);
- } else if (ShifterOperandCanHold(kNoRegister, rn, CMN, ~value, &shifter_op)) {
+ } else if (ShifterOperandCanHold(kNoRegister, rn, CMN, ~value, kCcSet, &shifter_op)) {
cmn(rn, shifter_op, cond);
} else {
CHECK(rn != IP);
diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h
index 9aeece8e57..bf07b2dbf8 100644
--- a/compiler/utils/arm/assembler_thumb2.h
+++ b/compiler/utils/arm/assembler_thumb2.h
@@ -342,7 +342,9 @@ class Thumb2Assembler FINAL : public ArmAssembler {
Register rn,
Opcode opcode,
uint32_t immediate,
+ SetCc set_cc,
ShifterOperand* shifter_op) OVERRIDE;
+ using ArmAssembler::ShifterOperandCanHold; // Don't hide the non-virtual override.
bool ShifterOperandCanAlwaysHold(uint32_t immediate) OVERRIDE;
diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc
index 5ae2cc28a2..0ef0dc19e6 100644
--- a/compiler/utils/assembler_thumb_test.cc
+++ b/compiler/utils/assembler_thumb_test.cc
@@ -135,7 +135,8 @@ void DumpAndCheck(std::vector<uint8_t>& code, const char* testname, const char*
toolsdir.c_str(), filename);
if (kPrintResults) {
// Print the results only, don't check. This is used to generate new output for inserting
- // into the .inc file.
+ // into the .inc file, so let's add the appropriate prefix/suffix needed in the C++ code.
+ strcat(cmd, " | sed '-es/^/ \"/' | sed '-es/$/\\\\n\",/'");
int cmd_result3 = system(cmd);
ASSERT_EQ(cmd_result3, 0) << strerror(errno);
} else {
@@ -1379,6 +1380,252 @@ TEST(Thumb2AssemblerTest, CompareAndBranch) {
EmitAndCheck(&assembler, "CompareAndBranch");
}
+TEST(Thumb2AssemblerTest, AddConstant) {
+ arm::Thumb2Assembler assembler;
+
+ // Low registers, Rd != Rn.
+ __ AddConstant(R0, R1, 0); // MOV.
+ __ AddConstant(R0, R1, 1); // 16-bit ADDS, encoding T1.
+ __ AddConstant(R0, R1, 7); // 16-bit ADDS, encoding T1.
+ __ AddConstant(R0, R1, 8); // 32-bit ADD, encoding T3.
+ __ AddConstant(R0, R1, 255); // 32-bit ADD, encoding T3.
+ __ AddConstant(R0, R1, 256); // 32-bit ADD, encoding T3.
+ __ AddConstant(R0, R1, 257); // 32-bit ADD, encoding T4.
+ __ AddConstant(R0, R1, 0xfff); // 32-bit ADD, encoding T4.
+ __ AddConstant(R0, R1, 0x1000); // 32-bit ADD, encoding T3.
+ __ AddConstant(R0, R1, 0x1001); // MVN+SUB.
+ __ AddConstant(R0, R1, 0x1002); // MOVW+ADD.
+ __ AddConstant(R0, R1, 0xffff); // MOVW+ADD.
+ __ AddConstant(R0, R1, 0x10000); // 32-bit ADD, encoding T3.
+ __ AddConstant(R0, R1, 0x10001); // 32-bit ADD, encoding T3.
+ __ AddConstant(R0, R1, 0x10002); // MVN+SUB.
+ __ AddConstant(R0, R1, 0x10003); // MOVW+MOVT+ADD.
+ __ AddConstant(R0, R1, -1); // 16-bit SUBS.
+ __ AddConstant(R0, R1, -7); // 16-bit SUBS.
+ __ AddConstant(R0, R1, -8); // 32-bit SUB, encoding T3.
+ __ AddConstant(R0, R1, -255); // 32-bit SUB, encoding T3.
+ __ AddConstant(R0, R1, -256); // 32-bit SUB, encoding T3.
+ __ AddConstant(R0, R1, -257); // 32-bit SUB, encoding T4.
+ __ AddConstant(R0, R1, -0xfff); // 32-bit SUB, encoding T4.
+ __ AddConstant(R0, R1, -0x1000); // 32-bit SUB, encoding T3.
+ __ AddConstant(R0, R1, -0x1001); // MVN+ADD.
+ __ AddConstant(R0, R1, -0x1002); // MOVW+SUB.
+ __ AddConstant(R0, R1, -0xffff); // MOVW+SUB.
+ __ AddConstant(R0, R1, -0x10000); // 32-bit SUB, encoding T3.
+ __ AddConstant(R0, R1, -0x10001); // 32-bit SUB, encoding T3.
+ __ AddConstant(R0, R1, -0x10002); // MVN+ADD.
+ __ AddConstant(R0, R1, -0x10003); // MOVW+MOVT+ADD.
+
+ // Low registers, Rd == Rn.
+ __ AddConstant(R0, R0, 0); // Nothing.
+ __ AddConstant(R1, R1, 1); // 16-bit ADDS, encoding T2.
+ __ AddConstant(R0, R0, 7); // 16-bit ADDS, encoding T2.
+ __ AddConstant(R1, R1, 8); // 16-bit ADDS, encoding T2.
+ __ AddConstant(R0, R0, 255); // 16-bit ADDS, encoding T2.
+ __ AddConstant(R1, R1, 256); // 32-bit ADD, encoding T3.
+ __ AddConstant(R0, R0, 257); // 32-bit ADD, encoding T4.
+ __ AddConstant(R1, R1, 0xfff); // 32-bit ADD, encoding T4.
+ __ AddConstant(R0, R0, 0x1000); // 32-bit ADD, encoding T3.
+ __ AddConstant(R1, R1, 0x1001); // MVN+SUB.
+ __ AddConstant(R0, R0, 0x1002); // MOVW+ADD.
+ __ AddConstant(R1, R1, 0xffff); // MOVW+ADD.
+ __ AddConstant(R0, R0, 0x10000); // 32-bit ADD, encoding T3.
+ __ AddConstant(R1, R1, 0x10001); // 32-bit ADD, encoding T3.
+ __ AddConstant(R0, R0, 0x10002); // MVN+SUB.
+ __ AddConstant(R1, R1, 0x10003); // MOVW+MOVT+ADD.
+ __ AddConstant(R0, R0, -1); // 16-bit SUBS, encoding T2.
+ __ AddConstant(R1, R1, -7); // 16-bit SUBS, encoding T2.
+ __ AddConstant(R0, R0, -8); // 16-bit SUBS, encoding T2.
+ __ AddConstant(R1, R1, -255); // 16-bit SUBS, encoding T2.
+ __ AddConstant(R0, R0, -256); // 32-bit SUB, encoding T3.
+ __ AddConstant(R1, R1, -257); // 32-bit SUB, encoding T4.
+ __ AddConstant(R0, R0, -0xfff); // 32-bit SUB, encoding T4.
+ __ AddConstant(R1, R1, -0x1000); // 32-bit SUB, encoding T3.
+ __ AddConstant(R0, R0, -0x1001); // MVN+ADD.
+ __ AddConstant(R1, R1, -0x1002); // MOVW+SUB.
+ __ AddConstant(R0, R0, -0xffff); // MOVW+SUB.
+ __ AddConstant(R1, R1, -0x10000); // 32-bit SUB, encoding T3.
+ __ AddConstant(R0, R0, -0x10001); // 32-bit SUB, encoding T3.
+ __ AddConstant(R1, R1, -0x10002); // MVN+ADD.
+ __ AddConstant(R0, R0, -0x10003); // MOVW+MOVT+ADD.
+
+ // High registers.
+ __ AddConstant(R8, R8, 0); // Nothing.
+ __ AddConstant(R8, R1, 1); // 32-bit ADD, encoding T3.
+ __ AddConstant(R0, R8, 7); // 32-bit ADD, encoding T3.
+ __ AddConstant(R8, R8, 8); // 32-bit ADD, encoding T3.
+ __ AddConstant(R8, R1, 255); // 32-bit ADD, encoding T3.
+ __ AddConstant(R0, R8, 256); // 32-bit ADD, encoding T3.
+ __ AddConstant(R8, R8, 257); // 32-bit ADD, encoding T4.
+ __ AddConstant(R8, R1, 0xfff); // 32-bit ADD, encoding T4.
+ __ AddConstant(R0, R8, 0x1000); // 32-bit ADD, encoding T3.
+ __ AddConstant(R8, R8, 0x1001); // MVN+SUB.
+ __ AddConstant(R0, R1, 0x1002); // MOVW+ADD.
+ __ AddConstant(R0, R8, 0xffff); // MOVW+ADD.
+ __ AddConstant(R8, R8, 0x10000); // 32-bit ADD, encoding T3.
+ __ AddConstant(R8, R1, 0x10001); // 32-bit ADD, encoding T3.
+ __ AddConstant(R0, R8, 0x10002); // MVN+SUB.
+ __ AddConstant(R0, R8, 0x10003); // MOVW+MOVT+ADD.
+ __ AddConstant(R8, R8, -1); // 32-bit ADD, encoding T3.
+ __ AddConstant(R8, R1, -7); // 32-bit SUB, encoding T3.
+ __ AddConstant(R0, R8, -8); // 32-bit SUB, encoding T3.
+ __ AddConstant(R8, R8, -255); // 32-bit SUB, encoding T3.
+ __ AddConstant(R8, R1, -256); // 32-bit SUB, encoding T3.
+ __ AddConstant(R0, R8, -257); // 32-bit SUB, encoding T4.
+ __ AddConstant(R8, R8, -0xfff); // 32-bit SUB, encoding T4.
+ __ AddConstant(R8, R1, -0x1000); // 32-bit SUB, encoding T3.
+ __ AddConstant(R0, R8, -0x1001); // MVN+ADD.
+ __ AddConstant(R0, R1, -0x1002); // MOVW+SUB.
+ __ AddConstant(R8, R1, -0xffff); // MOVW+SUB.
+ __ AddConstant(R0, R8, -0x10000); // 32-bit SUB, encoding T3.
+ __ AddConstant(R8, R8, -0x10001); // 32-bit SUB, encoding T3.
+ __ AddConstant(R8, R1, -0x10002); // MVN+SUB.
+ __ AddConstant(R0, R8, -0x10003); // MOVW+MOVT+ADD.
+
+ // Low registers, Rd != Rn, kCcKeep.
+ __ AddConstant(R0, R1, 0, AL, kCcKeep); // MOV.
+ __ AddConstant(R0, R1, 1, AL, kCcKeep); // 32-bit ADD, encoding T3.
+ __ AddConstant(R0, R1, 7, AL, kCcKeep); // 32-bit ADD, encoding T3.
+ __ AddConstant(R0, R1, 8, AL, kCcKeep); // 32-bit ADD, encoding T3.
+ __ AddConstant(R0, R1, 255, AL, kCcKeep); // 32-bit ADD, encoding T3.
+ __ AddConstant(R0, R1, 256, AL, kCcKeep); // 32-bit ADD, encoding T3.
+ __ AddConstant(R0, R1, 257, AL, kCcKeep); // 32-bit ADD, encoding T4.
+ __ AddConstant(R0, R1, 0xfff, AL, kCcKeep); // 32-bit ADD, encoding T4.
+ __ AddConstant(R0, R1, 0x1000, AL, kCcKeep); // 32-bit ADD, encoding T3.
+ __ AddConstant(R0, R1, 0x1001, AL, kCcKeep); // MVN+SUB.
+ __ AddConstant(R0, R1, 0x1002, AL, kCcKeep); // MOVW+ADD.
+ __ AddConstant(R0, R1, 0xffff, AL, kCcKeep); // MOVW+ADD.
+ __ AddConstant(R0, R1, 0x10000, AL, kCcKeep); // 32-bit ADD, encoding T3.
+ __ AddConstant(R0, R1, 0x10001, AL, kCcKeep); // 32-bit ADD, encoding T3.
+ __ AddConstant(R0, R1, 0x10002, AL, kCcKeep); // MVN+SUB.
+ __ AddConstant(R0, R1, 0x10003, AL, kCcKeep); // MOVW+MOVT+ADD.
+ __ AddConstant(R0, R1, -1, AL, kCcKeep); // 32-bit ADD, encoding T3.
+ __ AddConstant(R0, R1, -7, AL, kCcKeep); // 32-bit SUB, encoding T3.
+ __ AddConstant(R0, R1, -8, AL, kCcKeep); // 32-bit SUB, encoding T3.
+ __ AddConstant(R0, R1, -255, AL, kCcKeep); // 32-bit SUB, encoding T3.
+ __ AddConstant(R0, R1, -256, AL, kCcKeep); // 32-bit SUB, encoding T3.
+ __ AddConstant(R0, R1, -257, AL, kCcKeep); // 32-bit SUB, encoding T4.
+ __ AddConstant(R0, R1, -0xfff, AL, kCcKeep); // 32-bit SUB, encoding T4.
+ __ AddConstant(R0, R1, -0x1000, AL, kCcKeep); // 32-bit SUB, encoding T3.
+ __ AddConstant(R0, R1, -0x1001, AL, kCcKeep); // MVN+ADD.
+ __ AddConstant(R0, R1, -0x1002, AL, kCcKeep); // MOVW+SUB.
+ __ AddConstant(R0, R1, -0xffff, AL, kCcKeep); // MOVW+SUB.
+ __ AddConstant(R0, R1, -0x10000, AL, kCcKeep); // 32-bit SUB, encoding T3.
+ __ AddConstant(R0, R1, -0x10001, AL, kCcKeep); // 32-bit SUB, encoding T3.
+ __ AddConstant(R0, R1, -0x10002, AL, kCcKeep); // MVN+ADD.
+ __ AddConstant(R0, R1, -0x10003, AL, kCcKeep); // MOVW+MOVT+ADD.
+
+ // Low registers, Rd == Rn, kCcKeep.
+ __ AddConstant(R0, R0, 0, AL, kCcKeep); // Nothing.
+ __ AddConstant(R1, R1, 1, AL, kCcKeep); // 32-bit ADD, encoding T3.
+ __ AddConstant(R0, R0, 7, AL, kCcKeep); // 32-bit ADD, encoding T3.
+ __ AddConstant(R1, R1, 8, AL, kCcKeep); // 32-bit ADD, encoding T3.
+ __ AddConstant(R0, R0, 255, AL, kCcKeep); // 32-bit ADD, encoding T3.
+ __ AddConstant(R1, R1, 256, AL, kCcKeep); // 32-bit ADD, encoding T3.
+ __ AddConstant(R0, R0, 257, AL, kCcKeep); // 32-bit ADD, encoding T4.
+ __ AddConstant(R1, R1, 0xfff, AL, kCcKeep); // 32-bit ADD, encoding T4.
+ __ AddConstant(R0, R0, 0x1000, AL, kCcKeep); // 32-bit ADD, encoding T3.
+ __ AddConstant(R1, R1, 0x1001, AL, kCcKeep); // MVN+SUB.
+ __ AddConstant(R0, R0, 0x1002, AL, kCcKeep); // MOVW+ADD.
+ __ AddConstant(R1, R1, 0xffff, AL, kCcKeep); // MOVW+ADD.
+ __ AddConstant(R0, R0, 0x10000, AL, kCcKeep); // 32-bit ADD, encoding T3.
+ __ AddConstant(R1, R1, 0x10001, AL, kCcKeep); // 32-bit ADD, encoding T3.
+ __ AddConstant(R0, R0, 0x10002, AL, kCcKeep); // MVN+SUB.
+ __ AddConstant(R1, R1, 0x10003, AL, kCcKeep); // MOVW+MOVT+ADD.
+ __ AddConstant(R0, R0, -1, AL, kCcKeep); // 32-bit ADD, encoding T3.
+ __ AddConstant(R1, R1, -7, AL, kCcKeep); // 32-bit SUB, encoding T3.
+ __ AddConstant(R0, R0, -8, AL, kCcKeep); // 32-bit SUB, encoding T3.
+ __ AddConstant(R1, R1, -255, AL, kCcKeep); // 32-bit SUB, encoding T3.
+ __ AddConstant(R0, R0, -256, AL, kCcKeep); // 32-bit SUB, encoding T3.
+ __ AddConstant(R1, R1, -257, AL, kCcKeep); // 32-bit SUB, encoding T4.
+ __ AddConstant(R0, R0, -0xfff, AL, kCcKeep); // 32-bit SUB, encoding T4.
+ __ AddConstant(R1, R1, -0x1000, AL, kCcKeep); // 32-bit SUB, encoding T3.
+ __ AddConstant(R0, R0, -0x1001, AL, kCcKeep); // MVN+ADD.
+ __ AddConstant(R1, R1, -0x1002, AL, kCcKeep); // MOVW+SUB.
+ __ AddConstant(R0, R0, -0xffff, AL, kCcKeep); // MOVW+SUB.
+ __ AddConstant(R1, R1, -0x10000, AL, kCcKeep); // 32-bit SUB, encoding T3.
+ __ AddConstant(R0, R0, -0x10001, AL, kCcKeep); // 32-bit SUB, encoding T3.
+ __ AddConstant(R1, R1, -0x10002, AL, kCcKeep); // MVN+ADD.
+ __ AddConstant(R0, R0, -0x10003, AL, kCcKeep); // MOVW+MOVT+ADD.
+
+ // Low registers, Rd != Rn, kCcSet.
+ __ AddConstant(R0, R1, 0, AL, kCcSet); // 16-bit ADDS.
+ __ AddConstant(R0, R1, 1, AL, kCcSet); // 16-bit ADDS.
+ __ AddConstant(R0, R1, 7, AL, kCcSet); // 16-bit ADDS.
+ __ AddConstant(R0, R1, 8, AL, kCcSet); // 32-bit ADDS, encoding T3.
+ __ AddConstant(R0, R1, 255, AL, kCcSet); // 32-bit ADDS, encoding T3.
+ __ AddConstant(R0, R1, 256, AL, kCcSet); // 32-bit ADDS, encoding T3.
+ __ AddConstant(R0, R1, 257, AL, kCcSet); // MVN+SUBS.
+ __ AddConstant(R0, R1, 0xfff, AL, kCcSet); // MOVW+ADDS.
+ __ AddConstant(R0, R1, 0x1000, AL, kCcSet); // 32-bit ADDS, encoding T3.
+ __ AddConstant(R0, R1, 0x1001, AL, kCcSet); // MVN+SUBS.
+ __ AddConstant(R0, R1, 0x1002, AL, kCcSet); // MOVW+ADDS.
+ __ AddConstant(R0, R1, 0xffff, AL, kCcSet); // MOVW+ADDS.
+ __ AddConstant(R0, R1, 0x10000, AL, kCcSet); // 32-bit ADDS, encoding T3.
+ __ AddConstant(R0, R1, 0x10001, AL, kCcSet); // 32-bit ADDS, encoding T3.
+ __ AddConstant(R0, R1, 0x10002, AL, kCcSet); // MVN+SUBS.
+ __ AddConstant(R0, R1, 0x10003, AL, kCcSet); // MOVW+MOVT+ADDS.
+ __ AddConstant(R0, R1, -1, AL, kCcSet); // 16-bit SUBS.
+ __ AddConstant(R0, R1, -7, AL, kCcSet); // 16-bit SUBS.
+ __ AddConstant(R0, R1, -8, AL, kCcSet); // 32-bit SUBS, encoding T3.
+ __ AddConstant(R0, R1, -255, AL, kCcSet); // 32-bit SUBS, encoding T3.
+ __ AddConstant(R0, R1, -256, AL, kCcSet); // 32-bit SUBS, encoding T3.
+ __ AddConstant(R0, R1, -257, AL, kCcSet); // MVN+ADDS.
+ __ AddConstant(R0, R1, -0xfff, AL, kCcSet); // MOVW+SUBS.
+ __ AddConstant(R0, R1, -0x1000, AL, kCcSet); // 32-bit SUBS, encoding T3.
+ __ AddConstant(R0, R1, -0x1001, AL, kCcSet); // MVN+ADDS.
+ __ AddConstant(R0, R1, -0x1002, AL, kCcSet); // MOVW+SUBS.
+ __ AddConstant(R0, R1, -0xffff, AL, kCcSet); // MOVW+SUBS.
+ __ AddConstant(R0, R1, -0x10000, AL, kCcSet); // 32-bit SUBS, encoding T3.
+ __ AddConstant(R0, R1, -0x10001, AL, kCcSet); // 32-bit SUBS, encoding T3.
+ __ AddConstant(R0, R1, -0x10002, AL, kCcSet); // MVN+ADDS.
+ __ AddConstant(R0, R1, -0x10003, AL, kCcSet); // MOVW+MOVT+ADDS.
+
+ // Low registers, Rd == Rn, kCcSet.
+ __ AddConstant(R0, R0, 0, AL, kCcSet); // 16-bit ADDS, encoding T2.
+ __ AddConstant(R1, R1, 1, AL, kCcSet); // 16-bit ADDS, encoding T2.
+ __ AddConstant(R0, R0, 7, AL, kCcSet); // 16-bit ADDS, encoding T2.
+ __ AddConstant(R1, R1, 8, AL, kCcSet); // 16-bit ADDS, encoding T2.
+ __ AddConstant(R0, R0, 255, AL, kCcSet); // 16-bit ADDS, encoding T2.
+ __ AddConstant(R1, R1, 256, AL, kCcSet); // 32-bit ADDS, encoding T3.
+ __ AddConstant(R0, R0, 257, AL, kCcSet); // MVN+SUBS.
+ __ AddConstant(R1, R1, 0xfff, AL, kCcSet); // MOVW+ADDS.
+ __ AddConstant(R0, R0, 0x1000, AL, kCcSet); // 32-bit ADDS, encoding T3.
+ __ AddConstant(R1, R1, 0x1001, AL, kCcSet); // MVN+SUBS.
+ __ AddConstant(R0, R0, 0x1002, AL, kCcSet); // MOVW+ADDS.
+ __ AddConstant(R1, R1, 0xffff, AL, kCcSet); // MOVW+ADDS.
+ __ AddConstant(R0, R0, 0x10000, AL, kCcSet); // 32-bit ADDS, encoding T3.
+ __ AddConstant(R1, R1, 0x10001, AL, kCcSet); // 32-bit ADDS, encoding T3.
+ __ AddConstant(R0, R0, 0x10002, AL, kCcSet); // MVN+SUBS.
+ __ AddConstant(R1, R1, 0x10003, AL, kCcSet); // MOVW+MOVT+ADDS.
+ __ AddConstant(R0, R0, -1, AL, kCcSet); // 16-bit SUBS, encoding T2.
+ __ AddConstant(R1, R1, -7, AL, kCcSet); // 16-bit SUBS, encoding T2.
+ __ AddConstant(R0, R0, -8, AL, kCcSet); // 16-bit SUBS, encoding T2.
+ __ AddConstant(R1, R1, -255, AL, kCcSet); // 16-bit SUBS, encoding T2.
+ __ AddConstant(R0, R0, -256, AL, kCcSet); // 32-bit SUBS, encoding T3.
+ __ AddConstant(R1, R1, -257, AL, kCcSet); // MVN+ADDS.
+ __ AddConstant(R0, R0, -0xfff, AL, kCcSet); // MOVW+SUBS.
+ __ AddConstant(R1, R1, -0x1000, AL, kCcSet); // 32-bit SUBS, encoding T3.
+ __ AddConstant(R0, R0, -0x1001, AL, kCcSet); // MVN+ADDS.
+ __ AddConstant(R1, R1, -0x1002, AL, kCcSet); // MOVW+SUBS.
+ __ AddConstant(R0, R0, -0xffff, AL, kCcSet); // MOVW+SUBS.
+ __ AddConstant(R1, R1, -0x10000, AL, kCcSet); // 32-bit SUBS, encoding T3.
+ __ AddConstant(R0, R0, -0x10001, AL, kCcSet); // 32-bit SUBS, encoding T3.
+ __ AddConstant(R1, R1, -0x10002, AL, kCcSet); // MVN+ADDS.
+ __ AddConstant(R0, R0, -0x10003, AL, kCcSet); // MOVW+MOVT+ADDS.
+
+ __ it(EQ);
+ __ AddConstant(R0, R1, 1, EQ, kCcSet); // 32-bit ADDS, encoding T3.
+ __ it(NE);
+ __ AddConstant(R0, R1, 1, NE, kCcKeep); // 16-bit ADDS, encoding T1.
+ __ it(GE);
+ __ AddConstant(R0, R0, 1, GE, kCcSet); // 32-bit ADDS, encoding T3.
+ __ it(LE);
+ __ AddConstant(R0, R0, 1, LE, kCcKeep); // 16-bit ADDS, encoding T2.
+
+ EmitAndCheck(&assembler, "AddConstant");
+}
+
#undef __
} // namespace arm
} // namespace art
diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc
index 886295e9f9..f07f8c74d7 100644
--- a/compiler/utils/assembler_thumb_test_expected.cc.inc
+++ b/compiler/utils/assembler_thumb_test_expected.cc.inc
@@ -5052,6 +5052,324 @@ const char* CompareAndBranchResults[] = {
nullptr
};
+const char* AddConstantResults[] = {
+ " 0: 4608 mov r0, r1\n",
+ " 2: 1c48 adds r0, r1, #1\n",
+ " 4: 1dc8 adds r0, r1, #7\n",
+ " 6: f101 0008 add.w r0, r1, #8\n",
+ " a: f101 00ff add.w r0, r1, #255 ; 0xff\n",
+ " e: f501 7080 add.w r0, r1, #256 ; 0x100\n",
+ " 12: f201 1001 addw r0, r1, #257 ; 0x101\n",
+ " 16: f601 70ff addw r0, r1, #4095 ; 0xfff\n",
+ " 1a: f501 5080 add.w r0, r1, #4096 ; 0x1000\n",
+ " 1e: f46f 5080 mvn.w r0, #4096 ; 0x1000\n",
+ " 22: 1a08 subs r0, r1, r0\n",
+ " 24: f241 0002 movw r0, #4098 ; 0x1002\n",
+ " 28: 1808 adds r0, r1, r0\n",
+ " 2a: f64f 70ff movw r0, #65535 ; 0xffff\n",
+ " 2e: 1808 adds r0, r1, r0\n",
+ " 30: f501 3080 add.w r0, r1, #65536 ; 0x10000\n",
+ " 34: f101 1001 add.w r0, r1, #65537 ; 0x10001\n",
+ " 38: f06f 1001 mvn.w r0, #65537 ; 0x10001\n",
+ " 3c: 1a08 subs r0, r1, r0\n",
+ " 3e: f240 0003 movw r0, #3\n",
+ " 42: f2c0 0001 movt r0, #1\n",
+ " 46: 1808 adds r0, r1, r0\n",
+ " 48: 1e48 subs r0, r1, #1\n",
+ " 4a: 1fc8 subs r0, r1, #7\n",
+ " 4c: f1a1 0008 sub.w r0, r1, #8\n",
+ " 50: f1a1 00ff sub.w r0, r1, #255 ; 0xff\n",
+ " 54: f5a1 7080 sub.w r0, r1, #256 ; 0x100\n",
+ " 58: f2a1 1001 subw r0, r1, #257 ; 0x101\n",
+ " 5c: f6a1 70ff subw r0, r1, #4095 ; 0xfff\n",
+ " 60: f5a1 5080 sub.w r0, r1, #4096 ; 0x1000\n",
+ " 64: f46f 5080 mvn.w r0, #4096 ; 0x1000\n",
+ " 68: 1808 adds r0, r1, r0\n",
+ " 6a: f241 0002 movw r0, #4098 ; 0x1002\n",
+ " 6e: 1a08 subs r0, r1, r0\n",
+ " 70: f64f 70ff movw r0, #65535 ; 0xffff\n",
+ " 74: 1a08 subs r0, r1, r0\n",
+ " 76: f5a1 3080 sub.w r0, r1, #65536 ; 0x10000\n",
+ " 7a: f1a1 1001 sub.w r0, r1, #65537 ; 0x10001\n",
+ " 7e: f06f 1001 mvn.w r0, #65537 ; 0x10001\n",
+ " 82: 1808 adds r0, r1, r0\n",
+ " 84: f64f 70fd movw r0, #65533 ; 0xfffd\n",
+ " 88: f6cf 70fe movt r0, #65534 ; 0xfffe\n",
+ " 8c: 1808 adds r0, r1, r0\n",
+ " 8e: 3101 adds r1, #1\n",
+ " 90: 3007 adds r0, #7\n",
+ " 92: 3108 adds r1, #8\n",
+ " 94: 30ff adds r0, #255 ; 0xff\n",
+ " 96: f501 7180 add.w r1, r1, #256 ; 0x100\n",
+ " 9a: f200 1001 addw r0, r0, #257 ; 0x101\n",
+ " 9e: f601 71ff addw r1, r1, #4095 ; 0xfff\n",
+ " a2: f500 5080 add.w r0, r0, #4096 ; 0x1000\n",
+ " a6: f46f 5c80 mvn.w ip, #4096 ; 0x1000\n",
+ " aa: eba1 010c sub.w r1, r1, ip\n",
+ " ae: f241 0c02 movw ip, #4098 ; 0x1002\n",
+ " b2: 4460 add r0, ip\n",
+ " b4: f64f 7cff movw ip, #65535 ; 0xffff\n",
+ " b8: 4461 add r1, ip\n",
+ " ba: f500 3080 add.w r0, r0, #65536 ; 0x10000\n",
+ " be: f101 1101 add.w r1, r1, #65537 ; 0x10001\n",
+ " c2: f06f 1c01 mvn.w ip, #65537 ; 0x10001\n",
+ " c6: eba0 000c sub.w r0, r0, ip\n",
+ " ca: f240 0c03 movw ip, #3\n",
+ " ce: f2c0 0c01 movt ip, #1\n",
+ " d2: 4461 add r1, ip\n",
+ " d4: 3801 subs r0, #1\n",
+ " d6: 3907 subs r1, #7\n",
+ " d8: 3808 subs r0, #8\n",
+ " da: 39ff subs r1, #255 ; 0xff\n",
+ " dc: f5a0 7080 sub.w r0, r0, #256 ; 0x100\n",
+ " e0: f2a1 1101 subw r1, r1, #257 ; 0x101\n",
+ " e4: f6a0 70ff subw r0, r0, #4095 ; 0xfff\n",
+ " e8: f5a1 5180 sub.w r1, r1, #4096 ; 0x1000\n",
+ " ec: f46f 5c80 mvn.w ip, #4096 ; 0x1000\n",
+ " f0: 4460 add r0, ip\n",
+ " f2: f241 0c02 movw ip, #4098 ; 0x1002\n",
+ " f6: eba1 010c sub.w r1, r1, ip\n",
+ " fa: f64f 7cff movw ip, #65535 ; 0xffff\n",
+ " fe: eba0 000c sub.w r0, r0, ip\n",
+ " 102: f5a1 3180 sub.w r1, r1, #65536 ; 0x10000\n",
+ " 106: f1a0 1001 sub.w r0, r0, #65537 ; 0x10001\n",
+ " 10a: f06f 1c01 mvn.w ip, #65537 ; 0x10001\n",
+ " 10e: 4461 add r1, ip\n",
+ " 110: f64f 7cfd movw ip, #65533 ; 0xfffd\n",
+ " 114: f6cf 7cfe movt ip, #65534 ; 0xfffe\n",
+ " 118: 4460 add r0, ip\n",
+ " 11a: f101 0801 add.w r8, r1, #1\n",
+ " 11e: f108 0007 add.w r0, r8, #7\n",
+ " 122: f108 0808 add.w r8, r8, #8\n",
+ " 126: f101 08ff add.w r8, r1, #255 ; 0xff\n",
+ " 12a: f508 7080 add.w r0, r8, #256 ; 0x100\n",
+ " 12e: f208 1801 addw r8, r8, #257 ; 0x101\n",
+ " 132: f601 78ff addw r8, r1, #4095 ; 0xfff\n",
+ " 136: f508 5080 add.w r0, r8, #4096 ; 0x1000\n",
+ " 13a: f46f 5c80 mvn.w ip, #4096 ; 0x1000\n",
+ " 13e: eba8 080c sub.w r8, r8, ip\n",
+ " 142: f241 0002 movw r0, #4098 ; 0x1002\n",
+ " 146: 1808 adds r0, r1, r0\n",
+ " 148: f64f 70ff movw r0, #65535 ; 0xffff\n",
+ " 14c: eb08 0000 add.w r0, r8, r0\n",
+ " 150: f508 3880 add.w r8, r8, #65536 ; 0x10000\n",
+ " 154: f101 1801 add.w r8, r1, #65537 ; 0x10001\n",
+ " 158: f06f 1001 mvn.w r0, #65537 ; 0x10001\n",
+ " 15c: eba8 0000 sub.w r0, r8, r0\n",
+ " 160: f240 0003 movw r0, #3\n",
+ " 164: f2c0 0001 movt r0, #1\n",
+ " 168: eb08 0000 add.w r0, r8, r0\n",
+ " 16c: f108 38ff add.w r8, r8, #4294967295 ; 0xffffffff\n",
+ " 170: f1a1 0807 sub.w r8, r1, #7\n",
+ " 174: f1a8 0008 sub.w r0, r8, #8\n",
+ " 178: f1a8 08ff sub.w r8, r8, #255 ; 0xff\n",
+ " 17c: f5a1 7880 sub.w r8, r1, #256 ; 0x100\n",
+ " 180: f2a8 1001 subw r0, r8, #257 ; 0x101\n",
+ " 184: f6a8 78ff subw r8, r8, #4095 ; 0xfff\n",
+ " 188: f5a1 5880 sub.w r8, r1, #4096 ; 0x1000\n",
+ " 18c: f46f 5080 mvn.w r0, #4096 ; 0x1000\n",
+ " 190: eb08 0000 add.w r0, r8, r0\n",
+ " 194: f241 0002 movw r0, #4098 ; 0x1002\n",
+ " 198: 1a08 subs r0, r1, r0\n",
+ " 19a: f64f 78ff movw r8, #65535 ; 0xffff\n",
+ " 19e: eba1 0808 sub.w r8, r1, r8\n",
+ " 1a2: f5a8 3080 sub.w r0, r8, #65536 ; 0x10000\n",
+ " 1a6: f1a8 1801 sub.w r8, r8, #65537 ; 0x10001\n",
+ " 1aa: f06f 1801 mvn.w r8, #65537 ; 0x10001\n",
+ " 1ae: eb01 0808 add.w r8, r1, r8\n",
+ " 1b2: f64f 70fd movw r0, #65533 ; 0xfffd\n",
+ " 1b6: f6cf 70fe movt r0, #65534 ; 0xfffe\n",
+ " 1ba: eb08 0000 add.w r0, r8, r0\n",
+ " 1be: 4608 mov r0, r1\n",
+ " 1c0: f101 0001 add.w r0, r1, #1\n",
+ " 1c4: f101 0007 add.w r0, r1, #7\n",
+ " 1c8: f101 0008 add.w r0, r1, #8\n",
+ " 1cc: f101 00ff add.w r0, r1, #255 ; 0xff\n",
+ " 1d0: f501 7080 add.w r0, r1, #256 ; 0x100\n",
+ " 1d4: f201 1001 addw r0, r1, #257 ; 0x101\n",
+ " 1d8: f601 70ff addw r0, r1, #4095 ; 0xfff\n",
+ " 1dc: f501 5080 add.w r0, r1, #4096 ; 0x1000\n",
+ " 1e0: f46f 5080 mvn.w r0, #4096 ; 0x1000\n",
+ " 1e4: eba1 0000 sub.w r0, r1, r0\n",
+ " 1e8: f241 0002 movw r0, #4098 ; 0x1002\n",
+ " 1ec: eb01 0000 add.w r0, r1, r0\n",
+ " 1f0: f64f 70ff movw r0, #65535 ; 0xffff\n",
+ " 1f4: eb01 0000 add.w r0, r1, r0\n",
+ " 1f8: f501 3080 add.w r0, r1, #65536 ; 0x10000\n",
+ " 1fc: f101 1001 add.w r0, r1, #65537 ; 0x10001\n",
+ " 200: f06f 1001 mvn.w r0, #65537 ; 0x10001\n",
+ " 204: eba1 0000 sub.w r0, r1, r0\n",
+ " 208: f240 0003 movw r0, #3\n",
+ " 20c: f2c0 0001 movt r0, #1\n",
+ " 210: eb01 0000 add.w r0, r1, r0\n",
+ " 214: f101 30ff add.w r0, r1, #4294967295 ; 0xffffffff\n",
+ " 218: f1a1 0007 sub.w r0, r1, #7\n",
+ " 21c: f1a1 0008 sub.w r0, r1, #8\n",
+ " 220: f1a1 00ff sub.w r0, r1, #255 ; 0xff\n",
+ " 224: f5a1 7080 sub.w r0, r1, #256 ; 0x100\n",
+ " 228: f2a1 1001 subw r0, r1, #257 ; 0x101\n",
+ " 22c: f6a1 70ff subw r0, r1, #4095 ; 0xfff\n",
+ " 230: f5a1 5080 sub.w r0, r1, #4096 ; 0x1000\n",
+ " 234: f46f 5080 mvn.w r0, #4096 ; 0x1000\n",
+ " 238: eb01 0000 add.w r0, r1, r0\n",
+ " 23c: f241 0002 movw r0, #4098 ; 0x1002\n",
+ " 240: eba1 0000 sub.w r0, r1, r0\n",
+ " 244: f64f 70ff movw r0, #65535 ; 0xffff\n",
+ " 248: eba1 0000 sub.w r0, r1, r0\n",
+ " 24c: f5a1 3080 sub.w r0, r1, #65536 ; 0x10000\n",
+ " 250: f1a1 1001 sub.w r0, r1, #65537 ; 0x10001\n",
+ " 254: f06f 1001 mvn.w r0, #65537 ; 0x10001\n",
+ " 258: eb01 0000 add.w r0, r1, r0\n",
+ " 25c: f64f 70fd movw r0, #65533 ; 0xfffd\n",
+ " 260: f6cf 70fe movt r0, #65534 ; 0xfffe\n",
+ " 264: eb01 0000 add.w r0, r1, r0\n",
+ " 268: f101 0101 add.w r1, r1, #1\n",
+ " 26c: f100 0007 add.w r0, r0, #7\n",
+ " 270: f101 0108 add.w r1, r1, #8\n",
+ " 274: f100 00ff add.w r0, r0, #255 ; 0xff\n",
+ " 278: f501 7180 add.w r1, r1, #256 ; 0x100\n",
+ " 27c: f200 1001 addw r0, r0, #257 ; 0x101\n",
+ " 280: f601 71ff addw r1, r1, #4095 ; 0xfff\n",
+ " 284: f500 5080 add.w r0, r0, #4096 ; 0x1000\n",
+ " 288: f46f 5c80 mvn.w ip, #4096 ; 0x1000\n",
+ " 28c: eba1 010c sub.w r1, r1, ip\n",
+ " 290: f241 0c02 movw ip, #4098 ; 0x1002\n",
+ " 294: 4460 add r0, ip\n",
+ " 296: f64f 7cff movw ip, #65535 ; 0xffff\n",
+ " 29a: 4461 add r1, ip\n",
+ " 29c: f500 3080 add.w r0, r0, #65536 ; 0x10000\n",
+ " 2a0: f101 1101 add.w r1, r1, #65537 ; 0x10001\n",
+ " 2a4: f06f 1c01 mvn.w ip, #65537 ; 0x10001\n",
+ " 2a8: eba0 000c sub.w r0, r0, ip\n",
+ " 2ac: f240 0c03 movw ip, #3\n",
+ " 2b0: f2c0 0c01 movt ip, #1\n",
+ " 2b4: 4461 add r1, ip\n",
+ " 2b6: f100 30ff add.w r0, r0, #4294967295 ; 0xffffffff\n",
+ " 2ba: f1a1 0107 sub.w r1, r1, #7\n",
+ " 2be: f1a0 0008 sub.w r0, r0, #8\n",
+ " 2c2: f1a1 01ff sub.w r1, r1, #255 ; 0xff\n",
+ " 2c6: f5a0 7080 sub.w r0, r0, #256 ; 0x100\n",
+ " 2ca: f2a1 1101 subw r1, r1, #257 ; 0x101\n",
+ " 2ce: f6a0 70ff subw r0, r0, #4095 ; 0xfff\n",
+ " 2d2: f5a1 5180 sub.w r1, r1, #4096 ; 0x1000\n",
+ " 2d6: f46f 5c80 mvn.w ip, #4096 ; 0x1000\n",
+ " 2da: 4460 add r0, ip\n",
+ " 2dc: f241 0c02 movw ip, #4098 ; 0x1002\n",
+ " 2e0: eba1 010c sub.w r1, r1, ip\n",
+ " 2e4: f64f 7cff movw ip, #65535 ; 0xffff\n",
+ " 2e8: eba0 000c sub.w r0, r0, ip\n",
+ " 2ec: f5a1 3180 sub.w r1, r1, #65536 ; 0x10000\n",
+ " 2f0: f1a0 1001 sub.w r0, r0, #65537 ; 0x10001\n",
+ " 2f4: f06f 1c01 mvn.w ip, #65537 ; 0x10001\n",
+ " 2f8: 4461 add r1, ip\n",
+ " 2fa: f64f 7cfd movw ip, #65533 ; 0xfffd\n",
+ " 2fe: f6cf 7cfe movt ip, #65534 ; 0xfffe\n",
+ " 302: 4460 add r0, ip\n",
+ " 304: 1c08 adds r0, r1, #0\n",
+ " 306: 1c48 adds r0, r1, #1\n",
+ " 308: 1dc8 adds r0, r1, #7\n",
+ " 30a: f111 0008 adds.w r0, r1, #8\n",
+ " 30e: f111 00ff adds.w r0, r1, #255 ; 0xff\n",
+ " 312: f511 7080 adds.w r0, r1, #256 ; 0x100\n",
+ " 316: f46f 7080 mvn.w r0, #256 ; 0x100\n",
+ " 31a: 1a08 subs r0, r1, r0\n",
+ " 31c: f640 70ff movw r0, #4095 ; 0xfff\n",
+ " 320: 1808 adds r0, r1, r0\n",
+ " 322: f511 5080 adds.w r0, r1, #4096 ; 0x1000\n",
+ " 326: f46f 5080 mvn.w r0, #4096 ; 0x1000\n",
+ " 32a: 1a08 subs r0, r1, r0\n",
+ " 32c: f241 0002 movw r0, #4098 ; 0x1002\n",
+ " 330: 1808 adds r0, r1, r0\n",
+ " 332: f64f 70ff movw r0, #65535 ; 0xffff\n",
+ " 336: 1808 adds r0, r1, r0\n",
+ " 338: f511 3080 adds.w r0, r1, #65536 ; 0x10000\n",
+ " 33c: f111 1001 adds.w r0, r1, #65537 ; 0x10001\n",
+ " 340: f06f 1001 mvn.w r0, #65537 ; 0x10001\n",
+ " 344: 1a08 subs r0, r1, r0\n",
+ " 346: f240 0003 movw r0, #3\n",
+ " 34a: f2c0 0001 movt r0, #1\n",
+ " 34e: 1808 adds r0, r1, r0\n",
+ " 350: 1e48 subs r0, r1, #1\n",
+ " 352: 1fc8 subs r0, r1, #7\n",
+ " 354: f1b1 0008 subs.w r0, r1, #8\n",
+ " 358: f1b1 00ff subs.w r0, r1, #255 ; 0xff\n",
+ " 35c: f5b1 7080 subs.w r0, r1, #256 ; 0x100\n",
+ " 360: f46f 7080 mvn.w r0, #256 ; 0x100\n",
+ " 364: 1808 adds r0, r1, r0\n",
+ " 366: f640 70ff movw r0, #4095 ; 0xfff\n",
+ " 36a: 1a08 subs r0, r1, r0\n",
+ " 36c: f5b1 5080 subs.w r0, r1, #4096 ; 0x1000\n",
+ " 370: f46f 5080 mvn.w r0, #4096 ; 0x1000\n",
+ " 374: 1808 adds r0, r1, r0\n",
+ " 376: f241 0002 movw r0, #4098 ; 0x1002\n",
+ " 37a: 1a08 subs r0, r1, r0\n",
+ " 37c: f64f 70ff movw r0, #65535 ; 0xffff\n",
+ " 380: 1a08 subs r0, r1, r0\n",
+ " 382: f5b1 3080 subs.w r0, r1, #65536 ; 0x10000\n",
+ " 386: f1b1 1001 subs.w r0, r1, #65537 ; 0x10001\n",
+ " 38a: f06f 1001 mvn.w r0, #65537 ; 0x10001\n",
+ " 38e: 1808 adds r0, r1, r0\n",
+ " 390: f64f 70fd movw r0, #65533 ; 0xfffd\n",
+ " 394: f6cf 70fe movt r0, #65534 ; 0xfffe\n",
+ " 398: 1808 adds r0, r1, r0\n",
+ " 39a: 3000 adds r0, #0\n",
+ " 39c: 3101 adds r1, #1\n",
+ " 39e: 3007 adds r0, #7\n",
+ " 3a0: 3108 adds r1, #8\n",
+ " 3a2: 30ff adds r0, #255 ; 0xff\n",
+ " 3a4: f511 7180 adds.w r1, r1, #256 ; 0x100\n",
+ " 3a8: f46f 7c80 mvn.w ip, #256 ; 0x100\n",
+ " 3ac: ebb0 000c subs.w r0, r0, ip\n",
+ " 3b0: f640 7cff movw ip, #4095 ; 0xfff\n",
+ " 3b4: eb11 010c adds.w r1, r1, ip\n",
+ " 3b8: f510 5080 adds.w r0, r0, #4096 ; 0x1000\n",
+ " 3bc: f46f 5c80 mvn.w ip, #4096 ; 0x1000\n",
+ " 3c0: ebb1 010c subs.w r1, r1, ip\n",
+ " 3c4: f241 0c02 movw ip, #4098 ; 0x1002\n",
+ " 3c8: eb10 000c adds.w r0, r0, ip\n",
+ " 3cc: f64f 7cff movw ip, #65535 ; 0xffff\n",
+ " 3d0: eb11 010c adds.w r1, r1, ip\n",
+ " 3d4: f510 3080 adds.w r0, r0, #65536 ; 0x10000\n",
+ " 3d8: f111 1101 adds.w r1, r1, #65537 ; 0x10001\n",
+ " 3dc: f06f 1c01 mvn.w ip, #65537 ; 0x10001\n",
+ " 3e0: ebb0 000c subs.w r0, r0, ip\n",
+ " 3e4: f240 0c03 movw ip, #3\n",
+ " 3e8: f2c0 0c01 movt ip, #1\n",
+ " 3ec: eb11 010c adds.w r1, r1, ip\n",
+ " 3f0: 3801 subs r0, #1\n",
+ " 3f2: 3907 subs r1, #7\n",
+ " 3f4: 3808 subs r0, #8\n",
+ " 3f6: 39ff subs r1, #255 ; 0xff\n",
+ " 3f8: f5b0 7080 subs.w r0, r0, #256 ; 0x100\n",
+ " 3fc: f46f 7c80 mvn.w ip, #256 ; 0x100\n",
+ " 400: eb11 010c adds.w r1, r1, ip\n",
+ " 404: f640 7cff movw ip, #4095 ; 0xfff\n",
+ " 408: ebb0 000c subs.w r0, r0, ip\n",
+ " 40c: f5b1 5180 subs.w r1, r1, #4096 ; 0x1000\n",
+ " 410: f46f 5c80 mvn.w ip, #4096 ; 0x1000\n",
+ " 414: eb10 000c adds.w r0, r0, ip\n",
+ " 418: f241 0c02 movw ip, #4098 ; 0x1002\n",
+ " 41c: ebb1 010c subs.w r1, r1, ip\n",
+ " 420: f64f 7cff movw ip, #65535 ; 0xffff\n",
+ " 424: ebb0 000c subs.w r0, r0, ip\n",
+ " 428: f5b1 3180 subs.w r1, r1, #65536 ; 0x10000\n",
+ " 42c: f1b0 1001 subs.w r0, r0, #65537 ; 0x10001\n",
+ " 430: f06f 1c01 mvn.w ip, #65537 ; 0x10001\n",
+ " 434: eb11 010c adds.w r1, r1, ip\n",
+ " 438: f64f 7cfd movw ip, #65533 ; 0xfffd\n",
+ " 43c: f6cf 7cfe movt ip, #65534 ; 0xfffe\n",
+ " 440: eb10 000c adds.w r0, r0, ip\n",
+ " 444: bf08 it eq\n",
+ " 446: f111 0001 addseq.w r0, r1, #1\n",
+ " 44a: bf18 it ne\n",
+ " 44c: 1c48 addne r0, r1, #1\n",
+ " 44e: bfa8 it ge\n",
+ " 450: f110 0001 addsge.w r0, r0, #1\n",
+ " 454: bfd8 it le\n",
+ " 456: 3001 addle r0, #1\n",
+ nullptr
+};
+
std::map<std::string, const char* const*> test_results;
void setup_results() {
test_results["SimpleMov"] = SimpleMovResults;
@@ -5102,4 +5420,5 @@ void setup_results() {
test_results["LoadStoreLiteral"] = LoadStoreLiteralResults;
test_results["LoadStoreLimits"] = LoadStoreLimitsResults;
test_results["CompareAndBranch"] = CompareAndBranchResults;
+ test_results["AddConstant"] = AddConstantResults;
}
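
(Note: the expected-output arrays registered above are nullptr-terminated — see the sentinel closing AddConstantResults — so a consumer can walk them without a stored length. A hedged sketch of how a driver might do that; the actual comparison against objdump output lives elsewhere in the harness, and CheckLine is a placeholder name, not a function in this file:)

    // Hypothetical consumer of the registry populated by setup_results().
    const char* const* expected = test_results["AddConstant"];
    for (size_t i = 0; expected[i] != nullptr; ++i) {
      CheckLine(i, expected[i]);  // compare against the i-th disassembled line
    }
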
diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc
index 107d5bb572..cfd8421e93 100644
--- a/compiler/utils/mips64/assembler_mips64.cc
+++ b/compiler/utils/mips64/assembler_mips64.cc
@@ -616,6 +616,14 @@ void Mips64Assembler::Bnezc(GpuRegister rs, uint32_t imm21) {
EmitI21(0x3E, rs, imm21);
}
+void Mips64Assembler::Bc1eqz(FpuRegister ft, uint16_t imm16) {
+ EmitFI(0x11, 0x9, ft, imm16);
+}
+
+void Mips64Assembler::Bc1nez(FpuRegister ft, uint16_t imm16) {
+ EmitFI(0x11, 0xD, ft, imm16);
+}
+
void Mips64Assembler::EmitBcondc(BranchCondition cond,
GpuRegister rs,
GpuRegister rt,
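
(For reference, EmitFI packs these operands into the standard R6 BC1EQZ/BC1NEZ layout: opcode 0x11 (COP1) in bits 31..26, the sub-opcode 0x9 or 0xD in bits 25..21, ft in bits 20..16, and the 16-bit instruction offset in bits 15..0. A self-contained sketch of the resulting word, with the field layout taken from the MIPS64r6 manual and a hypothetical helper name:)

    #include <cstdint>

    // Builds the 32-bit BC1EQZ/BC1NEZ instruction word by hand.
    uint32_t EncodeBc1xz(bool branch_if_nonzero, uint32_t ft, uint16_t imm16) {
      const uint32_t op = 0x11;                            // COP1 major opcode
      const uint32_t fun = branch_if_nonzero ? 0xD : 0x9;  // BC1NEZ : BC1EQZ
      return (op << 26) | (fun << 21) | ((ft & 0x1F) << 16) | imm16;
    }
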
@@ -669,6 +677,14 @@ void Mips64Assembler::EmitBcondc(BranchCondition cond,
case kCondGEU:
Bgeuc(rs, rt, imm16_21);
break;
+ case kCondF:
+ CHECK_EQ(rt, ZERO);
+ Bc1eqz(static_cast<FpuRegister>(rs), imm16_21);
+ break;
+ case kCondT:
+ CHECK_EQ(rt, ZERO);
+ Bc1nez(static_cast<FpuRegister>(rs), imm16_21);
+ break;
case kUncond:
LOG(FATAL) << "Unexpected branch condition " << cond;
UNREACHABLE();
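
(The new kCondF/kCondT arms reuse the integer branch plumbing: the FPU condition register travels through the Branch machinery in the rs slot as a cast GpuRegister, rt is pinned to ZERO as a consistency check, and EmitBcondc casts rs back before emitting. The round trip, quoting this diff:)

    // Front end (see the Bc1eqz/Bc1nez label overloads below):
    //   Bcond(label, kCondF, static_cast<GpuRegister>(ft), ZERO);
    // Back end (the case arms above):
    //   Bc1eqz(static_cast<FpuRegister>(rs), imm16_21);
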
@@ -827,6 +843,86 @@ void Mips64Assembler::MaxD(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
EmitFR(0x11, 0x11, ft, fs, fd, 0x1e);
}
+void Mips64Assembler::CmpUnS(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+ EmitFR(0x11, 0x14, ft, fs, fd, 0x01);
+}
+
+void Mips64Assembler::CmpEqS(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+ EmitFR(0x11, 0x14, ft, fs, fd, 0x02);
+}
+
+void Mips64Assembler::CmpUeqS(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+ EmitFR(0x11, 0x14, ft, fs, fd, 0x03);
+}
+
+void Mips64Assembler::CmpLtS(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+ EmitFR(0x11, 0x14, ft, fs, fd, 0x04);
+}
+
+void Mips64Assembler::CmpUltS(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+ EmitFR(0x11, 0x14, ft, fs, fd, 0x05);
+}
+
+void Mips64Assembler::CmpLeS(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+ EmitFR(0x11, 0x14, ft, fs, fd, 0x06);
+}
+
+void Mips64Assembler::CmpUleS(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+ EmitFR(0x11, 0x14, ft, fs, fd, 0x07);
+}
+
+void Mips64Assembler::CmpOrS(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+ EmitFR(0x11, 0x14, ft, fs, fd, 0x11);
+}
+
+void Mips64Assembler::CmpUneS(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+ EmitFR(0x11, 0x14, ft, fs, fd, 0x12);
+}
+
+void Mips64Assembler::CmpNeS(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+ EmitFR(0x11, 0x14, ft, fs, fd, 0x13);
+}
+
+void Mips64Assembler::CmpUnD(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+ EmitFR(0x11, 0x15, ft, fs, fd, 0x01);
+}
+
+void Mips64Assembler::CmpEqD(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+ EmitFR(0x11, 0x15, ft, fs, fd, 0x02);
+}
+
+void Mips64Assembler::CmpUeqD(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+ EmitFR(0x11, 0x15, ft, fs, fd, 0x03);
+}
+
+void Mips64Assembler::CmpLtD(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+ EmitFR(0x11, 0x15, ft, fs, fd, 0x04);
+}
+
+void Mips64Assembler::CmpUltD(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+ EmitFR(0x11, 0x15, ft, fs, fd, 0x05);
+}
+
+void Mips64Assembler::CmpLeD(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+ EmitFR(0x11, 0x15, ft, fs, fd, 0x06);
+}
+
+void Mips64Assembler::CmpUleD(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+ EmitFR(0x11, 0x15, ft, fs, fd, 0x07);
+}
+
+void Mips64Assembler::CmpOrD(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+ EmitFR(0x11, 0x15, ft, fs, fd, 0x11);
+}
+
+void Mips64Assembler::CmpUneD(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+ EmitFR(0x11, 0x15, ft, fs, fd, 0x12);
+}
+
+void Mips64Assembler::CmpNeD(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+ EmitFR(0x11, 0x15, ft, fs, fd, 0x13);
+}
+
void Mips64Assembler::Cvtsw(FpuRegister fd, FpuRegister fs) {
EmitFR(0x11, 0x14, static_cast<FpuRegister>(0), fs, fd, 0x20);
}
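
(These twenty emitters share one R6 encoding: COP1 (0x11), with the fmt field selecting single (0x14) versus double (0x15) precision and the six-bit function field selecting the condition. CMP.cond.fmt writes an all-ones/all-zeros mask to fd, and BC1EQZ/BC1NEZ test bit 0 of that register — which is how the two halves of this diff pair up. A minimal usage sketch built only from instructions added here; the helper name and register choices are arbitrary:)

    // Branch to 'taken' when F1 < F2 (single-precision).
    void BranchIfFloatLess(mips64::Mips64Assembler* a, mips64::Mips64Label* taken) {
      a->CmpLtS(mips64::F0, mips64::F1, mips64::F2);  // F0 := mask(F1 < F2)
      a->Bc1nez(mips64::F0, taken);                   // taken iff bit 0 of F0 set
    }
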
@@ -1134,6 +1230,10 @@ Mips64Assembler::Branch::Branch(uint32_t location,
CHECK_NE(lhs_reg, ZERO);
CHECK_EQ(rhs_reg, ZERO);
break;
+ case kCondF:
+ case kCondT:
+ CHECK_EQ(rhs_reg, ZERO);
+ break;
case kUncond:
UNREACHABLE();
}
@@ -1188,6 +1288,10 @@ Mips64Assembler::BranchCondition Mips64Assembler::Branch::OppositeCondition(
return kCondGEU;
case kCondGEU:
return kCondLTU;
+ case kCondF:
+ return kCondT;
+ case kCondT:
+ return kCondF;
case kUncond:
LOG(FATAL) << "Unexpected branch condition " << cond;
}
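
(Making kCondF and kCondT each other's opposites is what lets the generic long-branch expansion cover FP branches as well: when the target is out of the 16-bit BC1xZ range, the condition is inverted and a full-range unconditional branch is emitted instead. Schematically — a sketch of the idea, not the assembler's literal output:)

    bc1eqz $f0, far_label      # short form, only when in range; otherwise:
    bc1nez $f0, 1f             # inverted condition skips the long jump
    bc     far_label           # full-range unconditional branch
    1:
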
@@ -1567,7 +1671,7 @@ void Mips64Assembler::EmitBranch(Mips64Assembler::Branch* branch) {
case Branch::kCondBranch:
CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
EmitBcondc(condition, lhs, rhs, offset);
- Nop(); // TODO: improve by filling the forbidden slot.
+ Nop(); // TODO: improve by filling the forbidden/delay slot.
break;
case Branch::kCall:
CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
@@ -1657,6 +1761,14 @@ void Mips64Assembler::Bnezc(GpuRegister rs, Mips64Label* label) {
Bcond(label, kCondNEZ, rs);
}
+void Mips64Assembler::Bc1eqz(FpuRegister ft, Mips64Label* label) {
+ Bcond(label, kCondF, static_cast<GpuRegister>(ft), ZERO);
+}
+
+void Mips64Assembler::Bc1nez(FpuRegister ft, Mips64Label* label) {
+ Bcond(label, kCondT, static_cast<GpuRegister>(ft), ZERO);
+}
+
void Mips64Assembler::LoadFromOffset(LoadOperandType type, GpuRegister reg, GpuRegister base,
int32_t offset) {
if (!IsInt<16>(offset)) {
diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h
index 57fc19a6e9..883f013f87 100644
--- a/compiler/utils/mips64/assembler_mips64.h
+++ b/compiler/utils/mips64/assembler_mips64.h
@@ -227,6 +227,8 @@ class Mips64Assembler FINAL : public Assembler {
void Bnec(GpuRegister rs, GpuRegister rt, uint16_t imm16);
void Beqzc(GpuRegister rs, uint32_t imm21);
void Bnezc(GpuRegister rs, uint32_t imm21);
+ void Bc1eqz(FpuRegister ft, uint16_t imm16);
+ void Bc1nez(FpuRegister ft, uint16_t imm16);
void AddS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
void SubS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
@@ -266,6 +268,26 @@ class Mips64Assembler FINAL : public Assembler {
void MinD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
void MaxS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
void MaxD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+ void CmpUnS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+ void CmpEqS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+ void CmpUeqS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+ void CmpLtS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+ void CmpUltS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+ void CmpLeS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+ void CmpUleS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+ void CmpOrS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+ void CmpUneS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+ void CmpNeS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+ void CmpUnD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+ void CmpEqD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+ void CmpUeqD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+ void CmpLtD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+ void CmpUltD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+ void CmpLeD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+ void CmpUleD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+ void CmpOrD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+ void CmpUneD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+ void CmpNeD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
void Cvtsw(FpuRegister fd, FpuRegister fs);
void Cvtdw(FpuRegister fd, FpuRegister fs);
@@ -317,6 +339,8 @@ class Mips64Assembler FINAL : public Assembler {
void Bnec(GpuRegister rs, GpuRegister rt, Mips64Label* label);
void Beqzc(GpuRegister rs, Mips64Label* label);
void Bnezc(GpuRegister rs, Mips64Label* label);
+ void Bc1eqz(FpuRegister ft, Mips64Label* label);
+ void Bc1nez(FpuRegister ft, Mips64Label* label);
void EmitLoad(ManagedRegister m_dst, GpuRegister src_register, int32_t src_offset, size_t size);
void LoadFromOffset(LoadOperandType type, GpuRegister reg, GpuRegister base, int32_t offset);
@@ -474,6 +498,8 @@ class Mips64Assembler FINAL : public Assembler {
kCondNEZ,
kCondLTU,
kCondGEU,
+ kCondF, // Floating-point predicate false.
+ kCondT, // Floating-point predicate true.
kUncond,
};
friend std::ostream& operator<<(std::ostream& os, const BranchCondition& rhs);
diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc
index 29a5a88316..bac4375b35 100644
--- a/compiler/utils/mips64/assembler_mips64_test.cc
+++ b/compiler/utils/mips64/assembler_mips64_test.cc
@@ -403,6 +403,106 @@ TEST_F(AssemblerMIPS64Test, MaxD) {
DriverStr(RepeatFFF(&mips64::Mips64Assembler::MaxD, "max.d ${reg1}, ${reg2}, ${reg3}"), "max.d");
}
+TEST_F(AssemblerMIPS64Test, CmpUnS) {
+ DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUnS, "cmp.un.s ${reg1}, ${reg2}, ${reg3}"),
+ "cmp.un.s");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpEqS) {
+ DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpEqS, "cmp.eq.s ${reg1}, ${reg2}, ${reg3}"),
+ "cmp.eq.s");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpUeqS) {
+ DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUeqS, "cmp.ueq.s ${reg1}, ${reg2}, ${reg3}"),
+ "cmp.ueq.s");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpLtS) {
+ DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpLtS, "cmp.lt.s ${reg1}, ${reg2}, ${reg3}"),
+ "cmp.lt.s");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpUltS) {
+ DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUltS, "cmp.ult.s ${reg1}, ${reg2}, ${reg3}"),
+ "cmp.ult.s");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpLeS) {
+ DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpLeS, "cmp.le.s ${reg1}, ${reg2}, ${reg3}"),
+ "cmp.le.s");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpUleS) {
+ DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUleS, "cmp.ule.s ${reg1}, ${reg2}, ${reg3}"),
+ "cmp.ule.s");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpOrS) {
+ DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpOrS, "cmp.or.s ${reg1}, ${reg2}, ${reg3}"),
+ "cmp.or.s");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpUneS) {
+ DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUneS, "cmp.une.s ${reg1}, ${reg2}, ${reg3}"),
+ "cmp.une.s");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpNeS) {
+ DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpNeS, "cmp.ne.s ${reg1}, ${reg2}, ${reg3}"),
+ "cmp.ne.s");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpUnD) {
+ DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUnD, "cmp.un.d ${reg1}, ${reg2}, ${reg3}"),
+ "cmp.un.d");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpEqD) {
+ DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpEqD, "cmp.eq.d ${reg1}, ${reg2}, ${reg3}"),
+ "cmp.eq.d");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpUeqD) {
+ DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUeqD, "cmp.ueq.d ${reg1}, ${reg2}, ${reg3}"),
+ "cmp.ueq.d");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpLtD) {
+ DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpLtD, "cmp.lt.d ${reg1}, ${reg2}, ${reg3}"),
+ "cmp.lt.d");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpUltD) {
+ DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUltD, "cmp.ult.d ${reg1}, ${reg2}, ${reg3}"),
+ "cmp.ult.d");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpLeD) {
+ DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpLeD, "cmp.le.d ${reg1}, ${reg2}, ${reg3}"),
+ "cmp.le.d");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpUleD) {
+ DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUleD, "cmp.ule.d ${reg1}, ${reg2}, ${reg3}"),
+ "cmp.ule.d");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpOrD) {
+ DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpOrD, "cmp.or.d ${reg1}, ${reg2}, ${reg3}"),
+ "cmp.or.d");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpUneD) {
+ DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUneD, "cmp.une.d ${reg1}, ${reg2}, ${reg3}"),
+ "cmp.une.d");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpNeD) {
+ DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpNeD, "cmp.ne.d ${reg1}, ${reg2}, ${reg3}"),
+ "cmp.ne.d");
+}
+
TEST_F(AssemblerMIPS64Test, CvtDL) {
DriverStr(RepeatFF(&mips64::Mips64Assembler::Cvtdl, "cvt.d.l ${reg1}, ${reg2}"), "cvt.d.l");
}
@@ -591,6 +691,58 @@ TEST_F(AssemblerMIPS64Test, Bgeuc) {
BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bgeuc, "Bgeuc");
}
+TEST_F(AssemblerMIPS64Test, Bc1eqz) {
+ mips64::Mips64Label label;
+ __ Bc1eqz(mips64::F0, &label);
+ constexpr size_t kAdduCount1 = 63;
+ for (size_t i = 0; i != kAdduCount1; ++i) {
+ __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+ }
+ __ Bind(&label);
+ constexpr size_t kAdduCount2 = 64;
+ for (size_t i = 0; i != kAdduCount2; ++i) {
+ __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+ }
+ __ Bc1eqz(mips64::F31, &label);
+
+ std::string expected =
+ ".set noreorder\n"
+ "bc1eqz $f0, 1f\n"
+ "nop\n" +
+ RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+ "1:\n" +
+ RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+ "bc1eqz $f31, 1b\n"
+ "nop\n";
+ DriverStr(expected, "Bc1eqz");
+}
+
+TEST_F(AssemblerMIPS64Test, Bc1nez) {
+ mips64::Mips64Label label;
+ __ Bc1nez(mips64::F0, &label);
+ constexpr size_t kAdduCount1 = 63;
+ for (size_t i = 0; i != kAdduCount1; ++i) {
+ __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+ }
+ __ Bind(&label);
+ constexpr size_t kAdduCount2 = 64;
+ for (size_t i = 0; i != kAdduCount2; ++i) {
+ __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+ }
+ __ Bc1nez(mips64::F31, &label);
+
+ std::string expected =
+ ".set noreorder\n"
+ "bc1nez $f0, 1f\n"
+ "nop\n" +
+ RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+ "1:\n" +
+ RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+ "bc1nez $f31, 1b\n"
+ "nop\n";
+ DriverStr(expected, "Bc1nez");
+}
+
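
(Both branch tests above follow the same shape: a forward branch over 63 filler addu instructions, the label bound, 64 more fillers, then a backward branch to the same label. The expected text uses GNU assembler local labels — 1f binds forward, 1b backward — under .set noreorder, so the nop the assembler emits to fill the branch's delay slot is matched verbatim. Condensed, with the filler runs elided:)

    .set noreorder
    bc1eqz $f0, 1f      # forward branch to a not-yet-bound label
    nop                 # slot filler emitted by the assembler
    ...                 # 63 x addu $zero, $zero, $zero
    1:
    ...                 # 64 x addu $zero, $zero, $zero
    bc1eqz $f31, 1b     # backward branch to the bound label
    nop
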
TEST_F(AssemblerMIPS64Test, LongBeqc) {
mips64::Mips64Label label;
__ Beqc(mips64::A0, mips64::A1, &label);