Diffstat (limited to 'compiler')
41 files changed, 1841 insertions, 438 deletions
diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc index 638b897eea..c7c190793c 100644 --- a/compiler/common_compiler_test.cc +++ b/compiler/common_compiler_test.cc @@ -142,10 +142,7 @@ void CommonCompilerTest::MakeExecutable(mirror::ClassLoader* class_loader, const mirror::Class* klass = class_linker_->FindClass(self, class_descriptor.c_str(), loader); CHECK(klass != nullptr) << "Class not found " << class_name; size_t pointer_size = class_linker_->GetImagePointerSize(); - for (auto& m : klass->GetDirectMethods(pointer_size)) { - MakeExecutable(&m); - } - for (auto& m : klass->GetVirtualMethods(pointer_size)) { + for (auto& m : klass->GetMethods(pointer_size)) { MakeExecutable(&m); } } @@ -259,10 +256,7 @@ void CommonCompilerTest::CompileClass(mirror::ClassLoader* class_loader, const c mirror::Class* klass = class_linker_->FindClass(self, class_descriptor.c_str(), loader); CHECK(klass != nullptr) << "Class not found " << class_name; auto pointer_size = class_linker_->GetImagePointerSize(); - for (auto& m : klass->GetDirectMethods(pointer_size)) { - CompileMethod(&m); - } - for (auto& m : klass->GetVirtualMethods(pointer_size)) { + for (auto& m : klass->GetMethods(pointer_size)) { CompileMethod(&m); } } diff --git a/compiler/compiled_method.h b/compiler/compiled_method.h index 15a4ba0f6f..7a93613481 100644 --- a/compiler/compiled_method.h +++ b/compiler/compiled_method.h @@ -23,7 +23,7 @@ #include "arch/instruction_set.h" #include "base/bit_utils.h" -#include "length_prefixed_array.h" +#include "base/length_prefixed_array.h" #include "method_reference.h" #include "utils/array_ref.h" diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc index e5d3841b14..1c2a619020 100644 --- a/compiler/dex/quick/x86/assemble_x86.cc +++ b/compiler/dex/quick/x86/assemble_x86.cc @@ -508,6 +508,7 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0, { kX86Lfence, kReg, NO_OPERAND, { 0, 0, 0x0F, 0xAE, 0, 5, 0, 0, false }, "Lfence", "" }, { kX86Mfence, kReg, NO_OPERAND, { 0, 0, 0x0F, 0xAE, 0, 6, 0, 0, false }, "Mfence", "" }, { kX86Sfence, kReg, NO_OPERAND, { 0, 0, 0x0F, 0xAE, 0, 7, 0, 0, false }, "Sfence", "" }, + { kX86LockAdd32MI8, kMemImm, IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES, { 0xF0, 0, 0x83, 0x0, 0x0, 0, 0, 1, false }, "LockAdd32MI8", "[!0r+!1d],!2d" }, EXT_0F_ENCODING_MAP(Imul16, 0x66, 0xAF, REG_USE0 | REG_DEF0 | SETS_CCODES), EXT_0F_ENCODING_MAP(Imul32, 0x00, 0xAF, REG_USE0 | REG_DEF0 | SETS_CCODES), diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc index 75f3fef599..4ff79935d7 100755 --- a/compiler/dex/quick/x86/target_x86.cc +++ b/compiler/dex/quick/x86/target_x86.cc @@ -20,7 +20,7 @@ #include <inttypes.h> #include <string> -#include "arch/instruction_set_features.h" +#include "arch/x86/instruction_set_features_x86.h" #include "art_method.h" #include "backend_x86.h" #include "base/logging.h" @@ -585,6 +585,8 @@ bool X86Mir2Lir::ProvidesFullMemoryBarrier(X86OpCode opcode) { case kX86LockCmpxchgAR: case kX86LockCmpxchg64M: case kX86LockCmpxchg64A: + case kX86LockCmpxchg64AR: + case kX86LockAdd32MI8: case kX86XchgMR: case kX86Mfence: // Atomic memory instructions provide full barrier. 
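The new kX86LockAdd32MI8 opcode lets the Quick x86 backend emit a locked add to the top of the stack as a full memory barrier instead of mfence, gated on PrefersLockedAddSynchronization(). A minimal standalone sketch of that idiom (not ART code), assuming GCC/Clang inline assembly on x86-64:

    // Full fence via MFENCE: orders all prior loads and stores.
    inline void FenceMfence() {
      asm volatile("mfence" ::: "memory");
    }

    // Full fence via a locked no-op read-modify-write on the top of the stack,
    // the pattern kX86LockAdd32MI8 encodes: LOCK ADD touches (but does not
    // change) [rsp] and orders normal write-back memory like MFENCE.
    inline void FenceLockedAdd() {
      asm volatile("lock addl $0, (%%rsp)" ::: "memory", "cc");
    }

As the MemoryFence() helpers added later in this change note, the locked form should not be used to order non-temporal stores; those still take the mfence path.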
@@ -598,7 +600,9 @@ bool X86Mir2Lir::ProvidesFullMemoryBarrier(X86OpCode opcode) { } bool X86Mir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) { - if (!cu_->compiler_driver->GetInstructionSetFeatures()->IsSmp()) { + const X86InstructionSetFeatures* features = + cu_->compiler_driver->GetInstructionSetFeatures()->AsX86InstructionSetFeatures(); + if (!features->IsSmp()) { return false; } // Start off with using the last LIR as the barrier. If it is not enough, then we will update it. @@ -610,20 +614,34 @@ bool X86Mir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) { * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86 memory model. * For those cases, all we need to ensure is that there is a scheduling barrier in place. */ + const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32; + bool use_locked_add = features->PrefersLockedAddSynchronization(); if (barrier_kind == kAnyAny) { - // If no LIR exists already that can be used a barrier, then generate an mfence. + // If no LIR exists already that can be used a barrier, then generate a barrier. if (mem_barrier == nullptr) { - mem_barrier = NewLIR0(kX86Mfence); + if (use_locked_add) { + mem_barrier = NewLIR3(kX86LockAdd32MI8, rs_rSP.GetReg(), 0, 0); + } else { + mem_barrier = NewLIR0(kX86Mfence); + } ret = true; } - // If last instruction does not provide full barrier, then insert an mfence. + // If last instruction does not provide full barrier, then insert a barrier. if (ProvidesFullMemoryBarrier(static_cast<X86OpCode>(mem_barrier->opcode)) == false) { - mem_barrier = NewLIR0(kX86Mfence); + if (use_locked_add) { + mem_barrier = NewLIR3(kX86LockAdd32MI8, rs_rSP.GetReg(), 0, 0); + } else { + mem_barrier = NewLIR0(kX86Mfence); + } ret = true; } } else if (barrier_kind == kNTStoreStore) { - mem_barrier = NewLIR0(kX86Sfence); + if (use_locked_add) { + mem_barrier = NewLIR3(kX86LockAdd32MI8, rs_rSP.GetReg(), 0, 0); + } else { + mem_barrier = NewLIR0(kX86Sfence); + } ret = true; } diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h index d6a6a60d3d..8cd6574443 100644 --- a/compiler/dex/quick/x86/x86_lir.h +++ b/compiler/dex/quick/x86/x86_lir.h @@ -606,6 +606,7 @@ enum X86OpCode { // load-from-memory and store-to-memory instructions kX86Sfence, // memory barrier to serialize all previous // store-to-memory instructions + kX86LockAdd32MI8, // locked add used to serialize memory instructions Binary0fOpCode(kX86Imul16), // 16bit multiply Binary0fOpCode(kX86Imul32), // 32bit multiply Binary0fOpCode(kX86Imul64), // 64bit multiply diff --git a/compiler/driver/compiled_method_storage.h b/compiler/driver/compiled_method_storage.h index ef10b6768b..d6961a0876 100644 --- a/compiler/driver/compiled_method_storage.h +++ b/compiler/driver/compiled_method_storage.h @@ -20,8 +20,8 @@ #include <iosfwd> #include <memory> +#include "base/length_prefixed_array.h" #include "base/macros.h" -#include "length_prefixed_array.h" #include "utils/array_ref.h" #include "utils/dedupe_set.h" #include "utils/swap_space.h" diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc index ba8f1d0df1..56839f85f9 100644 --- a/compiler/driver/compiler_driver.cc +++ b/compiler/driver/compiler_driver.cc @@ -790,10 +790,7 @@ class ResolveCatchBlockExceptionsClassVisitor : public ClassVisitor { virtual bool Visit(mirror::Class* c) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) { const auto pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize(); - for (auto& m : 
c->GetVirtualMethods(pointer_size)) { - ResolveExceptionsForMethod(&m, pointer_size); - } - for (auto& m : c->GetDirectMethods(pointer_size)) { + for (auto& m : c->GetMethods(pointer_size)) { ResolveExceptionsForMethod(&m, pointer_size); } return true; diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc index 2e43c2c71a..462c511a83 100644 --- a/compiler/driver/compiler_driver_test.cc +++ b/compiler/driver/compiler_driver_test.cc @@ -86,10 +86,7 @@ class CompilerDriverTest : public CommonCompilerTest { mirror::Class* c = class_linker->FindClass(soa.Self(), descriptor, loader); CHECK(c != nullptr); const auto pointer_size = class_linker->GetImagePointerSize(); - for (auto& m : c->GetDirectMethods(pointer_size)) { - MakeExecutable(&m); - } - for (auto& m : c->GetVirtualMethods(pointer_size)) { + for (auto& m : c->GetMethods(pointer_size)) { MakeExecutable(&m); } } diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc index fce08ea5f0..9545c83eaf 100644 --- a/compiler/image_writer.cc +++ b/compiler/image_writer.cc @@ -1030,44 +1030,42 @@ void ImageWriter::WalkFieldsInOrder(mirror::Object* obj) { } } // Visit and assign offsets for methods. - LengthPrefixedArray<ArtMethod>* method_arrays[] = { - as_klass->GetDirectMethodsPtr(), as_klass->GetVirtualMethodsPtr(), - }; - for (LengthPrefixedArray<ArtMethod>* array : method_arrays) { - if (array == nullptr) { - continue; - } + size_t num_methods = as_klass->NumMethods(); + if (num_methods != 0) { bool any_dirty = false; - size_t count = 0; - const size_t method_alignment = ArtMethod::Alignment(target_ptr_size_); - const size_t method_size = ArtMethod::Size(target_ptr_size_); - auto iteration_range = - MakeIterationRangeFromLengthPrefixedArray(array, method_size, method_alignment); - for (auto& m : iteration_range) { - any_dirty = any_dirty || WillMethodBeDirty(&m); - ++count; + for (auto& m : as_klass->GetMethods(target_ptr_size_)) { + if (WillMethodBeDirty(&m)) { + any_dirty = true; + break; + } } NativeObjectRelocationType type = any_dirty ? kNativeObjectRelocationTypeArtMethodDirty : kNativeObjectRelocationTypeArtMethodClean; Bin bin_type = BinTypeForNativeRelocationType(type); // Forward the entire array at once, but header first. + const size_t method_alignment = ArtMethod::Alignment(target_ptr_size_); + const size_t method_size = ArtMethod::Size(target_ptr_size_); const size_t header_size = LengthPrefixedArray<ArtMethod>::ComputeSize(0, method_size, method_alignment); + LengthPrefixedArray<ArtMethod>* array = as_klass->GetMethodsPtr(); auto it = native_object_relocations_.find(array); - CHECK(it == native_object_relocations_.end()) << "Method array " << array - << " already forwarded"; + CHECK(it == native_object_relocations_.end()) + << "Method array " << array << " already forwarded"; size_t& offset = bin_slot_sizes_[bin_type]; DCHECK(!IsInBootImage(array)); - native_object_relocations_.emplace(array, NativeObjectRelocation { offset, - any_dirty ? kNativeObjectRelocationTypeArtMethodArrayDirty : - kNativeObjectRelocationTypeArtMethodArrayClean }); + native_object_relocations_.emplace( + array, NativeObjectRelocation { + offset, + any_dirty ? kNativeObjectRelocationTypeArtMethodArrayDirty + : kNativeObjectRelocationTypeArtMethodArrayClean + }); offset += header_size; - for (auto& m : iteration_range) { + for (auto& m : as_klass->GetMethods(target_ptr_size_)) { AssignMethodOffset(&m, type); } - (any_dirty ? dirty_methods_ : clean_methods_) += count; + (any_dirty ? 
dirty_methods_ : clean_methods_) += num_methods; } } else if (h_obj->IsObjectArray()) { // Walk elements of an object array. @@ -1275,6 +1273,8 @@ void ImageWriter::CreateHeader(size_t oat_loaded_size, size_t oat_data_offset) { auto* interned_strings_section = §ions[ImageHeader::kSectionInternedStrings]; *interned_strings_section = ImageSection(cur_pos, intern_table_bytes_); cur_pos = interned_strings_section->End(); + // Round up to the alignment the class table expects. See HashSet::WriteToMemory. + cur_pos = RoundUp(cur_pos, sizeof(uint64_t)); // Calculate the size of the class table section. auto* class_table_section = §ions[ImageHeader::kSectionClassTable]; *class_table_section = ImageSection(cur_pos, class_table_bytes_); diff --git a/compiler/image_writer.h b/compiler/image_writer.h index 8e930f0373..f1b2965a12 100644 --- a/compiler/image_writer.h +++ b/compiler/image_writer.h @@ -27,11 +27,11 @@ #include <ostream> #include "base/bit_utils.h" +#include "base/length_prefixed_array.h" #include "base/macros.h" #include "driver/compiler_driver.h" #include "gc/space/space.h" #include "image.h" -#include "length_prefixed_array.h" #include "lock_word.h" #include "mem_map.h" #include "oat_file.h" diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc index 8865ea2243..4fd73be55a 100644 --- a/compiler/oat_test.cc +++ b/compiler/oat_test.cc @@ -238,12 +238,12 @@ TEST_F(OatTest, WriteRead) { ++method_index; } size_t visited_virtuals = 0; - for (auto& m : klass->GetVirtualMethods(pointer_size)) { - if (!m.IsMiranda()) { - CheckMethod(&m, oat_class.GetOatMethod(method_index), dex_file); - ++method_index; - ++visited_virtuals; - } + // TODO We should also check copied methods in this test. + for (auto& m : klass->GetDeclaredVirtualMethods(pointer_size)) { + EXPECT_FALSE(m.IsMiranda()); + CheckMethod(&m, oat_class.GetOatMethod(method_index), dex_file); + ++method_index; + ++visited_virtuals; } EXPECT_EQ(visited_virtuals, num_virtual_methods); } diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h index c3979f3dd1..ca71c32802 100644 --- a/compiler/optimizing/builder.h +++ b/compiler/optimizing/builder.h @@ -90,8 +90,9 @@ class HGraphBuilder : public ValueObject { static constexpr const char* kBuilderPassName = "builder"; - // The number of entries in a packed switch before we use a jump table. - static constexpr uint16_t kSmallSwitchThreshold = 5; + // The number of entries in a packed switch before we use a jump table or specified + // compare/jump series. + static constexpr uint16_t kSmallSwitchThreshold = 3; private: // Analyzes the dex instruction and adds HInstruction to the graph diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 3630dbec24..9fda83840c 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -59,7 +59,7 @@ static constexpr SRegister kFpuCalleeSaves[] = // S registers. Therefore there is no need to block it. 
static constexpr DRegister DTMP = D31; -static constexpr uint32_t kPackedSwitchJumpTableThreshold = 6; +static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7; #define __ down_cast<ArmAssembler*>(codegen->GetAssembler())-> #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArmWordSize, x).Int32Value() @@ -6250,7 +6250,7 @@ void LocationsBuilderARM::VisitPackedSwitch(HPackedSwitch* switch_instr) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); - if (switch_instr->GetNumEntries() >= kPackedSwitchJumpTableThreshold && + if (switch_instr->GetNumEntries() > kPackedSwitchCompareJumpThreshold && codegen_->GetAssembler()->IsThumb()) { locations->AddTemp(Location::RequiresRegister()); // We need a temp for the table base. if (switch_instr->GetStartValue() != 0) { @@ -6266,12 +6266,30 @@ void InstructionCodeGeneratorARM::VisitPackedSwitch(HPackedSwitch* switch_instr) Register value_reg = locations->InAt(0).AsRegister<Register>(); HBasicBlock* default_block = switch_instr->GetDefaultBlock(); - if (num_entries < kPackedSwitchJumpTableThreshold || !codegen_->GetAssembler()->IsThumb()) { + if (num_entries <= kPackedSwitchCompareJumpThreshold || !codegen_->GetAssembler()->IsThumb()) { // Create a series of compare/jumps. + Register temp_reg = IP; + // Note: It is fine for the below AddConstantSetFlags() using IP register to temporarily store + // the immediate, because IP is used as the destination register. For the other + // AddConstantSetFlags() and GenerateCompareWithImmediate(), the immediate values are constant, + // and they can be encoded in the instruction without making use of IP register. + __ AddConstantSetFlags(temp_reg, value_reg, -lower_bound); + const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors(); - for (uint32_t i = 0; i < num_entries; i++) { - GenerateCompareWithImmediate(value_reg, lower_bound + i); - __ b(codegen_->GetLabelOf(successors[i]), EQ); + // Jump to successors[0] if value == lower_bound. + __ b(codegen_->GetLabelOf(successors[0]), EQ); + int32_t last_index = 0; + for (; num_entries - last_index > 2; last_index += 2) { + __ AddConstantSetFlags(temp_reg, temp_reg, -2); + // Jump to successors[last_index + 1] if value < case_value[last_index + 2]. + __ b(codegen_->GetLabelOf(successors[last_index + 1]), LO); + // Jump to successors[last_index + 2] if value == case_value[last_index + 2]. + __ b(codegen_->GetLabelOf(successors[last_index + 2]), EQ); + } + if (num_entries - last_index == 2) { + // The last missing case_value. + GenerateCompareWithImmediate(temp_reg, 1); + __ b(codegen_->GetLabelOf(successors[last_index + 1]), EQ); } // And the default for any other value. diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 451470f271..52058302be 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -71,10 +71,10 @@ using helpers::ARM64EncodableConstantOrRegister; using helpers::ArtVixlRegCodeCoherentForRegSet; static constexpr int kCurrentMethodStackOffset = 0; -// The compare/jump sequence will generate about (2 * num_entries + 1) instructions. While jump +// The compare/jump sequence will generate about (1.5 * num_entries + 3) instructions. While jump // table version generates 7 instructions and num_entries literals. Compare/jump sequence will // generates less code/data with a small num_entries. 
-static constexpr uint32_t kPackedSwitchJumpTableThreshold = 6; +static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7; inline Condition ARM64Condition(IfCondition cond) { switch (cond) { @@ -546,7 +546,7 @@ class ArraySetSlowPathARM64 : public SlowPathCodeARM64 { void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) { uint32_t num_entries = switch_instr_->GetNumEntries(); - DCHECK_GE(num_entries, kPackedSwitchJumpTableThreshold); + DCHECK_GE(num_entries, kPackedSwitchCompareJumpThreshold); // We are about to use the assembler to place literals directly. Make sure we have enough // underlying code buffer and we have generated the jump table with right size. @@ -4582,20 +4582,29 @@ void InstructionCodeGeneratorARM64::VisitPackedSwitch(HPackedSwitch* switch_inst // ranges and emit the tables only as required. static constexpr int32_t kJumpTableInstructionThreshold = 1* MB / kMaxExpectedSizePerHInstruction; - if (num_entries < kPackedSwitchJumpTableThreshold || + if (num_entries <= kPackedSwitchCompareJumpThreshold || // Current instruction id is an upper bound of the number of HIRs in the graph. GetGraph()->GetCurrentInstructionId() > kJumpTableInstructionThreshold) { // Create a series of compare/jumps. + UseScratchRegisterScope temps(codegen_->GetVIXLAssembler()); + Register temp = temps.AcquireW(); + __ Subs(temp, value_reg, Operand(lower_bound)); + const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors(); - for (uint32_t i = 0; i < num_entries; i++) { - int32_t case_value = lower_bound + i; - vixl::Label* succ = codegen_->GetLabelOf(successors[i]); - if (case_value == 0) { - __ Cbz(value_reg, succ); - } else { - __ Cmp(value_reg, Operand(case_value)); - __ B(eq, succ); - } + // Jump to successors[0] if value == lower_bound. + __ B(eq, codegen_->GetLabelOf(successors[0])); + int32_t last_index = 0; + for (; num_entries - last_index > 2; last_index += 2) { + __ Subs(temp, temp, Operand(2)); + // Jump to successors[last_index + 1] if value < case_value[last_index + 2]. + __ B(lo, codegen_->GetLabelOf(successors[last_index + 1])); + // Jump to successors[last_index + 2] if value == case_value[last_index + 2]. + __ B(eq, codegen_->GetLabelOf(successors[last_index + 2])); + } + if (num_entries - last_index == 2) { + // The last missing case_value. + __ Cmp(temp, Operand(1)); + __ B(eq, codegen_->GetLabelOf(successors[last_index + 1])); } // And the default for any other value. diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index 5dc101b199..ae0f2c8935 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -4897,19 +4897,31 @@ void InstructionCodeGeneratorMIPS::VisitPackedSwitch(HPackedSwitch* switch_instr HBasicBlock* default_block = switch_instr->GetDefaultBlock(); // Create a set of compare/jumps. 
- const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors(); - for (int32_t i = 0; i < num_entries; ++i) { - int32_t case_value = lower_bound + i; - MipsLabel* successor_label = codegen_->GetLabelOf(successors[i]); - if (case_value == 0) { - __ Beqz(value_reg, successor_label); - } else { - __ LoadConst32(TMP, case_value); - __ Beq(value_reg, TMP, successor_label); - } - } + Register temp_reg = TMP; + __ Addiu32(temp_reg, value_reg, -lower_bound); + // Jump to default if index is negative + // Note: We don't check the case that index is positive while value < lower_bound, because in + // this case, index >= num_entries must be true. So that we can save one branch instruction. + __ Bltz(temp_reg, codegen_->GetLabelOf(default_block)); - // Insert the default branch for every other value. + const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors(); + // Jump to successors[0] if value == lower_bound. + __ Beqz(temp_reg, codegen_->GetLabelOf(successors[0])); + int32_t last_index = 0; + for (; num_entries - last_index > 2; last_index += 2) { + __ Addiu(temp_reg, temp_reg, -2); + // Jump to successors[last_index + 1] if value < case_value[last_index + 2]. + __ Bltz(temp_reg, codegen_->GetLabelOf(successors[last_index + 1])); + // Jump to successors[last_index + 2] if value == case_value[last_index + 2]. + __ Beqz(temp_reg, codegen_->GetLabelOf(successors[last_index + 2])); + } + if (num_entries - last_index == 2) { + // The last missing case_value. + __ Addiu(temp_reg, temp_reg, -1); + __ Beqz(temp_reg, codegen_->GetLabelOf(successors[last_index + 1])); + } + + // And the default for any other value. if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) { __ B(codegen_->GetLabelOf(default_block)); } diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index 99f58dd2c5..1e428a06e1 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -1752,11 +1752,7 @@ void InstructionCodeGeneratorMIPS64::VisitClinitCheck(HClinitCheck* check) { void LocationsBuilderMIPS64::VisitCompare(HCompare* compare) { Primitive::Type in_type = compare->InputAt(0)->GetType(); - LocationSummary::CallKind call_kind = Primitive::IsFloatingPointType(in_type) - ? 
LocationSummary::kCall - : LocationSummary::kNoCall; - - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(compare, call_kind); + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(compare); switch (in_type) { case Primitive::kPrimLong: @@ -1766,13 +1762,11 @@ void LocationsBuilderMIPS64::VisitCompare(HCompare* compare) { break; case Primitive::kPrimFloat: - case Primitive::kPrimDouble: { - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0))); - locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1))); - locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimInt)); + case Primitive::kPrimDouble: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; - } default: LOG(FATAL) << "Unexpected type for compare operation " << in_type; @@ -1781,14 +1775,15 @@ void LocationsBuilderMIPS64::VisitCompare(HCompare* compare) { void InstructionCodeGeneratorMIPS64::VisitCompare(HCompare* instruction) { LocationSummary* locations = instruction->GetLocations(); + GpuRegister res = locations->Out().AsRegister<GpuRegister>(); Primitive::Type in_type = instruction->InputAt(0)->GetType(); + bool gt_bias = instruction->IsGtBias(); // 0 if: left == right // 1 if: left > right // -1 if: left < right switch (in_type) { case Primitive::kPrimLong: { - GpuRegister dst = locations->Out().AsRegister<GpuRegister>(); GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>(); Location rhs_location = locations->InAt(1); bool use_imm = rhs_location.IsConstant(); @@ -1803,35 +1798,52 @@ void InstructionCodeGeneratorMIPS64::VisitCompare(HCompare* instruction) { rhs = rhs_location.AsRegister<GpuRegister>(); } __ Slt(TMP, lhs, rhs); - __ Slt(dst, rhs, lhs); - __ Subu(dst, dst, TMP); + __ Slt(res, rhs, lhs); + __ Subu(res, res, TMP); break; } - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: { - int32_t entry_point_offset; - if (in_type == Primitive::kPrimFloat) { - entry_point_offset = instruction->IsGtBias() ? QUICK_ENTRY_POINT(pCmpgFloat) - : QUICK_ENTRY_POINT(pCmplFloat); + case Primitive::kPrimFloat: { + FpuRegister lhs = locations->InAt(0).AsFpuRegister<FpuRegister>(); + FpuRegister rhs = locations->InAt(1).AsFpuRegister<FpuRegister>(); + Mips64Label done; + __ CmpEqS(FTMP, lhs, rhs); + __ LoadConst32(res, 0); + __ Bc1nez(FTMP, &done); + if (gt_bias) { + __ CmpLtS(FTMP, lhs, rhs); + __ LoadConst32(res, -1); + __ Bc1nez(FTMP, &done); + __ LoadConst32(res, 1); } else { - entry_point_offset = instruction->IsGtBias() ? 
QUICK_ENTRY_POINT(pCmpgDouble) - : QUICK_ENTRY_POINT(pCmplDouble); + __ CmpLtS(FTMP, rhs, lhs); + __ LoadConst32(res, 1); + __ Bc1nez(FTMP, &done); + __ LoadConst32(res, -1); } - codegen_->InvokeRuntime(entry_point_offset, instruction, instruction->GetDexPc(), nullptr); - if (in_type == Primitive::kPrimFloat) { - if (instruction->IsGtBias()) { - CheckEntrypointTypes<kQuickCmpgFloat, int32_t, float, float>(); - } else { - CheckEntrypointTypes<kQuickCmplFloat, int32_t, float, float>(); - } + __ Bind(&done); + break; + } + + case Primitive::kPrimDouble: { + FpuRegister lhs = locations->InAt(0).AsFpuRegister<FpuRegister>(); + FpuRegister rhs = locations->InAt(1).AsFpuRegister<FpuRegister>(); + Mips64Label done; + __ CmpEqD(FTMP, lhs, rhs); + __ LoadConst32(res, 0); + __ Bc1nez(FTMP, &done); + if (gt_bias) { + __ CmpLtD(FTMP, lhs, rhs); + __ LoadConst32(res, -1); + __ Bc1nez(FTMP, &done); + __ LoadConst32(res, 1); } else { - if (instruction->IsGtBias()) { - CheckEntrypointTypes<kQuickCmpgDouble, int32_t, double, double>(); - } else { - CheckEntrypointTypes<kQuickCmplDouble, int32_t, double, double>(); - } + __ CmpLtD(FTMP, rhs, lhs); + __ LoadConst32(res, 1); + __ Bc1nez(FTMP, &done); + __ LoadConst32(res, -1); } + __ Bind(&done); break; } @@ -1842,8 +1854,19 @@ void InstructionCodeGeneratorMIPS64::VisitCompare(HCompare* instruction) { void LocationsBuilderMIPS64::VisitCondition(HCondition* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); + switch (instruction->InputAt(0)->GetType()) { + default: + case Primitive::kPrimLong: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); + break; + + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + break; + } if (instruction->NeedsMaterialization()) { locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } @@ -1854,129 +1877,42 @@ void InstructionCodeGeneratorMIPS64::VisitCondition(HCondition* instruction) { return; } - // TODO: generalize to long - DCHECK_NE(instruction->InputAt(0)->GetType(), Primitive::kPrimLong); - + Primitive::Type type = instruction->InputAt(0)->GetType(); LocationSummary* locations = instruction->GetLocations(); - GpuRegister dst = locations->Out().AsRegister<GpuRegister>(); - GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>(); - Location rhs_location = locations->InAt(1); + Mips64Label true_label; - GpuRegister rhs_reg = ZERO; - int64_t rhs_imm = 0; - bool use_imm = rhs_location.IsConstant(); - if (use_imm) { - rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant()); - } else { - rhs_reg = rhs_location.AsRegister<GpuRegister>(); - } - - IfCondition if_cond = instruction->GetCondition(); - - switch (if_cond) { - case kCondEQ: - case kCondNE: - if (use_imm && IsUint<16>(rhs_imm)) { - __ Xori(dst, lhs, rhs_imm); - } else { - if (use_imm) { - rhs_reg = TMP; - __ LoadConst32(rhs_reg, rhs_imm); - } - __ Xor(dst, lhs, rhs_reg); - } - if (if_cond == kCondEQ) { - __ Sltiu(dst, dst, 1); - } else { - __ Sltu(dst, ZERO, dst); - } - break; + switch (type) { + default: + // Integer case. 
+ GenerateIntLongCompare(instruction->GetCondition(), /* is64bit */ false, locations); + return; + case Primitive::kPrimLong: + GenerateIntLongCompare(instruction->GetCondition(), /* is64bit */ true, locations); + return; - case kCondLT: - case kCondGE: - if (use_imm && IsInt<16>(rhs_imm)) { - __ Slti(dst, lhs, rhs_imm); - } else { - if (use_imm) { - rhs_reg = TMP; - __ LoadConst32(rhs_reg, rhs_imm); - } - __ Slt(dst, lhs, rhs_reg); - } - if (if_cond == kCondGE) { - // Simulate lhs >= rhs via !(lhs < rhs) since there's - // only the slt instruction but no sge. - __ Xori(dst, dst, 1); - } + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + // TODO: don't use branches. + GenerateFpCompareAndBranch(instruction->GetCondition(), + instruction->IsGtBias(), + type, + locations, + &true_label); break; + } - case kCondLE: - case kCondGT: - if (use_imm && IsInt<16>(rhs_imm + 1)) { - // Simulate lhs <= rhs via lhs < rhs + 1. - __ Slti(dst, lhs, rhs_imm + 1); - if (if_cond == kCondGT) { - // Simulate lhs > rhs via !(lhs <= rhs) since there's - // only the slti instruction but no sgti. - __ Xori(dst, dst, 1); - } - } else { - if (use_imm) { - rhs_reg = TMP; - __ LoadConst32(rhs_reg, rhs_imm); - } - __ Slt(dst, rhs_reg, lhs); - if (if_cond == kCondLE) { - // Simulate lhs <= rhs via !(rhs < lhs) since there's - // only the slt instruction but no sle. - __ Xori(dst, dst, 1); - } - } - break; + // Convert the branches into the result. + Mips64Label done; - case kCondB: - case kCondAE: - if (use_imm && 0 <= rhs_imm && rhs_imm <= 0x7fff) { - __ Sltiu(dst, lhs, rhs_imm); - } else { - if (use_imm) { - rhs_reg = TMP; - __ LoadConst32(rhs_reg, rhs_imm); - } - __ Sltu(dst, lhs, rhs_reg); - } - if (if_cond == kCondAE) { - // Simulate lhs >= rhs via !(lhs < rhs) since there's - // only the sltu instruction but no sgeu. - __ Xori(dst, dst, 1); - } - break; + // False case: result = 0. + __ LoadConst32(dst, 0); + __ Bc(&done); - case kCondBE: - case kCondA: - if (use_imm && 0 <= rhs_imm && rhs_imm <= 0x7ffe) { - // Simulate lhs <= rhs via lhs < rhs + 1. - __ Sltiu(dst, lhs, rhs_imm + 1); - if (if_cond == kCondA) { - // Simulate lhs > rhs via !(lhs <= rhs) since there's - // only the sltiu instruction but no sgtiu. - __ Xori(dst, dst, 1); - } - } else { - if (use_imm) { - rhs_reg = TMP; - __ LoadConst32(rhs_reg, rhs_imm); - } - __ Sltu(dst, rhs_reg, lhs); - if (if_cond == kCondBE) { - // Simulate lhs <= rhs via !(rhs < lhs) since there's - // only the sltu instruction but no sleu. - __ Xori(dst, dst, 1); - } - } - break; - } + // True case: result = 1. 
+ __ Bind(&true_label); + __ LoadConst32(dst, 1); + __ Bind(&done); } void InstructionCodeGeneratorMIPS64::DivRemOneOrMinusOne(HBinaryOperation* instruction) { @@ -2375,6 +2311,329 @@ void InstructionCodeGeneratorMIPS64::VisitTryBoundary(HTryBoundary* try_boundary } } +void InstructionCodeGeneratorMIPS64::GenerateIntLongCompare(IfCondition cond, + bool is64bit, + LocationSummary* locations) { + GpuRegister dst = locations->Out().AsRegister<GpuRegister>(); + GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>(); + Location rhs_location = locations->InAt(1); + GpuRegister rhs_reg = ZERO; + int64_t rhs_imm = 0; + bool use_imm = rhs_location.IsConstant(); + if (use_imm) { + if (is64bit) { + rhs_imm = CodeGenerator::GetInt64ValueOf(rhs_location.GetConstant()); + } else { + rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant()); + } + } else { + rhs_reg = rhs_location.AsRegister<GpuRegister>(); + } + int64_t rhs_imm_plus_one = rhs_imm + UINT64_C(1); + + switch (cond) { + case kCondEQ: + case kCondNE: + if (use_imm && IsUint<16>(rhs_imm)) { + __ Xori(dst, lhs, rhs_imm); + } else { + if (use_imm) { + rhs_reg = TMP; + __ LoadConst64(rhs_reg, rhs_imm); + } + __ Xor(dst, lhs, rhs_reg); + } + if (cond == kCondEQ) { + __ Sltiu(dst, dst, 1); + } else { + __ Sltu(dst, ZERO, dst); + } + break; + + case kCondLT: + case kCondGE: + if (use_imm && IsInt<16>(rhs_imm)) { + __ Slti(dst, lhs, rhs_imm); + } else { + if (use_imm) { + rhs_reg = TMP; + __ LoadConst64(rhs_reg, rhs_imm); + } + __ Slt(dst, lhs, rhs_reg); + } + if (cond == kCondGE) { + // Simulate lhs >= rhs via !(lhs < rhs) since there's + // only the slt instruction but no sge. + __ Xori(dst, dst, 1); + } + break; + + case kCondLE: + case kCondGT: + if (use_imm && IsInt<16>(rhs_imm_plus_one)) { + // Simulate lhs <= rhs via lhs < rhs + 1. + __ Slti(dst, lhs, rhs_imm_plus_one); + if (cond == kCondGT) { + // Simulate lhs > rhs via !(lhs <= rhs) since there's + // only the slti instruction but no sgti. + __ Xori(dst, dst, 1); + } + } else { + if (use_imm) { + rhs_reg = TMP; + __ LoadConst64(rhs_reg, rhs_imm); + } + __ Slt(dst, rhs_reg, lhs); + if (cond == kCondLE) { + // Simulate lhs <= rhs via !(rhs < lhs) since there's + // only the slt instruction but no sle. + __ Xori(dst, dst, 1); + } + } + break; + + case kCondB: + case kCondAE: + if (use_imm && IsInt<16>(rhs_imm)) { + // Sltiu sign-extends its 16-bit immediate operand before + // the comparison and thus lets us compare directly with + // unsigned values in the ranges [0, 0x7fff] and + // [0x[ffffffff]ffff8000, 0x[ffffffff]ffffffff]. + __ Sltiu(dst, lhs, rhs_imm); + } else { + if (use_imm) { + rhs_reg = TMP; + __ LoadConst64(rhs_reg, rhs_imm); + } + __ Sltu(dst, lhs, rhs_reg); + } + if (cond == kCondAE) { + // Simulate lhs >= rhs via !(lhs < rhs) since there's + // only the sltu instruction but no sgeu. + __ Xori(dst, dst, 1); + } + break; + + case kCondBE: + case kCondA: + if (use_imm && (rhs_imm_plus_one != 0) && IsInt<16>(rhs_imm_plus_one)) { + // Simulate lhs <= rhs via lhs < rhs + 1. + // Note that this only works if rhs + 1 does not overflow + // to 0, hence the check above. + // Sltiu sign-extends its 16-bit immediate operand before + // the comparison and thus lets us compare directly with + // unsigned values in the ranges [0, 0x7fff] and + // [0x[ffffffff]ffff8000, 0x[ffffffff]ffffffff]. + __ Sltiu(dst, lhs, rhs_imm_plus_one); + if (cond == kCondA) { + // Simulate lhs > rhs via !(lhs <= rhs) since there's + // only the sltiu instruction but no sgtiu. 
+ __ Xori(dst, dst, 1); + } + } else { + if (use_imm) { + rhs_reg = TMP; + __ LoadConst64(rhs_reg, rhs_imm); + } + __ Sltu(dst, rhs_reg, lhs); + if (cond == kCondBE) { + // Simulate lhs <= rhs via !(rhs < lhs) since there's + // only the sltu instruction but no sleu. + __ Xori(dst, dst, 1); + } + } + break; + } +} + +void InstructionCodeGeneratorMIPS64::GenerateIntLongCompareAndBranch(IfCondition cond, + bool is64bit, + LocationSummary* locations, + Mips64Label* label) { + GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>(); + Location rhs_location = locations->InAt(1); + GpuRegister rhs_reg = ZERO; + int64_t rhs_imm = 0; + bool use_imm = rhs_location.IsConstant(); + if (use_imm) { + if (is64bit) { + rhs_imm = CodeGenerator::GetInt64ValueOf(rhs_location.GetConstant()); + } else { + rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant()); + } + } else { + rhs_reg = rhs_location.AsRegister<GpuRegister>(); + } + + if (use_imm && rhs_imm == 0) { + switch (cond) { + case kCondEQ: + case kCondBE: // <= 0 if zero + __ Beqzc(lhs, label); + break; + case kCondNE: + case kCondA: // > 0 if non-zero + __ Bnezc(lhs, label); + break; + case kCondLT: + __ Bltzc(lhs, label); + break; + case kCondGE: + __ Bgezc(lhs, label); + break; + case kCondLE: + __ Blezc(lhs, label); + break; + case kCondGT: + __ Bgtzc(lhs, label); + break; + case kCondB: // always false + break; + case kCondAE: // always true + __ Bc(label); + break; + } + } else { + if (use_imm) { + rhs_reg = TMP; + __ LoadConst64(rhs_reg, rhs_imm); + } + switch (cond) { + case kCondEQ: + __ Beqc(lhs, rhs_reg, label); + break; + case kCondNE: + __ Bnec(lhs, rhs_reg, label); + break; + case kCondLT: + __ Bltc(lhs, rhs_reg, label); + break; + case kCondGE: + __ Bgec(lhs, rhs_reg, label); + break; + case kCondLE: + __ Bgec(rhs_reg, lhs, label); + break; + case kCondGT: + __ Bltc(rhs_reg, lhs, label); + break; + case kCondB: + __ Bltuc(lhs, rhs_reg, label); + break; + case kCondAE: + __ Bgeuc(lhs, rhs_reg, label); + break; + case kCondBE: + __ Bgeuc(rhs_reg, lhs, label); + break; + case kCondA: + __ Bltuc(rhs_reg, lhs, label); + break; + } + } +} + +void InstructionCodeGeneratorMIPS64::GenerateFpCompareAndBranch(IfCondition cond, + bool gt_bias, + Primitive::Type type, + LocationSummary* locations, + Mips64Label* label) { + FpuRegister lhs = locations->InAt(0).AsFpuRegister<FpuRegister>(); + FpuRegister rhs = locations->InAt(1).AsFpuRegister<FpuRegister>(); + if (type == Primitive::kPrimFloat) { + switch (cond) { + case kCondEQ: + __ CmpEqS(FTMP, lhs, rhs); + __ Bc1nez(FTMP, label); + break; + case kCondNE: + __ CmpEqS(FTMP, lhs, rhs); + __ Bc1eqz(FTMP, label); + break; + case kCondLT: + if (gt_bias) { + __ CmpLtS(FTMP, lhs, rhs); + } else { + __ CmpUltS(FTMP, lhs, rhs); + } + __ Bc1nez(FTMP, label); + break; + case kCondLE: + if (gt_bias) { + __ CmpLeS(FTMP, lhs, rhs); + } else { + __ CmpUleS(FTMP, lhs, rhs); + } + __ Bc1nez(FTMP, label); + break; + case kCondGT: + if (gt_bias) { + __ CmpUltS(FTMP, rhs, lhs); + } else { + __ CmpLtS(FTMP, rhs, lhs); + } + __ Bc1nez(FTMP, label); + break; + case kCondGE: + if (gt_bias) { + __ CmpUleS(FTMP, rhs, lhs); + } else { + __ CmpLeS(FTMP, rhs, lhs); + } + __ Bc1nez(FTMP, label); + break; + default: + LOG(FATAL) << "Unexpected non-floating-point condition"; + } + } else { + DCHECK_EQ(type, Primitive::kPrimDouble); + switch (cond) { + case kCondEQ: + __ CmpEqD(FTMP, lhs, rhs); + __ Bc1nez(FTMP, label); + break; + case kCondNE: + __ CmpEqD(FTMP, lhs, rhs); + __ Bc1eqz(FTMP, label); + break; + 
case kCondLT: + if (gt_bias) { + __ CmpLtD(FTMP, lhs, rhs); + } else { + __ CmpUltD(FTMP, lhs, rhs); + } + __ Bc1nez(FTMP, label); + break; + case kCondLE: + if (gt_bias) { + __ CmpLeD(FTMP, lhs, rhs); + } else { + __ CmpUleD(FTMP, lhs, rhs); + } + __ Bc1nez(FTMP, label); + break; + case kCondGT: + if (gt_bias) { + __ CmpUltD(FTMP, rhs, lhs); + } else { + __ CmpLtD(FTMP, rhs, lhs); + } + __ Bc1nez(FTMP, label); + break; + case kCondGE: + if (gt_bias) { + __ CmpUleD(FTMP, rhs, lhs); + } else { + __ CmpLeD(FTMP, rhs, lhs); + } + __ Bc1nez(FTMP, label); + break; + default: + LOG(FATAL) << "Unexpected non-floating-point condition"; + } + } +} + void InstructionCodeGeneratorMIPS64::GenerateTestAndBranch(HInstruction* instruction, size_t condition_input_index, Mips64Label* true_target, @@ -2420,97 +2679,27 @@ void InstructionCodeGeneratorMIPS64::GenerateTestAndBranch(HInstruction* instruc // The condition instruction has not been materialized, use its inputs as // the comparison and its condition as the branch condition. HCondition* condition = cond->AsCondition(); + Primitive::Type type = condition->InputAt(0)->GetType(); + LocationSummary* locations = cond->GetLocations(); + IfCondition if_cond = condition->GetCondition(); + Mips64Label* branch_target = true_target; - GpuRegister lhs = condition->GetLocations()->InAt(0).AsRegister<GpuRegister>(); - Location rhs_location = condition->GetLocations()->InAt(1); - GpuRegister rhs_reg = ZERO; - int32_t rhs_imm = 0; - bool use_imm = rhs_location.IsConstant(); - if (use_imm) { - rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant()); - } else { - rhs_reg = rhs_location.AsRegister<GpuRegister>(); - } - - IfCondition if_cond; - Mips64Label* non_fallthrough_target; if (true_target == nullptr) { if_cond = condition->GetOppositeCondition(); - non_fallthrough_target = false_target; - } else { - if_cond = condition->GetCondition(); - non_fallthrough_target = true_target; - } - - if (use_imm && rhs_imm == 0) { - switch (if_cond) { - case kCondEQ: - __ Beqzc(lhs, non_fallthrough_target); - break; - case kCondNE: - __ Bnezc(lhs, non_fallthrough_target); - break; - case kCondLT: - __ Bltzc(lhs, non_fallthrough_target); - break; - case kCondGE: - __ Bgezc(lhs, non_fallthrough_target); - break; - case kCondLE: - __ Blezc(lhs, non_fallthrough_target); - break; - case kCondGT: - __ Bgtzc(lhs, non_fallthrough_target); - break; - case kCondB: - break; // always false - case kCondBE: - __ Beqzc(lhs, non_fallthrough_target); // <= 0 if zero - break; - case kCondA: - __ Bnezc(lhs, non_fallthrough_target); // > 0 if non-zero - break; - case kCondAE: - __ Bc(non_fallthrough_target); // always true - break; - } - } else { - if (use_imm) { - rhs_reg = TMP; - __ LoadConst32(rhs_reg, rhs_imm); - } - switch (if_cond) { - case kCondEQ: - __ Beqc(lhs, rhs_reg, non_fallthrough_target); - break; - case kCondNE: - __ Bnec(lhs, rhs_reg, non_fallthrough_target); - break; - case kCondLT: - __ Bltc(lhs, rhs_reg, non_fallthrough_target); - break; - case kCondGE: - __ Bgec(lhs, rhs_reg, non_fallthrough_target); - break; - case kCondLE: - __ Bgec(rhs_reg, lhs, non_fallthrough_target); - break; - case kCondGT: - __ Bltc(rhs_reg, lhs, non_fallthrough_target); - break; - case kCondB: - __ Bltuc(lhs, rhs_reg, non_fallthrough_target); - break; - case kCondAE: - __ Bgeuc(lhs, rhs_reg, non_fallthrough_target); - break; - case kCondBE: - __ Bgeuc(rhs_reg, lhs, non_fallthrough_target); - break; - case kCondA: - __ Bltuc(rhs_reg, lhs, non_fallthrough_target); - break; - } + 
branch_target = false_target; + } + + switch (type) { + default: + GenerateIntLongCompareAndBranch(if_cond, /* is64bit */ false, locations, branch_target); + break; + case Primitive::kPrimLong: + GenerateIntLongCompareAndBranch(if_cond, /* is64bit */ true, locations, branch_target); + break; + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + GenerateFpCompareAndBranch(if_cond, condition->IsGtBias(), type, locations, branch_target); + break; } } @@ -3991,17 +4180,34 @@ void InstructionCodeGeneratorMIPS64::VisitPackedSwitch(HPackedSwitch* switch_ins GpuRegister value_reg = locations->InAt(0).AsRegister<GpuRegister>(); HBasicBlock* default_block = switch_instr->GetDefaultBlock(); - // Create a series of compare/jumps. + // Create a set of compare/jumps. + GpuRegister temp_reg = TMP; + if (IsInt<16>(-lower_bound)) { + __ Addiu(temp_reg, value_reg, -lower_bound); + } else { + __ LoadConst32(AT, -lower_bound); + __ Addu(temp_reg, value_reg, AT); + } + // Jump to default if index is negative + // Note: We don't check the case that index is positive while value < lower_bound, because in + // this case, index >= num_entries must be true. So that we can save one branch instruction. + __ Bltzc(temp_reg, codegen_->GetLabelOf(default_block)); + const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors(); - for (int32_t i = 0; i < num_entries; i++) { - int32_t case_value = lower_bound + i; - Mips64Label* succ = codegen_->GetLabelOf(successors[i]); - if (case_value == 0) { - __ Beqzc(value_reg, succ); - } else { - __ LoadConst32(TMP, case_value); - __ Beqc(value_reg, TMP, succ); - } + // Jump to successors[0] if value == lower_bound. + __ Beqzc(temp_reg, codegen_->GetLabelOf(successors[0])); + int32_t last_index = 0; + for (; num_entries - last_index > 2; last_index += 2) { + __ Addiu(temp_reg, temp_reg, -2); + // Jump to successors[last_index + 1] if value < case_value[last_index + 2]. + __ Bltzc(temp_reg, codegen_->GetLabelOf(successors[last_index + 1])); + // Jump to successors[last_index + 2] if value == case_value[last_index + 2]. + __ Beqzc(temp_reg, codegen_->GetLabelOf(successors[last_index + 2])); + } + if (num_entries - last_index == 2) { + // The last missing case_value. + __ Addiu(temp_reg, temp_reg, -1); + __ Beqzc(temp_reg, codegen_->GetLabelOf(successors[last_index + 1])); } // And the default for any other value. 
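The ARM, ARM64, MIPS, and MIPS64 hunks above all rewrite VisitPackedSwitch from one compare-and-branch per case into a series that subtracts the lower bound once and then consumes two cases per iteration. A hypothetical C++ model of the successor selection that series performs, closest to the MIPS64 form (the function name and the -1-for-default convention are illustrative, not ART code):

    #include <cstdint>

    // Returns the index of the successor block a packed switch dispatches to,
    // or -1 for the default block.
    int32_t PackedSwitchSuccessor(int32_t value, int32_t lower_bound, int32_t num_entries) {
      int32_t index = value - lower_bound;   // one subtraction up front
      if (index < 0) return -1;              // below the range: default block
      if (index == 0) return 0;              // value == lower_bound
      int32_t last = 0;
      while (num_entries - last > 2) {
        index -= 2;                          // advance two cases per iteration
        if (index < 0) return last + 1;      // value == case last + 1
        if (index == 0) return last + 2;     // value == case last + 2
        last += 2;
      }
      if (num_entries - last == 2 && index == 1) {
        return last + 1;                     // the single remaining odd case
      }
      return -1;                             // above the range: default block
    }

This emits roughly 1.5 compare/branch instructions per case, which is what the updated kPackedSwitchCompareJumpThreshold comments refer to.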
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h index 85e3a4a3ce..1593cec2a6 100644 --- a/compiler/optimizing/code_generator_mips64.h +++ b/compiler/optimizing/code_generator_mips64.h @@ -237,6 +237,16 @@ class InstructionCodeGeneratorMIPS64 : public HGraphVisitor { void DivRemByPowerOfTwo(HBinaryOperation* instruction); void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction); void GenerateDivRemIntegral(HBinaryOperation* instruction); + void GenerateIntLongCompare(IfCondition cond, bool is64bit, LocationSummary* locations); + void GenerateIntLongCompareAndBranch(IfCondition cond, + bool is64bit, + LocationSummary* locations, + Mips64Label* label); + void GenerateFpCompareAndBranch(IfCondition cond, + bool gt_bias, + Primitive::Type type, + LocationSummary* locations, + Mips64Label* label); void HandleGoto(HInstruction* got, HBasicBlock* successor); Mips64Assembler* const assembler_; diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index bc3256ec8c..7a5b8dbe46 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -42,7 +42,6 @@ namespace x86 { static constexpr int kCurrentMethodStackOffset = 0; static constexpr Register kMethodRegisterArgument = EAX; - static constexpr Register kCoreCalleeSaves[] = { EBP, ESI, EDI }; static constexpr int kC2ConditionMask = 0x400; @@ -4157,7 +4156,7 @@ void CodeGeneratorX86::GenerateMemoryBarrier(MemBarrierKind kind) { */ switch (kind) { case MemBarrierKind::kAnyAny: { - __ mfence(); + MemoryFence(); break; } case MemBarrierKind::kAnyStore: @@ -6752,31 +6751,67 @@ void LocationsBuilderX86::VisitPackedSwitch(HPackedSwitch* switch_instr) { locations->SetInAt(0, Location::RequiresRegister()); } -void InstructionCodeGeneratorX86::VisitPackedSwitch(HPackedSwitch* switch_instr) { - int32_t lower_bound = switch_instr->GetStartValue(); - int32_t num_entries = switch_instr->GetNumEntries(); - LocationSummary* locations = switch_instr->GetLocations(); - Register value_reg = locations->InAt(0).AsRegister<Register>(); - HBasicBlock* default_block = switch_instr->GetDefaultBlock(); +void InstructionCodeGeneratorX86::GenPackedSwitchWithCompares(Register value_reg, + int32_t lower_bound, + uint32_t num_entries, + HBasicBlock* switch_block, + HBasicBlock* default_block) { + // Figure out the correct compare values and jump conditions. + // Handle the first compare/branch as a special case because it might + // jump to the default case. + DCHECK_GT(num_entries, 2u); + Condition first_condition; + uint32_t index; + const ArenaVector<HBasicBlock*>& successors = switch_block->GetSuccessors(); + if (lower_bound != 0) { + first_condition = kLess; + __ cmpl(value_reg, Immediate(lower_bound)); + __ j(first_condition, codegen_->GetLabelOf(default_block)); + __ j(kEqual, codegen_->GetLabelOf(successors[0])); - // Create a series of compare/jumps. - const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors(); - for (int i = 0; i < num_entries; i++) { - int32_t case_value = lower_bound + i; - if (case_value == 0) { - __ testl(value_reg, value_reg); - } else { - __ cmpl(value_reg, Immediate(case_value)); - } - __ j(kEqual, codegen_->GetLabelOf(successors[i])); + index = 1; + } else { + // Handle all the compare/jumps below. + first_condition = kBelow; + index = 0; + } + + // Handle the rest of the compare/jumps. 
+ for (; index + 1 < num_entries; index += 2) { + int32_t compare_to_value = lower_bound + index + 1; + __ cmpl(value_reg, Immediate(compare_to_value)); + // Jump to successors[index] if value < case_value[index]. + __ j(first_condition, codegen_->GetLabelOf(successors[index])); + // Jump to successors[index + 1] if value == case_value[index + 1]. + __ j(kEqual, codegen_->GetLabelOf(successors[index + 1])); + } + + if (index != num_entries) { + // There are an odd number of entries. Handle the last one. + DCHECK_EQ(index + 1, num_entries); + __ cmpl(value_reg, Immediate(lower_bound + index)); + __ j(kEqual, codegen_->GetLabelOf(successors[index])); } // And the default for any other value. - if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) { - __ jmp(codegen_->GetLabelOf(default_block)); + if (!codegen_->GoesToNextBlock(switch_block, default_block)) { + __ jmp(codegen_->GetLabelOf(default_block)); } } +void InstructionCodeGeneratorX86::VisitPackedSwitch(HPackedSwitch* switch_instr) { + int32_t lower_bound = switch_instr->GetStartValue(); + uint32_t num_entries = switch_instr->GetNumEntries(); + LocationSummary* locations = switch_instr->GetLocations(); + Register value_reg = locations->InAt(0).AsRegister<Register>(); + + GenPackedSwitchWithCompares(value_reg, + lower_bound, + num_entries, + switch_instr->GetBlock(), + switch_instr->GetDefaultBlock()); +} + void LocationsBuilderX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall); @@ -6791,11 +6826,20 @@ void LocationsBuilderX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) { void InstructionCodeGeneratorX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) { int32_t lower_bound = switch_instr->GetStartValue(); - int32_t num_entries = switch_instr->GetNumEntries(); + uint32_t num_entries = switch_instr->GetNumEntries(); LocationSummary* locations = switch_instr->GetLocations(); Register value_reg = locations->InAt(0).AsRegister<Register>(); HBasicBlock* default_block = switch_instr->GetDefaultBlock(); + if (num_entries <= kPackedSwitchJumpTableThreshold) { + GenPackedSwitchWithCompares(value_reg, + lower_bound, + num_entries, + switch_instr->GetBlock(), + default_block); + return; + } + // Optimizing has a jump area. Register temp_reg = locations->GetTemp(0).AsRegister<Register>(); Register constant_area = locations->InAt(1).AsRegister<Register>(); @@ -6807,7 +6851,7 @@ void InstructionCodeGeneratorX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_ } // Is the value in range? - DCHECK_GE(num_entries, 1); + DCHECK_GE(num_entries, 1u); __ cmpl(value_reg, Immediate(num_entries - 1)); __ j(kAbove, codegen_->GetLabelOf(default_block)); diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 7c292fa103..f0ead0356d 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -17,6 +17,7 @@ #ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_H_ #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_H_ +#include "arch/x86/instruction_set_features_x86.h" #include "code_generator.h" #include "dex/compiler_enums.h" #include "driver/compiler_options.h" @@ -195,6 +196,11 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor { X86Assembler* GetAssembler() const { return assembler_; } + // The compare/jump sequence will generate about (1.5 * num_entries) instructions. 
A jump + // table version generates 7 instructions and num_entries literals. Compare/jump sequence will + // generates less code/data with a small num_entries. + static constexpr uint32_t kPackedSwitchJumpTableThreshold = 5; + private: // Generate code for the given suspend check. If not null, `successor` // is the block to branch to if the suspend check is not needed, and after @@ -269,6 +275,11 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor { void GenerateFPJumps(HCondition* cond, Label* true_label, Label* false_label); void GenerateLongComparesAndJumps(HCondition* cond, Label* true_label, Label* false_label); void HandleGoto(HInstruction* got, HBasicBlock* successor); + void GenPackedSwitchWithCompares(Register value_reg, + int32_t lower_bound, + uint32_t num_entries, + HBasicBlock* switch_block, + HBasicBlock* default_block); X86Assembler* const assembler_; CodeGeneratorX86* const codegen_; @@ -496,6 +507,19 @@ class CodeGeneratorX86 : public CodeGenerator { // artReadBarrierForRootSlow. void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root); + // Ensure that prior stores complete to memory before subsequent loads. + // The locked add implementation will avoid serializing device memory, but will + // touch (but not change) the top of the stack. + // The 'non_temporal' parameter should be used to ensure ordering of non-temporal stores. + void MemoryFence(bool non_temporal = false) { + if (!non_temporal && isa_features_.PrefersLockedAddSynchronization()) { + assembler_.lock()->addl(Address(ESP, 0), Immediate(0)); + } else { + assembler_.mfence(); + } + } + + private: // Factored implementation of GenerateFieldLoadWithBakerReadBarrier // and GenerateArrayLoadWithBakerReadBarrier. diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 92cef5f226..1e6d50610b 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -41,6 +41,10 @@ namespace x86_64 { static constexpr int kCurrentMethodStackOffset = 0; static constexpr Register kMethodRegisterArgument = RDI; +// The compare/jump sequence will generate about (1.5 * num_entries) instructions. A jump +// table version generates 7 instructions and num_entries literals. Compare/jump sequence will +// generates less code/data with a small num_entries. 
+static constexpr uint32_t kPackedSwitchJumpTableThreshold = 5; static constexpr Register kCoreCalleeSaves[] = { RBX, RBP, R12, R13, R14, R15 }; static constexpr FloatRegister kFpuCalleeSaves[] = { XMM12, XMM13, XMM14, XMM15 }; @@ -4029,7 +4033,7 @@ void CodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind) { */ switch (kind) { case MemBarrierKind::kAnyAny: { - __ mfence(); + MemoryFence(); break; } case MemBarrierKind::kAnyStore: @@ -6331,11 +6335,58 @@ void LocationsBuilderX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) { void InstructionCodeGeneratorX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) { int32_t lower_bound = switch_instr->GetStartValue(); - int32_t num_entries = switch_instr->GetNumEntries(); + uint32_t num_entries = switch_instr->GetNumEntries(); LocationSummary* locations = switch_instr->GetLocations(); CpuRegister value_reg_in = locations->InAt(0).AsRegister<CpuRegister>(); CpuRegister temp_reg = locations->GetTemp(0).AsRegister<CpuRegister>(); CpuRegister base_reg = locations->GetTemp(1).AsRegister<CpuRegister>(); + HBasicBlock* default_block = switch_instr->GetDefaultBlock(); + + // Should we generate smaller inline compare/jumps? + if (num_entries <= kPackedSwitchJumpTableThreshold) { + // Figure out the correct compare values and jump conditions. + // Handle the first compare/branch as a special case because it might + // jump to the default case. + DCHECK_GT(num_entries, 2u); + Condition first_condition; + uint32_t index; + const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors(); + if (lower_bound != 0) { + first_condition = kLess; + __ cmpl(value_reg_in, Immediate(lower_bound)); + __ j(first_condition, codegen_->GetLabelOf(default_block)); + __ j(kEqual, codegen_->GetLabelOf(successors[0])); + + index = 1; + } else { + // Handle all the compare/jumps below. + first_condition = kBelow; + index = 0; + } + + // Handle the rest of the compare/jumps. + for (; index + 1 < num_entries; index += 2) { + int32_t compare_to_value = lower_bound + index + 1; + __ cmpl(value_reg_in, Immediate(compare_to_value)); + // Jump to successors[index] if value < case_value[index]. + __ j(first_condition, codegen_->GetLabelOf(successors[index])); + // Jump to successors[index + 1] if value == case_value[index + 1]. + __ j(kEqual, codegen_->GetLabelOf(successors[index + 1])); + } + + if (index != num_entries) { + // There are an odd number of entries. Handle the last one. + DCHECK_EQ(index + 1, num_entries); + __ cmpl(value_reg_in, Immediate(lower_bound + index)); + __ j(kEqual, codegen_->GetLabelOf(successors[index])); + } + + // And the default for any other value. + if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) { + __ jmp(codegen_->GetLabelOf(default_block)); + } + return; + } // Remove the bias, if needed. Register value_reg_out = value_reg_in.AsRegister(); @@ -6346,7 +6397,6 @@ void InstructionCodeGeneratorX86_64::VisitPackedSwitch(HPackedSwitch* switch_ins CpuRegister value_reg(value_reg_out); // Is the value in range? 
- HBasicBlock* default_block = switch_instr->GetDefaultBlock(); __ cmpl(value_reg, Immediate(num_entries - 1)); __ j(kAbove, codegen_->GetLabelOf(default_block)); diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index dda9ea22d9..e5a487c761 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -17,6 +17,7 @@ #ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_ #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_ +#include "arch/x86_64/instruction_set_features_x86_64.h" #include "code_generator.h" #include "dex/compiler_enums.h" #include "driver/compiler_options.h" @@ -479,6 +480,18 @@ class CodeGeneratorX86_64 : public CodeGenerator { int64_t v, HInstruction* instruction); + // Ensure that prior stores complete to memory before subsequent loads. + // The locked add implementation will avoid serializing device memory, but will + // touch (but not change) the top of the stack. The locked add should not be used for + // ordering non-temporal stores. + void MemoryFence(bool force_mfence = false) { + if (!force_mfence && isa_features_.PrefersLockedAddSynchronization()) { + assembler_.lock()->addl(Address(CpuRegister(RSP), 0), Immediate(0)); + } else { + assembler_.mfence(); + } + } + private: // Factored implementation of GenerateFieldLoadWithBakerReadBarrier // and GenerateArrayLoadWithBakerReadBarrier. diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index 67097deaeb..c504ded54c 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -777,13 +777,6 @@ void InstructionSimplifierVisitor::VisitLessThanOrEqual(HLessThanOrEqual* condit void InstructionSimplifierVisitor::VisitCondition(HCondition* condition) { // Try to fold an HCompare into this HCondition. - // This simplification is currently supported on x86, x86_64, ARM and ARM64. - // TODO: Implement it for MIPS64. - InstructionSet instruction_set = GetGraph()->GetInstructionSet(); - if (instruction_set == kMips64) { - return; - } - HInstruction* left = condition->GetLeft(); HInstruction* right = condition->GetRight(); // We can only replace an HCondition which compares a Compare to 0. diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index e8181bbb06..4683aee603 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -825,8 +825,15 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat Label loop_head; __ Bind(&loop_head); + // TODO: When `type == Primitive::kPrimNot`, add a read barrier for + // the reference stored in the object before attempting the CAS, + // similar to the one in the art::Unsafe_compareAndSwapObject JNI + // implementation. + // + // Note that this code is not (yet) used when read barriers are + // enabled (see IntrinsicLocationsBuilderARM::VisitUnsafeCASObject). + DCHECK(!(type == Primitive::kPrimNot && kEmitCompilerReadBarrier)); __ ldrex(tmp_lo, tmp_ptr); - // TODO: Do we need a read barrier here when `type == Primitive::kPrimNot`? 
__ subs(tmp_lo, tmp_lo, ShifterOperand(expected_lo)); @@ -852,15 +859,17 @@ void IntrinsicLocationsBuilderARM::VisitUnsafeCASInt(HInvoke* invoke) { CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke); } void IntrinsicLocationsBuilderARM::VisitUnsafeCASObject(HInvoke* invoke) { - // The UnsafeCASObject intrinsic does not always work when heap - // poisoning is enabled (it breaks run-test 004-UnsafeTest); turn it - // off temporarily as a quick fix. + // The UnsafeCASObject intrinsic is missing a read barrier, and + // therefore sometimes does not work as expected (b/25883050). + // Turn it off temporarily as a quick fix, until the read barrier is + // implemented (see TODO in GenCAS below). // - // TODO(rpl): Fix it and turn it back on. + // Also, the UnsafeCASObject intrinsic does not always work when heap + // poisoning is enabled (it breaks run-test 004-UnsafeTest); turn it + // off temporarily as a quick fix (b/26204023). // - // TODO(rpl): Also, we should investigate whether we need a read - // barrier in the generated code. - if (kPoisonHeapReferences) { + // TODO(rpl): Fix these two issues and re-enable this intrinsic. + if (kEmitCompilerReadBarrier || kPoisonHeapReferences) { return; } diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 6b34daadf0..9f6863cf6e 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -1031,10 +1031,15 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat } else { __ Dmb(InnerShareable, BarrierWrites); __ Bind(&loop_head); - __ Ldxr(tmp_value, MemOperand(tmp_ptr)); - // TODO: Do we need a read barrier here when `type == Primitive::kPrimNot`? + // TODO: When `type == Primitive::kPrimNot`, add a read barrier for + // the reference stored in the object before attempting the CAS, + // similar to the one in the art::Unsafe_compareAndSwapObject JNI + // implementation. + // // Note that this code is not (yet) used when read barriers are // enabled (see IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject). + DCHECK(!(type == Primitive::kPrimNot && kEmitCompilerReadBarrier)); + __ Ldxr(tmp_value, MemOperand(tmp_ptr)); __ Cmp(tmp_value, expected); __ B(&exit_loop, ne); __ Stxr(tmp_32, value, MemOperand(tmp_ptr)); @@ -1057,15 +1062,17 @@ void IntrinsicLocationsBuilderARM64::VisitUnsafeCASLong(HInvoke* invoke) { CreateIntIntIntIntIntToInt(arena_, invoke); } void IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject(HInvoke* invoke) { - // The UnsafeCASObject intrinsic does not always work when heap - // poisoning is enabled (it breaks run-test 004-UnsafeTest); turn it - // off temporarily as a quick fix. + // The UnsafeCASObject intrinsic is missing a read barrier, and + // therefore sometimes does not work as expected (b/25883050). + // Turn it off temporarily as a quick fix, until the read barrier is + // implemented (see TODO in GenCAS below). // - // TODO(rpl): Fix it and turn it back on. + // Also, the UnsafeCASObject intrinsic does not always work when heap + // poisoning is enabled (it breaks run-test 004-UnsafeTest); turn it + // off temporarily as a quick fix (b/26204023). // - // TODO(rpl): Also, we should investigate whether we need a read - // barrier in the generated code. - if (kPoisonHeapReferences) { + // TODO(rpl): Fix these two issues and re-enable this intrinsic. 
+ if (kEmitCompilerReadBarrier || kPoisonHeapReferences) { return; } diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc index 8aa7d9ff6f..8b45ea7c4f 100644 --- a/compiler/optimizing/intrinsics_mips64.cc +++ b/compiler/optimizing/intrinsics_mips64.cc @@ -1299,6 +1299,8 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat if (type == Primitive::kPrimLong) { __ Lld(out, TMP); } else { + // Note: We will need a read barrier here, when read barrier + // support is added to the MIPS64 back end. __ Ll(out, TMP); } __ Dsubu(out, out, expected); // If we didn't get the 'expected' diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index fd454d8322..80190629ee 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -2005,7 +2005,7 @@ static void GenUnsafePut(LocationSummary* locations, } if (is_volatile) { - __ mfence(); + codegen->MemoryFence(); } if (type == Primitive::kPrimNot) { @@ -2085,6 +2085,17 @@ void IntrinsicLocationsBuilderX86::VisitUnsafeCASLong(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86::VisitUnsafeCASObject(HInvoke* invoke) { + // The UnsafeCASObject intrinsic is missing a read barrier, and + // therefore sometimes does not work as expected (b/25883050). + // Turn it off temporarily as a quick fix, until the read barrier is + // implemented. + // + // TODO(rpl): Implement a read barrier in GenCAS below and re-enable + // this intrinsic. + if (kEmitCompilerReadBarrier) { + return; + } + CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke); } @@ -2136,6 +2147,13 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* code __ PoisonHeapReference(value); } + // TODO: Add a read barrier for the reference stored in the object + // before attempting the CAS, similar to the one in the + // art::Unsafe_compareAndSwapObject JNI implementation. + // + // Note that this code is not (yet) used when read barriers are + // enabled (see IntrinsicLocationsBuilderX86::VisitUnsafeCASObject). + DCHECK(!kEmitCompilerReadBarrier); __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value); // LOCK CMPXCHG has full barrier semantics, and we don't need @@ -2145,11 +2163,8 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* code __ setb(kZero, out.AsRegister<Register>()); __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>()); - // In the case of the `UnsafeCASObject` intrinsic, accessing an - // object in the heap with LOCK CMPXCHG does not require a read - // barrier, as we do not keep a reference to this heap location. - // However, if heap poisoning is enabled, we need to unpoison the - // values that were poisoned earlier. + // If heap poisoning is enabled, we need to unpoison the values + // that were poisoned earlier. 
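For context on the GenCas/GenCAS helpers being annotated here, a hedged sketch of the operation they implement (std::atomic as a stand-in, not ART's runtime code): LDREX/STREX on ARM and LOCK CMPXCHG on x86 both realize a single-word compare-and-set, and the ARM exclusive store can fail spuriously, which is why the generated sequence loops:

#include <atomic>
#include <cstdint>

bool CompareAndSet(std::atomic<int32_t>* addr, int32_t expected, int32_t desired) {
  int32_t observed = expected;
  // compare_exchange_weak updates `observed` on failure and may fail
  // spuriously, mirroring a failed STREX.
  while (!addr->compare_exchange_weak(observed, desired)) {
    if (observed != expected) {
      return false;  // The value genuinely differs: report failure.
    }
    observed = expected;  // Spurious failure: retry.
  }
  return true;
}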
if (kPoisonHeapReferences) { if (base_equals_value) { // `value` has been moved to a temporary register, no need to diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index ce737e3f7e..aa1c109738 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -2080,7 +2080,7 @@ static void GenUnsafePut(LocationSummary* locations, Primitive::Type type, bool } if (is_volatile) { - __ mfence(); + codegen->MemoryFence(); } if (type == Primitive::kPrimNot) { @@ -2150,6 +2150,17 @@ void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASLong(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject(HInvoke* invoke) { + // The UnsafeCASObject intrinsic is missing a read barrier, and + // therefore sometimes does not work as expected (b/25883050). + // Turn it off temporarily as a quick fix, until the read barrier is + // implemented. + // + // TODO(rpl): Implement a read barrier in GenCAS below and re-enable + // this intrinsic. + if (kEmitCompilerReadBarrier) { + return; + } + CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke); } @@ -2200,6 +2211,13 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* c __ PoisonHeapReference(CpuRegister(value_reg)); } + // TODO: Add a read barrier for the reference stored in the object + // before attempting the CAS, similar to the one in the + // art::Unsafe_compareAndSwapObject JNI implementation. + // + // Note that this code is not (yet) used when read barriers are + // enabled (see IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject). + DCHECK(!kEmitCompilerReadBarrier); __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), CpuRegister(value_reg)); // LOCK CMPXCHG has full barrier semantics, and we don't need @@ -2209,11 +2227,8 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* c __ setcc(kZero, out); __ movzxb(out, out); - // In the case of the `UnsafeCASObject` intrinsic, accessing an - // object in the heap with LOCK CMPXCHG does not require a read - // barrier, as we do not keep a reference to this heap location. - // However, if heap poisoning is enabled, we need to unpoison the - // values that were poisoned earlier. + // If heap poisoning is enabled, we need to unpoison the values + // that were poisoned earlier. if (kPoisonHeapReferences) { if (base_equals_value) { // `value_reg` has been moved to a temporary register, no need diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc index adde00464b..727f2bb717 100644 --- a/compiler/optimizing/load_store_elimination.cc +++ b/compiler/optimizing/load_store_elimination.cc @@ -119,10 +119,16 @@ class HeapLocation : public ArenaObject<kArenaAllocMisc> { : ref_info_(ref_info), offset_(offset), index_(index), - declaring_class_def_index_(declaring_class_def_index) { + declaring_class_def_index_(declaring_class_def_index), + value_killed_by_loop_side_effects_(true) { DCHECK(ref_info != nullptr); DCHECK((offset == kInvalidFieldOffset && index != nullptr) || (offset != kInvalidFieldOffset && index == nullptr)); + if (ref_info->IsSingleton() && !IsArrayElement()) { + // Assume this location's value cannot be killed by loop side effects + // until proven otherwise. 
+ value_killed_by_loop_side_effects_ = false; + } } ReferenceInfo* GetReferenceInfo() const { return ref_info_; } @@ -139,11 +145,22 @@ class HeapLocation : public ArenaObject<kArenaAllocMisc> { return index_ != nullptr; } + bool IsValueKilledByLoopSideEffects() const { + return value_killed_by_loop_side_effects_; + } + + void SetValueKilledByLoopSideEffects(bool val) { + value_killed_by_loop_side_effects_ = val; + } + private: ReferenceInfo* const ref_info_; // reference for instance/static field or array access. const size_t offset_; // offset of static/instance field. HInstruction* const index_; // index of an array element. const int16_t declaring_class_def_index_; // declaring class's def's dex index. + bool value_killed_by_loop_side_effects_; // value of this location may be killed by loop + // side effects because this location is stored + // into inside a loop. DISALLOW_COPY_AND_ASSIGN(HeapLocation); }; @@ -370,13 +387,13 @@ class HeapLocationCollector : public HGraphVisitor { return heap_locations_[heap_location_idx]; } - void VisitFieldAccess(HInstruction* ref, const FieldInfo& field_info) { + HeapLocation* VisitFieldAccess(HInstruction* ref, const FieldInfo& field_info) { if (field_info.IsVolatile()) { has_volatile_ = true; } const uint16_t declaring_class_def_index = field_info.GetDeclaringClassDefIndex(); const size_t offset = field_info.GetFieldOffset().SizeValue(); - GetOrCreateHeapLocation(ref, offset, nullptr, declaring_class_def_index); + return GetOrCreateHeapLocation(ref, offset, nullptr, declaring_class_def_index); } void VisitArrayAccess(HInstruction* array, HInstruction* index) { @@ -390,8 +407,11 @@ class HeapLocationCollector : public HGraphVisitor { } void VisitInstanceFieldSet(HInstanceFieldSet* instruction) OVERRIDE { - VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo()); + HeapLocation* location = VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo()); has_heap_stores_ = true; + if (instruction->GetBlock()->GetLoopInformation() != nullptr) { + location->SetValueKilledByLoopSideEffects(true); + } } void VisitStaticFieldGet(HStaticFieldGet* instruction) OVERRIDE { @@ -565,23 +585,26 @@ class LSEVisitor : public HGraphVisitor { HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader(); ArenaVector<HInstruction*>& pre_header_heap_values = heap_values_for_[pre_header->GetBlockId()]; + // Inherit the values from pre-header. + for (size_t i = 0; i < heap_values.size(); i++) { + heap_values[i] = pre_header_heap_values[i]; + } + // We do a single pass in reverse post order. For loops, use the side effects as a hint // to see if the heap values should be killed. if (side_effects_.GetLoopEffects(block).DoesAnyWrite()) { - for (size_t i = 0; i < pre_header_heap_values.size(); i++) { - // heap value is killed by loop side effects, need to keep the last store. - KeepIfIsStore(pre_header_heap_values[i]); - } - if (kIsDebugBuild) { - // heap_values should all be kUnknownHeapValue that it is inited with. - for (size_t i = 0; i < heap_values.size(); i++) { - DCHECK_EQ(heap_values[i], kUnknownHeapValue); - } - } - } else { - // Inherit the values from pre-header. 
for (size_t i = 0; i < heap_values.size(); i++) { - heap_values[i] = pre_header_heap_values[i]; + HeapLocation* location = heap_location_collector_.GetHeapLocation(i); + ReferenceInfo* ref_info = location->GetReferenceInfo(); + if (!ref_info->IsSingleton() || location->IsValueKilledByLoopSideEffects()) { + // heap value is killed by loop side effects (stored into directly, or due to + // aliasing). + KeepIfIsStore(pre_header_heap_values[i]); + heap_values[i] = kUnknownHeapValue; + } else { + // A singleton's field that's not stored into inside a loop is invariant throughout + // the loop. + } } } } @@ -655,6 +678,16 @@ class LSEVisitor : public HGraphVisitor { } } + static bool IsIntFloatAlias(Primitive::Type type1, Primitive::Type type2) { + return (type1 == Primitive::kPrimFloat && type2 == Primitive::kPrimInt) || + (type2 == Primitive::kPrimFloat && type1 == Primitive::kPrimInt); + } + + static bool IsLongDoubleAlias(Primitive::Type type1, Primitive::Type type2) { + return (type1 == Primitive::kPrimDouble && type2 == Primitive::kPrimLong) || + (type2 == Primitive::kPrimDouble && type1 == Primitive::kPrimLong); + } + void VisitGetLocation(HInstruction* instruction, HInstruction* ref, size_t offset, @@ -686,7 +719,8 @@ class LSEVisitor : public HGraphVisitor { if ((heap_value != kUnknownHeapValue) && // Keep the load due to possible I/F, J/D array aliasing. // See b/22538329 for details. - (heap_value->GetType() == instruction->GetType())) { + !IsIntFloatAlias(heap_value->GetType(), instruction->GetType()) && + !IsLongDoubleAlias(heap_value->GetType(), instruction->GetType())) { removed_loads_.push_back(instruction); substitute_instructions_for_loads_.push_back(heap_value); TryRemovingNullCheck(instruction); @@ -751,6 +785,9 @@ class LSEVisitor : public HGraphVisitor { if (loop_info != nullptr) { // instruction is a store in the loop so the loop must does write. DCHECK(side_effects_.GetLoopEffects(loop_info->GetHeader()).DoesAnyWrite()); + // If it's a singleton, IsValueKilledByLoopSideEffects() must be true. + DCHECK(!ref_info->IsSingleton() || + heap_location_collector_.GetHeapLocation(idx)->IsValueKilledByLoopSideEffects()); if (loop_info->IsDefinedOutOfTheLoop(original_ref)) { DCHECK(original_ref->GetBlock()->Dominates(loop_info->GetPreHeader())); diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc index 30bcf19c64..176c50ce21 100644 --- a/compiler/optimizing/parallel_move_resolver.cc +++ b/compiler/optimizing/parallel_move_resolver.cc @@ -169,7 +169,7 @@ MoveOperands* ParallelMoveResolverWithSwap::PerformMove(size_t index) { // If `other_move` was swapped, we iterate again to find a new // potential cycle. required_swap = nullptr; - i = 0; + i = -1; } else if (required_swap != nullptr) { // A move is required to swap. We walk back the cycle to find the // move by just returning from this `PerforrmMove`. 
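To make the new loop handling in load_store_elimination.cc concrete, a hedged example (plain C++ for illustration; the pass operates on ART's HIR and these functions are invented). A singleton's field that is only written outside a loop keeps its pre-header value across the loop body, so loads inside the loop can be replaced; a field written inside the loop is marked as killed by loop side effects and its loads must stay:

struct Point { int x = 0; };  // Stand-in for a non-escaping ("singleton") object.

int SumInvariantField(int n) {
  Point p;
  p.x = 42;         // Stored only before the loop.
  int sum = 0;
  for (int i = 0; i < n; ++i) {
    sum += p.x;     // Load can be forwarded from the pre-header value (42).
  }
  return sum;
}

int AccumulateInLoop(int n) {
  Point p;
  for (int i = 0; i < n; ++i) {
    p.x = p.x + 1;  // Stored inside the loop: value_killed_by_loop_side_effects_
  }                 // becomes true and the load cannot be forwarded.
  return p.x;
}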
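The `i = 0` to `i = -1` change in PerformMove accounts for the enclosing loop's own increment: after a swap resolves part of a cycle, the scan has to restart from the first pending move, and resetting the index to 0 meant the subsequent increment skipped element 0. A minimal sketch of the idiom, assuming an unsigned loop index (names invented):

#include <cstddef>
#include <vector>

void RescanFromStartAfterSwap(std::vector<int>* moves) {
  for (size_t i = 0; i < moves->size(); ++i) {
    bool swapped_something = (*moves)[i] < 0;  // Hypothetical trigger.
    if (swapped_something) {
      (*moves)[i] = -(*moves)[i];
      // With i = 0 the next ++i would resume at element 1; size_t(-1) wraps
      // so that the increment brings the index back to 0.
      i = static_cast<size_t>(-1);
    }
  }
}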
diff --git a/compiler/optimizing/parallel_move_test.cc b/compiler/optimizing/parallel_move_test.cc index 46e6f3e5d0..5e8fe37669 100644 --- a/compiler/optimizing/parallel_move_test.cc +++ b/compiler/optimizing/parallel_move_test.cc @@ -609,4 +609,36 @@ TYPED_TEST(ParallelMoveTest, CyclesWith64BitsMoves) { } } +TYPED_TEST(ParallelMoveTest, CyclesWith64BitsMoves2) { + ArenaPool pool; + ArenaAllocator allocator(&pool); + + { + TypeParam resolver(&allocator); + HParallelMove* moves = new (&allocator) HParallelMove(&allocator); + moves->AddMove( + Location::RegisterLocation(0), + Location::RegisterLocation(3), + Primitive::kPrimInt, + nullptr); + moves->AddMove( + Location::RegisterPairLocation(2, 3), + Location::RegisterPairLocation(0, 1), + Primitive::kPrimLong, + nullptr); + moves->AddMove( + Location::RegisterLocation(7), + Location::RegisterLocation(2), + Primitive::kPrimInt, + nullptr); + resolver.EmitNativeCode(moves); + if (TestFixture::has_swap) { + ASSERT_STREQ("(2,3 <-> 0,1) (2 -> 3) (7 -> 2)", resolver.GetMessage().c_str()); + } else { + ASSERT_STREQ("(2,3 -> T0,T1) (0 -> 3) (T0,T1 -> 0,1) (7 -> 2)", + resolver.GetMessage().c_str()); + } + } +} + } // namespace art diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc index b383f1e1ad..a385448104 100644 --- a/compiler/optimizing/pc_relative_fixups_x86.cc +++ b/compiler/optimizing/pc_relative_fixups_x86.cc @@ -15,6 +15,7 @@ */ #include "pc_relative_fixups_x86.h" +#include "code_generator_x86.h" namespace art { namespace x86 { @@ -79,6 +80,10 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { } void VisitPackedSwitch(HPackedSwitch* switch_insn) OVERRIDE { + if (switch_insn->GetNumEntries() <= + InstructionCodeGeneratorX86::kPackedSwitchJumpTableThreshold) { + return; + } // We need to replace the HPackedSwitch with a HX86PackedSwitch in order to // address the constant area. 
InitializePCRelativeBasePointer(); diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h index 98a1a8f9a1..b79c2f0f4e 100644 --- a/compiler/utils/arm/assembler_arm.h +++ b/compiler/utils/arm/assembler_arm.h @@ -878,7 +878,15 @@ class ArmAssembler : public Assembler { Register rn, Opcode opcode, uint32_t immediate, + SetCc set_cc, ShifterOperand* shifter_op) = 0; + bool ShifterOperandCanHold(Register rd, + Register rn, + Opcode opcode, + uint32_t immediate, + ShifterOperand* shifter_op) { + return ShifterOperandCanHold(rd, rn, opcode, immediate, kCcDontCare, shifter_op); + } virtual bool ShifterOperandCanAlwaysHold(uint32_t immediate) = 0; diff --git a/compiler/utils/arm/assembler_arm32.cc b/compiler/utils/arm/assembler_arm32.cc index a7dbacd3a9..ebca25bbf9 100644 --- a/compiler/utils/arm/assembler_arm32.cc +++ b/compiler/utils/arm/assembler_arm32.cc @@ -57,6 +57,7 @@ bool Arm32Assembler::ShifterOperandCanHold(Register rd ATTRIBUTE_UNUSED, Register rn ATTRIBUTE_UNUSED, Opcode opcode ATTRIBUTE_UNUSED, uint32_t immediate, + SetCc set_cc ATTRIBUTE_UNUSED, ShifterOperand* shifter_op) { return ShifterOperandCanHoldArm32(immediate, shifter_op); } diff --git a/compiler/utils/arm/assembler_arm32.h b/compiler/utils/arm/assembler_arm32.h index ce3a87275d..bf332feb62 100644 --- a/compiler/utils/arm/assembler_arm32.h +++ b/compiler/utils/arm/assembler_arm32.h @@ -297,7 +297,9 @@ class Arm32Assembler FINAL : public ArmAssembler { Register rn, Opcode opcode, uint32_t immediate, + SetCc set_cc, ShifterOperand* shifter_op) OVERRIDE; + using ArmAssembler::ShifterOperandCanHold; // Don't hide the non-virtual override. bool ShifterOperandCanAlwaysHold(uint32_t immediate) OVERRIDE; diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc index cdeb443977..f341030c15 100644 --- a/compiler/utils/arm/assembler_thumb2.cc +++ b/compiler/utils/arm/assembler_thumb2.cc @@ -500,6 +500,7 @@ bool Thumb2Assembler::ShifterOperandCanHold(Register rd ATTRIBUTE_UNUSED, Register rn ATTRIBUTE_UNUSED, Opcode opcode, uint32_t immediate, + SetCc set_cc, ShifterOperand* shifter_op) { shifter_op->type_ = ShifterOperand::kImmediate; shifter_op->immed_ = immediate; @@ -508,7 +509,8 @@ bool Thumb2Assembler::ShifterOperandCanHold(Register rd ATTRIBUTE_UNUSED, switch (opcode) { case ADD: case SUB: - if (immediate < (1 << 12)) { // Less than (or equal to) 12 bits can always be done. + // Less than (or equal to) 12 bits can be done if we don't need to set condition codes. + if (immediate < (1 << 12) && set_cc != kCcSet) { return true; } return ArmAssembler::ModifiedImmediate(immediate) != kInvalidModifiedImmediate; @@ -1239,7 +1241,10 @@ bool Thumb2Assembler::Is32BitDataProcessing(Condition cond, // The only thumb1 instructions with a register and an immediate are ADD and SUB // with a 3-bit immediate, and RSB with zero immediate. if (opcode == ADD || opcode == SUB) { - if (!IsUint<3>(so.GetImmediate())) { + if ((cond == AL) ? set_cc == kCcKeep : set_cc == kCcSet) { + return true; // Cannot match "setflags". + } + if (!IsUint<3>(so.GetImmediate()) && !IsUint<3>(-so.GetImmediate())) { return true; } } else { @@ -1249,8 +1254,12 @@ bool Thumb2Assembler::Is32BitDataProcessing(Condition cond, // ADD, SUB, CMP and MOV may be thumb1 only if the immediate is 8 bits. if (!(opcode == ADD || opcode == SUB || opcode == MOV || opcode == CMP)) { return true; + } else if (opcode != CMP && ((cond == AL) ? 
set_cc == kCcKeep : set_cc == kCcSet)) { + return true; // Cannot match "setflags" for ADD, SUB or MOV. } else { - if (!IsUint<8>(so.GetImmediate())) { + // For ADD and SUB allow also negative 8-bit immediate as we will emit the oposite opcode. + if (!IsUint<8>(so.GetImmediate()) && + (opcode == MOV || opcode == CMP || !IsUint<8>(-so.GetImmediate()))) { return true; } } @@ -1602,12 +1611,18 @@ void Thumb2Assembler::Emit16BitAddSub(Condition cond, uint8_t rn_shift = 3; uint8_t immediate_shift = 0; bool use_immediate = false; - uint32_t immediate = 0; // Should be at most 9 bits but keep the full immediate for CHECKs. + uint32_t immediate = 0; // Should be at most 10 bits but keep the full immediate for CHECKs. uint8_t thumb_opcode; if (so.IsImmediate()) { use_immediate = true; immediate = so.GetImmediate(); + if (!IsUint<10>(immediate)) { + // Flip ADD/SUB. + opcode = (opcode == ADD) ? SUB : ADD; + immediate = -immediate; + DCHECK(IsUint<10>(immediate)); // More stringent checks below. + } } switch (opcode) { @@ -1644,7 +1659,7 @@ void Thumb2Assembler::Emit16BitAddSub(Condition cond, dp_opcode = 2U /* 0b10 */; thumb_opcode = 3U /* 0b11 */; opcode_shift = 12; - CHECK_LT(immediate, (1u << 9)); + CHECK(IsUint<9>(immediate)); CHECK_ALIGNED(immediate, 4); // Remove rd and rn from instruction by orring it with immed and clearing bits. @@ -1658,7 +1673,7 @@ void Thumb2Assembler::Emit16BitAddSub(Condition cond, dp_opcode = 2U /* 0b10 */; thumb_opcode = 5U /* 0b101 */; opcode_shift = 11; - CHECK_LT(immediate, (1u << 10)); + CHECK(IsUint<10>(immediate)); CHECK_ALIGNED(immediate, 4); // Remove rn from instruction. @@ -1668,11 +1683,13 @@ void Thumb2Assembler::Emit16BitAddSub(Condition cond, immediate >>= 2; } else if (rn != rd) { // Must use T1. + CHECK(IsUint<3>(immediate)); opcode_shift = 9; thumb_opcode = 14U /* 0b01110 */; immediate_shift = 6; } else { // T2 encoding. + CHECK(IsUint<8>(immediate)); opcode_shift = 11; thumb_opcode = 6U /* 0b110 */; rd_shift = 8; @@ -1702,7 +1719,7 @@ void Thumb2Assembler::Emit16BitAddSub(Condition cond, dp_opcode = 2U /* 0b10 */; thumb_opcode = 0x61 /* 0b1100001 */; opcode_shift = 7; - CHECK_LT(immediate, (1u << 9)); + CHECK(IsUint<9>(immediate)); CHECK_ALIGNED(immediate, 4); // Remove rd and rn from instruction by orring it with immed and clearing bits. @@ -1713,11 +1730,13 @@ void Thumb2Assembler::Emit16BitAddSub(Condition cond, immediate >>= 2; } else if (rn != rd) { // Must use T1. + CHECK(IsUint<3>(immediate)); opcode_shift = 9; thumb_opcode = 15U /* 0b01111 */; immediate_shift = 6; } else { // T2 encoding. + CHECK(IsUint<8>(immediate)); opcode_shift = 11; thumb_opcode = 7U /* 0b111 */; rd_shift = 8; @@ -3401,25 +3420,30 @@ void Thumb2Assembler::AddConstant(Register rd, Register rn, int32_t value, // positive values and sub for negatives ones, which would slightly improve // the readability of generated code for some constants. 
ShifterOperand shifter_op; - if (ShifterOperandCanHold(rd, rn, ADD, value, &shifter_op)) { + if (ShifterOperandCanHold(rd, rn, ADD, value, set_cc, &shifter_op)) { add(rd, rn, shifter_op, cond, set_cc); - } else if (ShifterOperandCanHold(rd, rn, SUB, -value, &shifter_op)) { + } else if (ShifterOperandCanHold(rd, rn, SUB, -value, set_cc, &shifter_op)) { sub(rd, rn, shifter_op, cond, set_cc); } else { CHECK(rn != IP); - if (ShifterOperandCanHold(rd, rn, MVN, ~value, &shifter_op)) { - mvn(IP, shifter_op, cond, kCcKeep); - add(rd, rn, ShifterOperand(IP), cond, set_cc); - } else if (ShifterOperandCanHold(rd, rn, MVN, ~(-value), &shifter_op)) { - mvn(IP, shifter_op, cond, kCcKeep); - sub(rd, rn, ShifterOperand(IP), cond, set_cc); + // If rd != rn, use rd as temp. This alows 16-bit ADD/SUB in more situations than using IP. + Register temp = (rd != rn) ? rd : IP; + if (ShifterOperandCanHold(temp, kNoRegister, MVN, ~value, set_cc, &shifter_op)) { + mvn(temp, shifter_op, cond, kCcKeep); + add(rd, rn, ShifterOperand(temp), cond, set_cc); + } else if (ShifterOperandCanHold(temp, kNoRegister, MVN, ~(-value), set_cc, &shifter_op)) { + mvn(temp, shifter_op, cond, kCcKeep); + sub(rd, rn, ShifterOperand(temp), cond, set_cc); + } else if (High16Bits(-value) == 0) { + movw(temp, Low16Bits(-value), cond); + sub(rd, rn, ShifterOperand(temp), cond, set_cc); } else { - movw(IP, Low16Bits(value), cond); + movw(temp, Low16Bits(value), cond); uint16_t value_high = High16Bits(value); if (value_high != 0) { - movt(IP, value_high, cond); + movt(temp, value_high, cond); } - add(rd, rn, ShifterOperand(IP), cond, set_cc); + add(rd, rn, ShifterOperand(temp), cond, set_cc); } } } @@ -3429,9 +3453,9 @@ void Thumb2Assembler::CmpConstant(Register rn, int32_t value, Condition cond) { // positive values and sub for negatives ones, which would slightly improve // the readability of generated code for some constants. ShifterOperand shifter_op; - if (ShifterOperandCanHold(kNoRegister, rn, CMP, value, &shifter_op)) { + if (ShifterOperandCanHold(kNoRegister, rn, CMP, value, kCcSet, &shifter_op)) { cmp(rn, shifter_op, cond); - } else if (ShifterOperandCanHold(kNoRegister, rn, CMN, ~value, &shifter_op)) { + } else if (ShifterOperandCanHold(kNoRegister, rn, CMN, ~value, kCcSet, &shifter_op)) { cmn(rn, shifter_op, cond); } else { CHECK(rn != IP); diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h index 9aeece8e57..bf07b2dbf8 100644 --- a/compiler/utils/arm/assembler_thumb2.h +++ b/compiler/utils/arm/assembler_thumb2.h @@ -342,7 +342,9 @@ class Thumb2Assembler FINAL : public ArmAssembler { Register rn, Opcode opcode, uint32_t immediate, + SetCc set_cc, ShifterOperand* shifter_op) OVERRIDE; + using ArmAssembler::ShifterOperandCanHold; // Don't hide the non-virtual override. bool ShifterOperandCanAlwaysHold(uint32_t immediate) OVERRIDE; diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc index 5ae2cc28a2..0ef0dc19e6 100644 --- a/compiler/utils/assembler_thumb_test.cc +++ b/compiler/utils/assembler_thumb_test.cc @@ -135,7 +135,8 @@ void DumpAndCheck(std::vector<uint8_t>& code, const char* testname, const char* toolsdir.c_str(), filename); if (kPrintResults) { // Print the results only, don't check. This is used to generate new output for inserting - // into the .inc file. + // into the .inc file, so let's add the appropriate prefix/suffix needed in the C++ code. 
+ strcat(cmd, " | sed '-es/^/ \"/' | sed '-es/$/\\\\n\",/'"); int cmd_result3 = system(cmd); ASSERT_EQ(cmd_result3, 0) << strerror(errno); } else { @@ -1379,6 +1380,252 @@ TEST(Thumb2AssemblerTest, CompareAndBranch) { EmitAndCheck(&assembler, "CompareAndBranch"); } +TEST(Thumb2AssemblerTest, AddConstant) { + arm::Thumb2Assembler assembler; + + // Low registers, Rd != Rn. + __ AddConstant(R0, R1, 0); // MOV. + __ AddConstant(R0, R1, 1); // 16-bit ADDS, encoding T1. + __ AddConstant(R0, R1, 7); // 16-bit ADDS, encoding T1. + __ AddConstant(R0, R1, 8); // 32-bit ADD, encoding T3. + __ AddConstant(R0, R1, 255); // 32-bit ADD, encoding T3. + __ AddConstant(R0, R1, 256); // 32-bit ADD, encoding T3. + __ AddConstant(R0, R1, 257); // 32-bit ADD, encoding T4. + __ AddConstant(R0, R1, 0xfff); // 32-bit ADD, encoding T4. + __ AddConstant(R0, R1, 0x1000); // 32-bit ADD, encoding T3. + __ AddConstant(R0, R1, 0x1001); // MVN+SUB. + __ AddConstant(R0, R1, 0x1002); // MOVW+ADD. + __ AddConstant(R0, R1, 0xffff); // MOVW+ADD. + __ AddConstant(R0, R1, 0x10000); // 32-bit ADD, encoding T3. + __ AddConstant(R0, R1, 0x10001); // 32-bit ADD, encoding T3. + __ AddConstant(R0, R1, 0x10002); // MVN+SUB. + __ AddConstant(R0, R1, 0x10003); // MOVW+MOVT+ADD. + __ AddConstant(R0, R1, -1); // 16-bit SUBS. + __ AddConstant(R0, R1, -7); // 16-bit SUBS. + __ AddConstant(R0, R1, -8); // 32-bit SUB, encoding T3. + __ AddConstant(R0, R1, -255); // 32-bit SUB, encoding T3. + __ AddConstant(R0, R1, -256); // 32-bit SUB, encoding T3. + __ AddConstant(R0, R1, -257); // 32-bit SUB, encoding T4. + __ AddConstant(R0, R1, -0xfff); // 32-bit SUB, encoding T4. + __ AddConstant(R0, R1, -0x1000); // 32-bit SUB, encoding T3. + __ AddConstant(R0, R1, -0x1001); // MVN+ADD. + __ AddConstant(R0, R1, -0x1002); // MOVW+SUB. + __ AddConstant(R0, R1, -0xffff); // MOVW+SUB. + __ AddConstant(R0, R1, -0x10000); // 32-bit SUB, encoding T3. + __ AddConstant(R0, R1, -0x10001); // 32-bit SUB, encoding T3. + __ AddConstant(R0, R1, -0x10002); // MVN+ADD. + __ AddConstant(R0, R1, -0x10003); // MOVW+MOVT+ADD. + + // Low registers, Rd == Rn. + __ AddConstant(R0, R0, 0); // Nothing. + __ AddConstant(R1, R1, 1); // 16-bit ADDS, encoding T2, + __ AddConstant(R0, R0, 7); // 16-bit ADDS, encoding T2. + __ AddConstant(R1, R1, 8); // 16-bit ADDS, encoding T2. + __ AddConstant(R0, R0, 255); // 16-bit ADDS, encoding T2. + __ AddConstant(R1, R1, 256); // 32-bit ADD, encoding T3. + __ AddConstant(R0, R0, 257); // 32-bit ADD, encoding T4. + __ AddConstant(R1, R1, 0xfff); // 32-bit ADD, encoding T4. + __ AddConstant(R0, R0, 0x1000); // 32-bit ADD, encoding T3. + __ AddConstant(R1, R1, 0x1001); // MVN+SUB. + __ AddConstant(R0, R0, 0x1002); // MOVW+ADD. + __ AddConstant(R1, R1, 0xffff); // MOVW+ADD. + __ AddConstant(R0, R0, 0x10000); // 32-bit ADD, encoding T3. + __ AddConstant(R1, R1, 0x10001); // 32-bit ADD, encoding T3. + __ AddConstant(R0, R0, 0x10002); // MVN+SUB. + __ AddConstant(R1, R1, 0x10003); // MOVW+MOVT+ADD. + __ AddConstant(R0, R0, -1); // 16-bit SUBS, encoding T2. + __ AddConstant(R1, R1, -7); // 16-bit SUBS, encoding T2. + __ AddConstant(R0, R0, -8); // 16-bit SUBS, encoding T2. + __ AddConstant(R1, R1, -255); // 16-bit SUBS, encoding T2. + __ AddConstant(R0, R0, -256); // 32-bit SUB, encoding T3. + __ AddConstant(R1, R1, -257); // 32-bit SUB, encoding T4. + __ AddConstant(R0, R0, -0xfff); // 32-bit SUB, encoding T4. + __ AddConstant(R1, R1, -0x1000); // 32-bit SUB, encoding T3. + __ AddConstant(R0, R0, -0x1001); // MVN+ADD. 
+ __ AddConstant(R1, R1, -0x1002); // MOVW+SUB. + __ AddConstant(R0, R0, -0xffff); // MOVW+SUB. + __ AddConstant(R1, R1, -0x10000); // 32-bit SUB, encoding T3. + __ AddConstant(R0, R0, -0x10001); // 32-bit SUB, encoding T3. + __ AddConstant(R1, R1, -0x10002); // MVN+ADD. + __ AddConstant(R0, R0, -0x10003); // MOVW+MOVT+ADD. + + // High registers. + __ AddConstant(R8, R8, 0); // Nothing. + __ AddConstant(R8, R1, 1); // 32-bit ADD, encoding T3, + __ AddConstant(R0, R8, 7); // 32-bit ADD, encoding T3. + __ AddConstant(R8, R8, 8); // 32-bit ADD, encoding T3. + __ AddConstant(R8, R1, 255); // 32-bit ADD, encoding T3. + __ AddConstant(R0, R8, 256); // 32-bit ADD, encoding T3. + __ AddConstant(R8, R8, 257); // 32-bit ADD, encoding T4. + __ AddConstant(R8, R1, 0xfff); // 32-bit ADD, encoding T4. + __ AddConstant(R0, R8, 0x1000); // 32-bit ADD, encoding T3. + __ AddConstant(R8, R8, 0x1001); // MVN+SUB. + __ AddConstant(R0, R1, 0x1002); // MOVW+ADD. + __ AddConstant(R0, R8, 0xffff); // MOVW+ADD. + __ AddConstant(R8, R8, 0x10000); // 32-bit ADD, encoding T3. + __ AddConstant(R8, R1, 0x10001); // 32-bit ADD, encoding T3. + __ AddConstant(R0, R8, 0x10002); // MVN+SUB. + __ AddConstant(R0, R8, 0x10003); // MOVW+MOVT+ADD. + __ AddConstant(R8, R8, -1); // 32-bit ADD, encoding T3. + __ AddConstant(R8, R1, -7); // 32-bit SUB, encoding T3. + __ AddConstant(R0, R8, -8); // 32-bit SUB, encoding T3. + __ AddConstant(R8, R8, -255); // 32-bit SUB, encoding T3. + __ AddConstant(R8, R1, -256); // 32-bit SUB, encoding T3. + __ AddConstant(R0, R8, -257); // 32-bit SUB, encoding T4. + __ AddConstant(R8, R8, -0xfff); // 32-bit SUB, encoding T4. + __ AddConstant(R8, R1, -0x1000); // 32-bit SUB, encoding T3. + __ AddConstant(R0, R8, -0x1001); // MVN+ADD. + __ AddConstant(R0, R1, -0x1002); // MOVW+SUB. + __ AddConstant(R8, R1, -0xffff); // MOVW+SUB. + __ AddConstant(R0, R8, -0x10000); // 32-bit SUB, encoding T3. + __ AddConstant(R8, R8, -0x10001); // 32-bit SUB, encoding T3. + __ AddConstant(R8, R1, -0x10002); // MVN+SUB. + __ AddConstant(R0, R8, -0x10003); // MOVW+MOVT+ADD. + + // Low registers, Rd != Rn, kCcKeep. + __ AddConstant(R0, R1, 0, AL, kCcKeep); // MOV. + __ AddConstant(R0, R1, 1, AL, kCcKeep); // 32-bit ADD, encoding T3. + __ AddConstant(R0, R1, 7, AL, kCcKeep); // 32-bit ADD, encoding T3. + __ AddConstant(R0, R1, 8, AL, kCcKeep); // 32-bit ADD, encoding T3. + __ AddConstant(R0, R1, 255, AL, kCcKeep); // 32-bit ADD, encoding T3. + __ AddConstant(R0, R1, 256, AL, kCcKeep); // 32-bit ADD, encoding T3. + __ AddConstant(R0, R1, 257, AL, kCcKeep); // 32-bit ADD, encoding T4. + __ AddConstant(R0, R1, 0xfff, AL, kCcKeep); // 32-bit ADD, encoding T4. + __ AddConstant(R0, R1, 0x1000, AL, kCcKeep); // 32-bit ADD, encoding T3. + __ AddConstant(R0, R1, 0x1001, AL, kCcKeep); // MVN+SUB. + __ AddConstant(R0, R1, 0x1002, AL, kCcKeep); // MOVW+ADD. + __ AddConstant(R0, R1, 0xffff, AL, kCcKeep); // MOVW+ADD. + __ AddConstant(R0, R1, 0x10000, AL, kCcKeep); // 32-bit ADD, encoding T3. + __ AddConstant(R0, R1, 0x10001, AL, kCcKeep); // 32-bit ADD, encoding T3. + __ AddConstant(R0, R1, 0x10002, AL, kCcKeep); // MVN+SUB. + __ AddConstant(R0, R1, 0x10003, AL, kCcKeep); // MOVW+MOVT+ADD. + __ AddConstant(R0, R1, -1, AL, kCcKeep); // 32-bit ADD, encoding T3. + __ AddConstant(R0, R1, -7, AL, kCcKeep); // 32-bit SUB, encoding T3. + __ AddConstant(R0, R1, -8, AL, kCcKeep); // 32-bit SUB, encoding T3. + __ AddConstant(R0, R1, -255, AL, kCcKeep); // 32-bit SUB, encoding T3. 
+ __ AddConstant(R0, R1, -256, AL, kCcKeep); // 32-bit SUB, encoding T3. + __ AddConstant(R0, R1, -257, AL, kCcKeep); // 32-bit SUB, encoding T4. + __ AddConstant(R0, R1, -0xfff, AL, kCcKeep); // 32-bit SUB, encoding T4. + __ AddConstant(R0, R1, -0x1000, AL, kCcKeep); // 32-bit SUB, encoding T3. + __ AddConstant(R0, R1, -0x1001, AL, kCcKeep); // MVN+ADD. + __ AddConstant(R0, R1, -0x1002, AL, kCcKeep); // MOVW+SUB. + __ AddConstant(R0, R1, -0xffff, AL, kCcKeep); // MOVW+SUB. + __ AddConstant(R0, R1, -0x10000, AL, kCcKeep); // 32-bit SUB, encoding T3. + __ AddConstant(R0, R1, -0x10001, AL, kCcKeep); // 32-bit SUB, encoding T3. + __ AddConstant(R0, R1, -0x10002, AL, kCcKeep); // MVN+ADD. + __ AddConstant(R0, R1, -0x10003, AL, kCcKeep); // MOVW+MOVT+ADD. + + // Low registers, Rd == Rn, kCcKeep. + __ AddConstant(R0, R0, 0, AL, kCcKeep); // Nothing. + __ AddConstant(R1, R1, 1, AL, kCcKeep); // 32-bit ADD, encoding T3. + __ AddConstant(R0, R0, 7, AL, kCcKeep); // 32-bit ADD, encoding T3. + __ AddConstant(R1, R1, 8, AL, kCcKeep); // 32-bit ADD, encoding T3. + __ AddConstant(R0, R0, 255, AL, kCcKeep); // 32-bit ADD, encoding T3. + __ AddConstant(R1, R1, 256, AL, kCcKeep); // 32-bit ADD, encoding T3. + __ AddConstant(R0, R0, 257, AL, kCcKeep); // 32-bit ADD, encoding T4. + __ AddConstant(R1, R1, 0xfff, AL, kCcKeep); // 32-bit ADD, encoding T4. + __ AddConstant(R0, R0, 0x1000, AL, kCcKeep); // 32-bit ADD, encoding T3. + __ AddConstant(R1, R1, 0x1001, AL, kCcKeep); // MVN+SUB. + __ AddConstant(R0, R0, 0x1002, AL, kCcKeep); // MOVW+ADD. + __ AddConstant(R1, R1, 0xffff, AL, kCcKeep); // MOVW+ADD. + __ AddConstant(R0, R0, 0x10000, AL, kCcKeep); // 32-bit ADD, encoding T3. + __ AddConstant(R1, R1, 0x10001, AL, kCcKeep); // 32-bit ADD, encoding T3. + __ AddConstant(R0, R0, 0x10002, AL, kCcKeep); // MVN+SUB. + __ AddConstant(R1, R1, 0x10003, AL, kCcKeep); // MOVW+MOVT+ADD. + __ AddConstant(R0, R0, -1, AL, kCcKeep); // 32-bit ADD, encoding T3. + __ AddConstant(R1, R1, -7, AL, kCcKeep); // 32-bit SUB, encoding T3. + __ AddConstant(R0, R0, -8, AL, kCcKeep); // 32-bit SUB, encoding T3. + __ AddConstant(R1, R1, -255, AL, kCcKeep); // 32-bit SUB, encoding T3. + __ AddConstant(R0, R0, -256, AL, kCcKeep); // 32-bit SUB, encoding T3. + __ AddConstant(R1, R1, -257, AL, kCcKeep); // 32-bit SUB, encoding T4. + __ AddConstant(R0, R0, -0xfff, AL, kCcKeep); // 32-bit SUB, encoding T4. + __ AddConstant(R1, R1, -0x1000, AL, kCcKeep); // 32-bit SUB, encoding T3. + __ AddConstant(R0, R0, -0x1001, AL, kCcKeep); // MVN+ADD. + __ AddConstant(R1, R1, -0x1002, AL, kCcKeep); // MOVW+SUB. + __ AddConstant(R0, R0, -0xffff, AL, kCcKeep); // MOVW+SUB. + __ AddConstant(R1, R1, -0x10000, AL, kCcKeep); // 32-bit SUB, encoding T3. + __ AddConstant(R0, R0, -0x10001, AL, kCcKeep); // 32-bit SUB, encoding T3. + __ AddConstant(R1, R1, -0x10002, AL, kCcKeep); // MVN+ADD. + __ AddConstant(R0, R0, -0x10003, AL, kCcKeep); // MOVW+MOVT+ADD. + + // Low registers, Rd != Rn, kCcSet. + __ AddConstant(R0, R1, 0, AL, kCcSet); // 16-bit ADDS. + __ AddConstant(R0, R1, 1, AL, kCcSet); // 16-bit ADDS. + __ AddConstant(R0, R1, 7, AL, kCcSet); // 16-bit ADDS. + __ AddConstant(R0, R1, 8, AL, kCcSet); // 32-bit ADDS, encoding T3. + __ AddConstant(R0, R1, 255, AL, kCcSet); // 32-bit ADDS, encoding T3. + __ AddConstant(R0, R1, 256, AL, kCcSet); // 32-bit ADDS, encoding T3. + __ AddConstant(R0, R1, 257, AL, kCcSet); // MVN+SUBS. + __ AddConstant(R0, R1, 0xfff, AL, kCcSet); // MOVW+ADDS. + __ AddConstant(R0, R1, 0x1000, AL, kCcSet); // 32-bit ADDS, encoding T3. 
+ __ AddConstant(R0, R1, 0x1001, AL, kCcSet); // MVN+SUBS. + __ AddConstant(R0, R1, 0x1002, AL, kCcSet); // MOVW+ADDS. + __ AddConstant(R0, R1, 0xffff, AL, kCcSet); // MOVW+ADDS. + __ AddConstant(R0, R1, 0x10000, AL, kCcSet); // 32-bit ADDS, encoding T3. + __ AddConstant(R0, R1, 0x10001, AL, kCcSet); // 32-bit ADDS, encoding T3. + __ AddConstant(R0, R1, 0x10002, AL, kCcSet); // MVN+SUBS. + __ AddConstant(R0, R1, 0x10003, AL, kCcSet); // MOVW+MOVT+ADDS. + __ AddConstant(R0, R1, -1, AL, kCcSet); // 16-bit SUBS. + __ AddConstant(R0, R1, -7, AL, kCcSet); // 16-bit SUBS. + __ AddConstant(R0, R1, -8, AL, kCcSet); // 32-bit SUBS, encoding T3. + __ AddConstant(R0, R1, -255, AL, kCcSet); // 32-bit SUBS, encoding T3. + __ AddConstant(R0, R1, -256, AL, kCcSet); // 32-bit SUBS, encoding T3. + __ AddConstant(R0, R1, -257, AL, kCcSet); // MVN+ADDS. + __ AddConstant(R0, R1, -0xfff, AL, kCcSet); // MOVW+SUBS. + __ AddConstant(R0, R1, -0x1000, AL, kCcSet); // 32-bit SUBS, encoding T3. + __ AddConstant(R0, R1, -0x1001, AL, kCcSet); // MVN+ADDS. + __ AddConstant(R0, R1, -0x1002, AL, kCcSet); // MOVW+SUBS. + __ AddConstant(R0, R1, -0xffff, AL, kCcSet); // MOVW+SUBS. + __ AddConstant(R0, R1, -0x10000, AL, kCcSet); // 32-bit SUBS, encoding T3. + __ AddConstant(R0, R1, -0x10001, AL, kCcSet); // 32-bit SUBS, encoding T3. + __ AddConstant(R0, R1, -0x10002, AL, kCcSet); // MVN+ADDS. + __ AddConstant(R0, R1, -0x10003, AL, kCcSet); // MOVW+MOVT+ADDS. + + // Low registers, Rd == Rn, kCcSet. + __ AddConstant(R0, R0, 0, AL, kCcSet); // 16-bit ADDS, encoding T2. + __ AddConstant(R1, R1, 1, AL, kCcSet); // 16-bit ADDS, encoding T2. + __ AddConstant(R0, R0, 7, AL, kCcSet); // 16-bit ADDS, encoding T2. + __ AddConstant(R1, R1, 8, AL, kCcSet); // 16-bit ADDS, encoding T2. + __ AddConstant(R0, R0, 255, AL, kCcSet); // 16-bit ADDS, encoding T2. + __ AddConstant(R1, R1, 256, AL, kCcSet); // 32-bit ADDS, encoding T3. + __ AddConstant(R0, R0, 257, AL, kCcSet); // MVN+SUBS. + __ AddConstant(R1, R1, 0xfff, AL, kCcSet); // MOVW+ADDS. + __ AddConstant(R0, R0, 0x1000, AL, kCcSet); // 32-bit ADDS, encoding T3. + __ AddConstant(R1, R1, 0x1001, AL, kCcSet); // MVN+SUBS. + __ AddConstant(R0, R0, 0x1002, AL, kCcSet); // MOVW+ADDS. + __ AddConstant(R1, R1, 0xffff, AL, kCcSet); // MOVW+ADDS. + __ AddConstant(R0, R0, 0x10000, AL, kCcSet); // 32-bit ADDS, encoding T3. + __ AddConstant(R1, R1, 0x10001, AL, kCcSet); // 32-bit ADDS, encoding T3. + __ AddConstant(R0, R0, 0x10002, AL, kCcSet); // MVN+SUBS. + __ AddConstant(R1, R1, 0x10003, AL, kCcSet); // MOVW+MOVT+ADDS. + __ AddConstant(R0, R0, -1, AL, kCcSet); // 16-bit SUBS, encoding T2. + __ AddConstant(R1, R1, -7, AL, kCcSet); // 16-bit SUBS, encoding T2. + __ AddConstant(R0, R0, -8, AL, kCcSet); // 16-bit SUBS, encoding T2. + __ AddConstant(R1, R1, -255, AL, kCcSet); // 16-bit SUBS, encoding T2. + __ AddConstant(R0, R0, -256, AL, kCcSet); // 32-bit SUB, encoding T3. + __ AddConstant(R1, R1, -257, AL, kCcSet); // MNV+ADDS. + __ AddConstant(R0, R0, -0xfff, AL, kCcSet); // MOVW+SUBS. + __ AddConstant(R1, R1, -0x1000, AL, kCcSet); // 32-bit SUB, encoding T3. + __ AddConstant(R0, R0, -0x1001, AL, kCcSet); // MVN+ADDS. + __ AddConstant(R1, R1, -0x1002, AL, kCcSet); // MOVW+SUBS. + __ AddConstant(R0, R0, -0xffff, AL, kCcSet); // MOVW+SUBS. + __ AddConstant(R1, R1, -0x10000, AL, kCcSet); // 32-bit SUBS, encoding T3. + __ AddConstant(R0, R0, -0x10001, AL, kCcSet); // 32-bit SUBS, encoding T3. + __ AddConstant(R1, R1, -0x10002, AL, kCcSet); // MVN+ADDS. 
+ __ AddConstant(R0, R0, -0x10003, AL, kCcSet); // MOVW+MOVT+ADDS. + + __ it(EQ); + __ AddConstant(R0, R1, 1, EQ, kCcSet); // 32-bit ADDS, encoding T3. + __ it(NE); + __ AddConstant(R0, R1, 1, NE, kCcKeep); // 16-bit ADDS, encoding T1. + __ it(GE); + __ AddConstant(R0, R0, 1, GE, kCcSet); // 32-bit ADDS, encoding T3. + __ it(LE); + __ AddConstant(R0, R0, 1, LE, kCcKeep); // 16-bit ADDS, encoding T2. + + EmitAndCheck(&assembler, "AddConstant"); +} + #undef __ } // namespace arm } // namespace art diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc index 886295e9f9..f07f8c74d7 100644 --- a/compiler/utils/assembler_thumb_test_expected.cc.inc +++ b/compiler/utils/assembler_thumb_test_expected.cc.inc @@ -5052,6 +5052,324 @@ const char* CompareAndBranchResults[] = { nullptr }; +const char* AddConstantResults[] = { + " 0: 4608 mov r0, r1\n", + " 2: 1c48 adds r0, r1, #1\n", + " 4: 1dc8 adds r0, r1, #7\n", + " 6: f101 0008 add.w r0, r1, #8\n", + " a: f101 00ff add.w r0, r1, #255 ; 0xff\n", + " e: f501 7080 add.w r0, r1, #256 ; 0x100\n", + " 12: f201 1001 addw r0, r1, #257 ; 0x101\n", + " 16: f601 70ff addw r0, r1, #4095 ; 0xfff\n", + " 1a: f501 5080 add.w r0, r1, #4096 ; 0x1000\n", + " 1e: f46f 5080 mvn.w r0, #4096 ; 0x1000\n", + " 22: 1a08 subs r0, r1, r0\n", + " 24: f241 0002 movw r0, #4098 ; 0x1002\n", + " 28: 1808 adds r0, r1, r0\n", + " 2a: f64f 70ff movw r0, #65535 ; 0xffff\n", + " 2e: 1808 adds r0, r1, r0\n", + " 30: f501 3080 add.w r0, r1, #65536 ; 0x10000\n", + " 34: f101 1001 add.w r0, r1, #65537 ; 0x10001\n", + " 38: f06f 1001 mvn.w r0, #65537 ; 0x10001\n", + " 3c: 1a08 subs r0, r1, r0\n", + " 3e: f240 0003 movw r0, #3\n", + " 42: f2c0 0001 movt r0, #1\n", + " 46: 1808 adds r0, r1, r0\n", + " 48: 1e48 subs r0, r1, #1\n", + " 4a: 1fc8 subs r0, r1, #7\n", + " 4c: f1a1 0008 sub.w r0, r1, #8\n", + " 50: f1a1 00ff sub.w r0, r1, #255 ; 0xff\n", + " 54: f5a1 7080 sub.w r0, r1, #256 ; 0x100\n", + " 58: f2a1 1001 subw r0, r1, #257 ; 0x101\n", + " 5c: f6a1 70ff subw r0, r1, #4095 ; 0xfff\n", + " 60: f5a1 5080 sub.w r0, r1, #4096 ; 0x1000\n", + " 64: f46f 5080 mvn.w r0, #4096 ; 0x1000\n", + " 68: 1808 adds r0, r1, r0\n", + " 6a: f241 0002 movw r0, #4098 ; 0x1002\n", + " 6e: 1a08 subs r0, r1, r0\n", + " 70: f64f 70ff movw r0, #65535 ; 0xffff\n", + " 74: 1a08 subs r0, r1, r0\n", + " 76: f5a1 3080 sub.w r0, r1, #65536 ; 0x10000\n", + " 7a: f1a1 1001 sub.w r0, r1, #65537 ; 0x10001\n", + " 7e: f06f 1001 mvn.w r0, #65537 ; 0x10001\n", + " 82: 1808 adds r0, r1, r0\n", + " 84: f64f 70fd movw r0, #65533 ; 0xfffd\n", + " 88: f6cf 70fe movt r0, #65534 ; 0xfffe\n", + " 8c: 1808 adds r0, r1, r0\n", + " 8e: 3101 adds r1, #1\n", + " 90: 3007 adds r0, #7\n", + " 92: 3108 adds r1, #8\n", + " 94: 30ff adds r0, #255 ; 0xff\n", + " 96: f501 7180 add.w r1, r1, #256 ; 0x100\n", + " 9a: f200 1001 addw r0, r0, #257 ; 0x101\n", + " 9e: f601 71ff addw r1, r1, #4095 ; 0xfff\n", + " a2: f500 5080 add.w r0, r0, #4096 ; 0x1000\n", + " a6: f46f 5c80 mvn.w ip, #4096 ; 0x1000\n", + " aa: eba1 010c sub.w r1, r1, ip\n", + " ae: f241 0c02 movw ip, #4098 ; 0x1002\n", + " b2: 4460 add r0, ip\n", + " b4: f64f 7cff movw ip, #65535 ; 0xffff\n", + " b8: 4461 add r1, ip\n", + " ba: f500 3080 add.w r0, r0, #65536 ; 0x10000\n", + " be: f101 1101 add.w r1, r1, #65537 ; 0x10001\n", + " c2: f06f 1c01 mvn.w ip, #65537 ; 0x10001\n", + " c6: eba0 000c sub.w r0, r0, ip\n", + " ca: f240 0c03 movw ip, #3\n", + " ce: f2c0 0c01 movt ip, #1\n", + " d2: 4461 add r1, ip\n", + " d4: 3801 
subs r0, #1\n", + " d6: 3907 subs r1, #7\n", + " d8: 3808 subs r0, #8\n", + " da: 39ff subs r1, #255 ; 0xff\n", + " dc: f5a0 7080 sub.w r0, r0, #256 ; 0x100\n", + " e0: f2a1 1101 subw r1, r1, #257 ; 0x101\n", + " e4: f6a0 70ff subw r0, r0, #4095 ; 0xfff\n", + " e8: f5a1 5180 sub.w r1, r1, #4096 ; 0x1000\n", + " ec: f46f 5c80 mvn.w ip, #4096 ; 0x1000\n", + " f0: 4460 add r0, ip\n", + " f2: f241 0c02 movw ip, #4098 ; 0x1002\n", + " f6: eba1 010c sub.w r1, r1, ip\n", + " fa: f64f 7cff movw ip, #65535 ; 0xffff\n", + " fe: eba0 000c sub.w r0, r0, ip\n", + " 102: f5a1 3180 sub.w r1, r1, #65536 ; 0x10000\n", + " 106: f1a0 1001 sub.w r0, r0, #65537 ; 0x10001\n", + " 10a: f06f 1c01 mvn.w ip, #65537 ; 0x10001\n", + " 10e: 4461 add r1, ip\n", + " 110: f64f 7cfd movw ip, #65533 ; 0xfffd\n", + " 114: f6cf 7cfe movt ip, #65534 ; 0xfffe\n", + " 118: 4460 add r0, ip\n", + " 11a: f101 0801 add.w r8, r1, #1\n", + " 11e: f108 0007 add.w r0, r8, #7\n", + " 122: f108 0808 add.w r8, r8, #8\n", + " 126: f101 08ff add.w r8, r1, #255 ; 0xff\n", + " 12a: f508 7080 add.w r0, r8, #256 ; 0x100\n", + " 12e: f208 1801 addw r8, r8, #257 ; 0x101\n", + " 132: f601 78ff addw r8, r1, #4095 ; 0xfff\n", + " 136: f508 5080 add.w r0, r8, #4096 ; 0x1000\n", + " 13a: f46f 5c80 mvn.w ip, #4096 ; 0x1000\n", + " 13e: eba8 080c sub.w r8, r8, ip\n", + " 142: f241 0002 movw r0, #4098 ; 0x1002\n", + " 146: 1808 adds r0, r1, r0\n", + " 148: f64f 70ff movw r0, #65535 ; 0xffff\n", + " 14c: eb08 0000 add.w r0, r8, r0\n", + " 150: f508 3880 add.w r8, r8, #65536 ; 0x10000\n", + " 154: f101 1801 add.w r8, r1, #65537 ; 0x10001\n", + " 158: f06f 1001 mvn.w r0, #65537 ; 0x10001\n", + " 15c: eba8 0000 sub.w r0, r8, r0\n", + " 160: f240 0003 movw r0, #3\n", + " 164: f2c0 0001 movt r0, #1\n", + " 168: eb08 0000 add.w r0, r8, r0\n", + " 16c: f108 38ff add.w r8, r8, #4294967295 ; 0xffffffff\n", + " 170: f1a1 0807 sub.w r8, r1, #7\n", + " 174: f1a8 0008 sub.w r0, r8, #8\n", + " 178: f1a8 08ff sub.w r8, r8, #255 ; 0xff\n", + " 17c: f5a1 7880 sub.w r8, r1, #256 ; 0x100\n", + " 180: f2a8 1001 subw r0, r8, #257 ; 0x101\n", + " 184: f6a8 78ff subw r8, r8, #4095 ; 0xfff\n", + " 188: f5a1 5880 sub.w r8, r1, #4096 ; 0x1000\n", + " 18c: f46f 5080 mvn.w r0, #4096 ; 0x1000\n", + " 190: eb08 0000 add.w r0, r8, r0\n", + " 194: f241 0002 movw r0, #4098 ; 0x1002\n", + " 198: 1a08 subs r0, r1, r0\n", + " 19a: f64f 78ff movw r8, #65535 ; 0xffff\n", + " 19e: eba1 0808 sub.w r8, r1, r8\n", + " 1a2: f5a8 3080 sub.w r0, r8, #65536 ; 0x10000\n", + " 1a6: f1a8 1801 sub.w r8, r8, #65537 ; 0x10001\n", + " 1aa: f06f 1801 mvn.w r8, #65537 ; 0x10001\n", + " 1ae: eb01 0808 add.w r8, r1, r8\n", + " 1b2: f64f 70fd movw r0, #65533 ; 0xfffd\n", + " 1b6: f6cf 70fe movt r0, #65534 ; 0xfffe\n", + " 1ba: eb08 0000 add.w r0, r8, r0\n", + " 1be: 4608 mov r0, r1\n", + " 1c0: f101 0001 add.w r0, r1, #1\n", + " 1c4: f101 0007 add.w r0, r1, #7\n", + " 1c8: f101 0008 add.w r0, r1, #8\n", + " 1cc: f101 00ff add.w r0, r1, #255 ; 0xff\n", + " 1d0: f501 7080 add.w r0, r1, #256 ; 0x100\n", + " 1d4: f201 1001 addw r0, r1, #257 ; 0x101\n", + " 1d8: f601 70ff addw r0, r1, #4095 ; 0xfff\n", + " 1dc: f501 5080 add.w r0, r1, #4096 ; 0x1000\n", + " 1e0: f46f 5080 mvn.w r0, #4096 ; 0x1000\n", + " 1e4: eba1 0000 sub.w r0, r1, r0\n", + " 1e8: f241 0002 movw r0, #4098 ; 0x1002\n", + " 1ec: eb01 0000 add.w r0, r1, r0\n", + " 1f0: f64f 70ff movw r0, #65535 ; 0xffff\n", + " 1f4: eb01 0000 add.w r0, r1, r0\n", + " 1f8: f501 3080 add.w r0, r1, #65536 ; 0x10000\n", + " 1fc: f101 1001 add.w r0, r1, #65537 ; 0x10001\n", 
+ " 200: f06f 1001 mvn.w r0, #65537 ; 0x10001\n", + " 204: eba1 0000 sub.w r0, r1, r0\n", + " 208: f240 0003 movw r0, #3\n", + " 20c: f2c0 0001 movt r0, #1\n", + " 210: eb01 0000 add.w r0, r1, r0\n", + " 214: f101 30ff add.w r0, r1, #4294967295 ; 0xffffffff\n", + " 218: f1a1 0007 sub.w r0, r1, #7\n", + " 21c: f1a1 0008 sub.w r0, r1, #8\n", + " 220: f1a1 00ff sub.w r0, r1, #255 ; 0xff\n", + " 224: f5a1 7080 sub.w r0, r1, #256 ; 0x100\n", + " 228: f2a1 1001 subw r0, r1, #257 ; 0x101\n", + " 22c: f6a1 70ff subw r0, r1, #4095 ; 0xfff\n", + " 230: f5a1 5080 sub.w r0, r1, #4096 ; 0x1000\n", + " 234: f46f 5080 mvn.w r0, #4096 ; 0x1000\n", + " 238: eb01 0000 add.w r0, r1, r0\n", + " 23c: f241 0002 movw r0, #4098 ; 0x1002\n", + " 240: eba1 0000 sub.w r0, r1, r0\n", + " 244: f64f 70ff movw r0, #65535 ; 0xffff\n", + " 248: eba1 0000 sub.w r0, r1, r0\n", + " 24c: f5a1 3080 sub.w r0, r1, #65536 ; 0x10000\n", + " 250: f1a1 1001 sub.w r0, r1, #65537 ; 0x10001\n", + " 254: f06f 1001 mvn.w r0, #65537 ; 0x10001\n", + " 258: eb01 0000 add.w r0, r1, r0\n", + " 25c: f64f 70fd movw r0, #65533 ; 0xfffd\n", + " 260: f6cf 70fe movt r0, #65534 ; 0xfffe\n", + " 264: eb01 0000 add.w r0, r1, r0\n", + " 268: f101 0101 add.w r1, r1, #1\n", + " 26c: f100 0007 add.w r0, r0, #7\n", + " 270: f101 0108 add.w r1, r1, #8\n", + " 274: f100 00ff add.w r0, r0, #255 ; 0xff\n", + " 278: f501 7180 add.w r1, r1, #256 ; 0x100\n", + " 27c: f200 1001 addw r0, r0, #257 ; 0x101\n", + " 280: f601 71ff addw r1, r1, #4095 ; 0xfff\n", + " 284: f500 5080 add.w r0, r0, #4096 ; 0x1000\n", + " 288: f46f 5c80 mvn.w ip, #4096 ; 0x1000\n", + " 28c: eba1 010c sub.w r1, r1, ip\n", + " 290: f241 0c02 movw ip, #4098 ; 0x1002\n", + " 294: 4460 add r0, ip\n", + " 296: f64f 7cff movw ip, #65535 ; 0xffff\n", + " 29a: 4461 add r1, ip\n", + " 29c: f500 3080 add.w r0, r0, #65536 ; 0x10000\n", + " 2a0: f101 1101 add.w r1, r1, #65537 ; 0x10001\n", + " 2a4: f06f 1c01 mvn.w ip, #65537 ; 0x10001\n", + " 2a8: eba0 000c sub.w r0, r0, ip\n", + " 2ac: f240 0c03 movw ip, #3\n", + " 2b0: f2c0 0c01 movt ip, #1\n", + " 2b4: 4461 add r1, ip\n", + " 2b6: f100 30ff add.w r0, r0, #4294967295 ; 0xffffffff\n", + " 2ba: f1a1 0107 sub.w r1, r1, #7\n", + " 2be: f1a0 0008 sub.w r0, r0, #8\n", + " 2c2: f1a1 01ff sub.w r1, r1, #255 ; 0xff\n", + " 2c6: f5a0 7080 sub.w r0, r0, #256 ; 0x100\n", + " 2ca: f2a1 1101 subw r1, r1, #257 ; 0x101\n", + " 2ce: f6a0 70ff subw r0, r0, #4095 ; 0xfff\n", + " 2d2: f5a1 5180 sub.w r1, r1, #4096 ; 0x1000\n", + " 2d6: f46f 5c80 mvn.w ip, #4096 ; 0x1000\n", + " 2da: 4460 add r0, ip\n", + " 2dc: f241 0c02 movw ip, #4098 ; 0x1002\n", + " 2e0: eba1 010c sub.w r1, r1, ip\n", + " 2e4: f64f 7cff movw ip, #65535 ; 0xffff\n", + " 2e8: eba0 000c sub.w r0, r0, ip\n", + " 2ec: f5a1 3180 sub.w r1, r1, #65536 ; 0x10000\n", + " 2f0: f1a0 1001 sub.w r0, r0, #65537 ; 0x10001\n", + " 2f4: f06f 1c01 mvn.w ip, #65537 ; 0x10001\n", + " 2f8: 4461 add r1, ip\n", + " 2fa: f64f 7cfd movw ip, #65533 ; 0xfffd\n", + " 2fe: f6cf 7cfe movt ip, #65534 ; 0xfffe\n", + " 302: 4460 add r0, ip\n", + " 304: 1c08 adds r0, r1, #0\n", + " 306: 1c48 adds r0, r1, #1\n", + " 308: 1dc8 adds r0, r1, #7\n", + " 30a: f111 0008 adds.w r0, r1, #8\n", + " 30e: f111 00ff adds.w r0, r1, #255 ; 0xff\n", + " 312: f511 7080 adds.w r0, r1, #256 ; 0x100\n", + " 316: f46f 7080 mvn.w r0, #256 ; 0x100\n", + " 31a: 1a08 subs r0, r1, r0\n", + " 31c: f640 70ff movw r0, #4095 ; 0xfff\n", + " 320: 1808 adds r0, r1, r0\n", + " 322: f511 5080 adds.w r0, r1, #4096 ; 0x1000\n", + " 326: f46f 5080 mvn.w r0, #4096 ; 
0x1000\n", + " 32a: 1a08 subs r0, r1, r0\n", + " 32c: f241 0002 movw r0, #4098 ; 0x1002\n", + " 330: 1808 adds r0, r1, r0\n", + " 332: f64f 70ff movw r0, #65535 ; 0xffff\n", + " 336: 1808 adds r0, r1, r0\n", + " 338: f511 3080 adds.w r0, r1, #65536 ; 0x10000\n", + " 33c: f111 1001 adds.w r0, r1, #65537 ; 0x10001\n", + " 340: f06f 1001 mvn.w r0, #65537 ; 0x10001\n", + " 344: 1a08 subs r0, r1, r0\n", + " 346: f240 0003 movw r0, #3\n", + " 34a: f2c0 0001 movt r0, #1\n", + " 34e: 1808 adds r0, r1, r0\n", + " 350: 1e48 subs r0, r1, #1\n", + " 352: 1fc8 subs r0, r1, #7\n", + " 354: f1b1 0008 subs.w r0, r1, #8\n", + " 358: f1b1 00ff subs.w r0, r1, #255 ; 0xff\n", + " 35c: f5b1 7080 subs.w r0, r1, #256 ; 0x100\n", + " 360: f46f 7080 mvn.w r0, #256 ; 0x100\n", + " 364: 1808 adds r0, r1, r0\n", + " 366: f640 70ff movw r0, #4095 ; 0xfff\n", + " 36a: 1a08 subs r0, r1, r0\n", + " 36c: f5b1 5080 subs.w r0, r1, #4096 ; 0x1000\n", + " 370: f46f 5080 mvn.w r0, #4096 ; 0x1000\n", + " 374: 1808 adds r0, r1, r0\n", + " 376: f241 0002 movw r0, #4098 ; 0x1002\n", + " 37a: 1a08 subs r0, r1, r0\n", + " 37c: f64f 70ff movw r0, #65535 ; 0xffff\n", + " 380: 1a08 subs r0, r1, r0\n", + " 382: f5b1 3080 subs.w r0, r1, #65536 ; 0x10000\n", + " 386: f1b1 1001 subs.w r0, r1, #65537 ; 0x10001\n", + " 38a: f06f 1001 mvn.w r0, #65537 ; 0x10001\n", + " 38e: 1808 adds r0, r1, r0\n", + " 390: f64f 70fd movw r0, #65533 ; 0xfffd\n", + " 394: f6cf 70fe movt r0, #65534 ; 0xfffe\n", + " 398: 1808 adds r0, r1, r0\n", + " 39a: 3000 adds r0, #0\n", + " 39c: 3101 adds r1, #1\n", + " 39e: 3007 adds r0, #7\n", + " 3a0: 3108 adds r1, #8\n", + " 3a2: 30ff adds r0, #255 ; 0xff\n", + " 3a4: f511 7180 adds.w r1, r1, #256 ; 0x100\n", + " 3a8: f46f 7c80 mvn.w ip, #256 ; 0x100\n", + " 3ac: ebb0 000c subs.w r0, r0, ip\n", + " 3b0: f640 7cff movw ip, #4095 ; 0xfff\n", + " 3b4: eb11 010c adds.w r1, r1, ip\n", + " 3b8: f510 5080 adds.w r0, r0, #4096 ; 0x1000\n", + " 3bc: f46f 5c80 mvn.w ip, #4096 ; 0x1000\n", + " 3c0: ebb1 010c subs.w r1, r1, ip\n", + " 3c4: f241 0c02 movw ip, #4098 ; 0x1002\n", + " 3c8: eb10 000c adds.w r0, r0, ip\n", + " 3cc: f64f 7cff movw ip, #65535 ; 0xffff\n", + " 3d0: eb11 010c adds.w r1, r1, ip\n", + " 3d4: f510 3080 adds.w r0, r0, #65536 ; 0x10000\n", + " 3d8: f111 1101 adds.w r1, r1, #65537 ; 0x10001\n", + " 3dc: f06f 1c01 mvn.w ip, #65537 ; 0x10001\n", + " 3e0: ebb0 000c subs.w r0, r0, ip\n", + " 3e4: f240 0c03 movw ip, #3\n", + " 3e8: f2c0 0c01 movt ip, #1\n", + " 3ec: eb11 010c adds.w r1, r1, ip\n", + " 3f0: 3801 subs r0, #1\n", + " 3f2: 3907 subs r1, #7\n", + " 3f4: 3808 subs r0, #8\n", + " 3f6: 39ff subs r1, #255 ; 0xff\n", + " 3f8: f5b0 7080 subs.w r0, r0, #256 ; 0x100\n", + " 3fc: f46f 7c80 mvn.w ip, #256 ; 0x100\n", + " 400: eb11 010c adds.w r1, r1, ip\n", + " 404: f640 7cff movw ip, #4095 ; 0xfff\n", + " 408: ebb0 000c subs.w r0, r0, ip\n", + " 40c: f5b1 5180 subs.w r1, r1, #4096 ; 0x1000\n", + " 410: f46f 5c80 mvn.w ip, #4096 ; 0x1000\n", + " 414: eb10 000c adds.w r0, r0, ip\n", + " 418: f241 0c02 movw ip, #4098 ; 0x1002\n", + " 41c: ebb1 010c subs.w r1, r1, ip\n", + " 420: f64f 7cff movw ip, #65535 ; 0xffff\n", + " 424: ebb0 000c subs.w r0, r0, ip\n", + " 428: f5b1 3180 subs.w r1, r1, #65536 ; 0x10000\n", + " 42c: f1b0 1001 subs.w r0, r0, #65537 ; 0x10001\n", + " 430: f06f 1c01 mvn.w ip, #65537 ; 0x10001\n", + " 434: eb11 010c adds.w r1, r1, ip\n", + " 438: f64f 7cfd movw ip, #65533 ; 0xfffd\n", + " 43c: f6cf 7cfe movt ip, #65534 ; 0xfffe\n", + " 440: eb10 000c adds.w r0, r0, ip\n", + " 444: bf08 it eq\n", + " 
446: f111 0001 addseq.w r0, r1, #1\n", + " 44a: bf18 it ne\n", + " 44c: 1c48 addne r0, r1, #1\n", + " 44e: bfa8 it ge\n", + " 450: f110 0001 addsge.w r0, r0, #1\n", + " 454: bfd8 it le\n", + " 456: 3001 addle r0, #1\n", + nullptr +}; + std::map<std::string, const char* const*> test_results; void setup_results() { test_results["SimpleMov"] = SimpleMovResults; @@ -5102,4 +5420,5 @@ void setup_results() { test_results["LoadStoreLiteral"] = LoadStoreLiteralResults; test_results["LoadStoreLimits"] = LoadStoreLimitsResults; test_results["CompareAndBranch"] = CompareAndBranchResults; + test_results["AddConstant"] = AddConstantResults; } diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc index 107d5bb572..cfd8421e93 100644 --- a/compiler/utils/mips64/assembler_mips64.cc +++ b/compiler/utils/mips64/assembler_mips64.cc @@ -616,6 +616,14 @@ void Mips64Assembler::Bnezc(GpuRegister rs, uint32_t imm21) { EmitI21(0x3E, rs, imm21); } +void Mips64Assembler::Bc1eqz(FpuRegister ft, uint16_t imm16) { + EmitFI(0x11, 0x9, ft, imm16); +} + +void Mips64Assembler::Bc1nez(FpuRegister ft, uint16_t imm16) { + EmitFI(0x11, 0xD, ft, imm16); +} + void Mips64Assembler::EmitBcondc(BranchCondition cond, GpuRegister rs, GpuRegister rt, @@ -669,6 +677,14 @@ void Mips64Assembler::EmitBcondc(BranchCondition cond, case kCondGEU: Bgeuc(rs, rt, imm16_21); break; + case kCondF: + CHECK_EQ(rt, ZERO); + Bc1eqz(static_cast<FpuRegister>(rs), imm16_21); + break; + case kCondT: + CHECK_EQ(rt, ZERO); + Bc1nez(static_cast<FpuRegister>(rs), imm16_21); + break; case kUncond: LOG(FATAL) << "Unexpected branch condition " << cond; UNREACHABLE(); @@ -827,6 +843,86 @@ void Mips64Assembler::MaxD(FpuRegister fd, FpuRegister fs, FpuRegister ft) { EmitFR(0x11, 0x11, ft, fs, fd, 0x1e); } +void Mips64Assembler::CmpUnS(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x14, ft, fs, fd, 0x01); +} + +void Mips64Assembler::CmpEqS(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x14, ft, fs, fd, 0x02); +} + +void Mips64Assembler::CmpUeqS(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x14, ft, fs, fd, 0x03); +} + +void Mips64Assembler::CmpLtS(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x14, ft, fs, fd, 0x04); +} + +void Mips64Assembler::CmpUltS(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x14, ft, fs, fd, 0x05); +} + +void Mips64Assembler::CmpLeS(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x14, ft, fs, fd, 0x06); +} + +void Mips64Assembler::CmpUleS(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x14, ft, fs, fd, 0x07); +} + +void Mips64Assembler::CmpOrS(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x14, ft, fs, fd, 0x11); +} + +void Mips64Assembler::CmpUneS(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x14, ft, fs, fd, 0x12); +} + +void Mips64Assembler::CmpNeS(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x14, ft, fs, fd, 0x13); +} + +void Mips64Assembler::CmpUnD(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x15, ft, fs, fd, 0x01); +} + +void Mips64Assembler::CmpEqD(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x15, ft, fs, fd, 0x02); +} + +void Mips64Assembler::CmpUeqD(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x15, ft, fs, fd, 0x03); +} + +void Mips64Assembler::CmpLtD(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x15, ft, fs, fd, 0x04); 
+} + +void Mips64Assembler::CmpUltD(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x15, ft, fs, fd, 0x05); +} + +void Mips64Assembler::CmpLeD(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x15, ft, fs, fd, 0x06); +} + +void Mips64Assembler::CmpUleD(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x15, ft, fs, fd, 0x07); +} + +void Mips64Assembler::CmpOrD(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x15, ft, fs, fd, 0x11); +} + +void Mips64Assembler::CmpUneD(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x15, ft, fs, fd, 0x12); +} + +void Mips64Assembler::CmpNeD(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x15, ft, fs, fd, 0x13); +} + void Mips64Assembler::Cvtsw(FpuRegister fd, FpuRegister fs) { EmitFR(0x11, 0x14, static_cast<FpuRegister>(0), fs, fd, 0x20); } @@ -1134,6 +1230,10 @@ Mips64Assembler::Branch::Branch(uint32_t location, CHECK_NE(lhs_reg, ZERO); CHECK_EQ(rhs_reg, ZERO); break; + case kCondF: + case kCondT: + CHECK_EQ(rhs_reg, ZERO); + break; case kUncond: UNREACHABLE(); } @@ -1188,6 +1288,10 @@ Mips64Assembler::BranchCondition Mips64Assembler::Branch::OppositeCondition( return kCondGEU; case kCondGEU: return kCondLTU; + case kCondF: + return kCondT; + case kCondT: + return kCondF; case kUncond: LOG(FATAL) << "Unexpected branch condition " << cond; } @@ -1567,7 +1671,7 @@ void Mips64Assembler::EmitBranch(Mips64Assembler::Branch* branch) { case Branch::kCondBranch: CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); EmitBcondc(condition, lhs, rhs, offset); - Nop(); // TODO: improve by filling the forbidden slot. + Nop(); // TODO: improve by filling the forbidden/delay slot. break; case Branch::kCall: CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); @@ -1657,6 +1761,14 @@ void Mips64Assembler::Bnezc(GpuRegister rs, Mips64Label* label) { Bcond(label, kCondNEZ, rs); } +void Mips64Assembler::Bc1eqz(FpuRegister ft, Mips64Label* label) { + Bcond(label, kCondF, static_cast<GpuRegister>(ft), ZERO); +} + +void Mips64Assembler::Bc1nez(FpuRegister ft, Mips64Label* label) { + Bcond(label, kCondT, static_cast<GpuRegister>(ft), ZERO); +} + void Mips64Assembler::LoadFromOffset(LoadOperandType type, GpuRegister reg, GpuRegister base, int32_t offset) { if (!IsInt<16>(offset)) { diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h index 57fc19a6e9..883f013f87 100644 --- a/compiler/utils/mips64/assembler_mips64.h +++ b/compiler/utils/mips64/assembler_mips64.h @@ -227,6 +227,8 @@ class Mips64Assembler FINAL : public Assembler { void Bnec(GpuRegister rs, GpuRegister rt, uint16_t imm16); void Beqzc(GpuRegister rs, uint32_t imm21); void Bnezc(GpuRegister rs, uint32_t imm21); + void Bc1eqz(FpuRegister ft, uint16_t imm16); + void Bc1nez(FpuRegister ft, uint16_t imm16); void AddS(FpuRegister fd, FpuRegister fs, FpuRegister ft); void SubS(FpuRegister fd, FpuRegister fs, FpuRegister ft); @@ -266,6 +268,26 @@ class Mips64Assembler FINAL : public Assembler { void MinD(FpuRegister fd, FpuRegister fs, FpuRegister ft); void MaxS(FpuRegister fd, FpuRegister fs, FpuRegister ft); void MaxD(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpUnS(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpEqS(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpUeqS(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpLtS(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpUltS(FpuRegister fd, FpuRegister fs, 
FpuRegister ft); + void CmpLeS(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpUleS(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpOrS(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpUneS(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpNeS(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpUnD(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpEqD(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpUeqD(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpLtD(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpUltD(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpLeD(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpUleD(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpOrD(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpUneD(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpNeD(FpuRegister fd, FpuRegister fs, FpuRegister ft); void Cvtsw(FpuRegister fd, FpuRegister fs); void Cvtdw(FpuRegister fd, FpuRegister fs); @@ -317,6 +339,8 @@ class Mips64Assembler FINAL : public Assembler { void Bnec(GpuRegister rs, GpuRegister rt, Mips64Label* label); void Beqzc(GpuRegister rs, Mips64Label* label); void Bnezc(GpuRegister rs, Mips64Label* label); + void Bc1eqz(FpuRegister ft, Mips64Label* label); + void Bc1nez(FpuRegister ft, Mips64Label* label); void EmitLoad(ManagedRegister m_dst, GpuRegister src_register, int32_t src_offset, size_t size); void LoadFromOffset(LoadOperandType type, GpuRegister reg, GpuRegister base, int32_t offset); @@ -474,6 +498,8 @@ class Mips64Assembler FINAL : public Assembler { kCondNEZ, kCondLTU, kCondGEU, + kCondF, // Floating-point predicate false. + kCondT, // Floating-point predicate true. kUncond, }; friend std::ostream& operator<<(std::ostream& os, const BranchCondition& rhs); diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc index 29a5a88316..bac4375b35 100644 --- a/compiler/utils/mips64/assembler_mips64_test.cc +++ b/compiler/utils/mips64/assembler_mips64_test.cc @@ -403,6 +403,106 @@ TEST_F(AssemblerMIPS64Test, MaxD) { DriverStr(RepeatFFF(&mips64::Mips64Assembler::MaxD, "max.d ${reg1}, ${reg2}, ${reg3}"), "max.d"); } +TEST_F(AssemblerMIPS64Test, CmpUnS) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUnS, "cmp.un.s ${reg1}, ${reg2}, ${reg3}"), + "cmp.un.s"); +} + +TEST_F(AssemblerMIPS64Test, CmpEqS) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpEqS, "cmp.eq.s ${reg1}, ${reg2}, ${reg3}"), + "cmp.eq.s"); +} + +TEST_F(AssemblerMIPS64Test, CmpUeqS) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUeqS, "cmp.ueq.s ${reg1}, ${reg2}, ${reg3}"), + "cmp.ueq.s"); +} + +TEST_F(AssemblerMIPS64Test, CmpLtS) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpLtS, "cmp.lt.s ${reg1}, ${reg2}, ${reg3}"), + "cmp.lt.s"); +} + +TEST_F(AssemblerMIPS64Test, CmpUltS) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUltS, "cmp.ult.s ${reg1}, ${reg2}, ${reg3}"), + "cmp.ult.s"); +} + +TEST_F(AssemblerMIPS64Test, CmpLeS) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpLeS, "cmp.le.s ${reg1}, ${reg2}, ${reg3}"), + "cmp.le.s"); +} + +TEST_F(AssemblerMIPS64Test, CmpUleS) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUleS, "cmp.ule.s ${reg1}, ${reg2}, ${reg3}"), + "cmp.ule.s"); +} + +TEST_F(AssemblerMIPS64Test, CmpOrS) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpOrS, "cmp.or.s ${reg1}, ${reg2}, ${reg3}"), + "cmp.or.s"); +} + +TEST_F(AssemblerMIPS64Test, CmpUneS) { + 
DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUneS, "cmp.une.s ${reg1}, ${reg2}, ${reg3}"), + "cmp.une.s"); +} + +TEST_F(AssemblerMIPS64Test, CmpNeS) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpNeS, "cmp.ne.s ${reg1}, ${reg2}, ${reg3}"), + "cmp.ne.s"); +} + +TEST_F(AssemblerMIPS64Test, CmpUnD) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUnD, "cmp.un.d ${reg1}, ${reg2}, ${reg3}"), + "cmp.un.d"); +} + +TEST_F(AssemblerMIPS64Test, CmpEqD) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpEqD, "cmp.eq.d ${reg1}, ${reg2}, ${reg3}"), + "cmp.eq.d"); +} + +TEST_F(AssemblerMIPS64Test, CmpUeqD) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUeqD, "cmp.ueq.d ${reg1}, ${reg2}, ${reg3}"), + "cmp.ueq.d"); +} + +TEST_F(AssemblerMIPS64Test, CmpLtD) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpLtD, "cmp.lt.d ${reg1}, ${reg2}, ${reg3}"), + "cmp.lt.d"); +} + +TEST_F(AssemblerMIPS64Test, CmpUltD) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUltD, "cmp.ult.d ${reg1}, ${reg2}, ${reg3}"), + "cmp.ult.d"); +} + +TEST_F(AssemblerMIPS64Test, CmpLeD) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpLeD, "cmp.le.d ${reg1}, ${reg2}, ${reg3}"), + "cmp.le.d"); +} + +TEST_F(AssemblerMIPS64Test, CmpUleD) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUleD, "cmp.ule.d ${reg1}, ${reg2}, ${reg3}"), + "cmp.ule.d"); +} + +TEST_F(AssemblerMIPS64Test, CmpOrD) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpOrD, "cmp.or.d ${reg1}, ${reg2}, ${reg3}"), + "cmp.or.d"); +} + +TEST_F(AssemblerMIPS64Test, CmpUneD) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUneD, "cmp.une.d ${reg1}, ${reg2}, ${reg3}"), + "cmp.une.d"); +} + +TEST_F(AssemblerMIPS64Test, CmpNeD) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpNeD, "cmp.ne.d ${reg1}, ${reg2}, ${reg3}"), + "cmp.ne.d"); +} + TEST_F(AssemblerMIPS64Test, CvtDL) { DriverStr(RepeatFF(&mips64::Mips64Assembler::Cvtdl, "cvt.d.l ${reg1}, ${reg2}"), "cvt.d.l"); } @@ -591,6 +691,58 @@ TEST_F(AssemblerMIPS64Test, Bgeuc) { BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bgeuc, "Bgeuc"); } +TEST_F(AssemblerMIPS64Test, Bc1eqz) { + mips64::Mips64Label label; + __ Bc1eqz(mips64::F0, &label); + constexpr size_t kAdduCount1 = 63; + for (size_t i = 0; i != kAdduCount1; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label); + constexpr size_t kAdduCount2 = 64; + for (size_t i = 0; i != kAdduCount2; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bc1eqz(mips64::F31, &label); + + std::string expected = + ".set noreorder\n" + "bc1eqz $f0, 1f\n" + "nop\n" + + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + + "1:\n" + + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + + "bc1eqz $f31, 1b\n" + "nop\n"; + DriverStr(expected, "Bc1eqz"); +} + +TEST_F(AssemblerMIPS64Test, Bc1nez) { + mips64::Mips64Label label; + __ Bc1nez(mips64::F0, &label); + constexpr size_t kAdduCount1 = 63; + for (size_t i = 0; i != kAdduCount1; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label); + constexpr size_t kAdduCount2 = 64; + for (size_t i = 0; i != kAdduCount2; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bc1nez(mips64::F31, &label); + + std::string expected = + ".set noreorder\n" + "bc1nez $f0, 1f\n" + "nop\n" + + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + + "1:\n" + + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + + "bc1nez $f31, 1b\n" + "nop\n"; + DriverStr(expected, "Bc1nez"); +} + TEST_F(AssemblerMIPS64Test, LongBeqc) { 
mips64::Mips64Label label; __ Beqc(mips64::A0, mips64::A1, &label);
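For context on the MIPS64R6 additions above: the cmp.&lt;cond&gt;.s / cmp.&lt;cond&gt;.d instructions write an all-ones or all-zeros predicate into the destination FPU register, and bc1eqz / bc1nez branch on bit 0 of that register. A minimal sketch of how the two new assembler families compose is shown below; the helper name and the use of F0 as a scratch predicate register are illustrative assumptions, not part of the patch:

  // Sketch only: emit "branch to 'target' if lhs < rhs" for doubles using the
  // newly added Mips64Assembler methods (CmpLtD + Bc1nez).
  void EmitBranchIfLtDouble(mips64::Mips64Assembler* assembler,
                            mips64::FpuRegister lhs,
                            mips64::FpuRegister rhs,
                            mips64::Mips64Label* target) {
    assembler->CmpLtD(mips64::F0, lhs, rhs);  // F0 <- all ones if lhs < rhs, else all zeros
    assembler->Bc1nez(mips64::F0, target);    // taken when bit 0 of F0 is set
  }

The same pattern applies to any of the other compare conditions (eq, ule, une, ...); choosing Bc1eqz instead of Bc1nez inverts the sense of the branch, which is what the new kCondF/kCondT opposite-condition handling relies on.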