diff options
-rw-r--r-- | compiler/optimizing/builder.cc | 30 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_arm.cc | 105 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_arm.h | 4 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_x86.cc | 138 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_x86.h | 4 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_x86_64.cc | 64 | ||||
-rw-r--r-- | compiler/optimizing/codegen_test.cc | 45 | ||||
-rw-r--r-- | compiler/optimizing/nodes.h | 19 | ||||
-rw-r--r-- | compiler/utils/x86_64/assembler_x86_64.cc | 29 | ||||
-rw-r--r-- | compiler/utils/x86_64/assembler_x86_64.h | 4 | ||||
-rw-r--r-- | compiler/utils/x86_64/assembler_x86_64_test.cc | 3 | ||||
-rw-r--r-- | test/411-optimizing-arith/expected.txt | 0 | ||||
-rw-r--r-- | test/411-optimizing-arith/info.txt | 1 | ||||
-rw-r--r-- | test/411-optimizing-arith/src/Main.java | 64 |
14 files changed, 481 insertions, 29 deletions
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index 5bcc65b03b..2648d4d670 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -713,6 +713,16 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 break; } + case Instruction::MUL_INT: { + Binop_23x<HMul>(instruction, Primitive::kPrimInt); + break; + } + + case Instruction::MUL_LONG: { + Binop_23x<HMul>(instruction, Primitive::kPrimLong); + break; + } + case Instruction::ADD_LONG_2ADDR: { Binop_12x<HAdd>(instruction, Primitive::kPrimLong); break; @@ -738,6 +748,16 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 break; } + case Instruction::MUL_INT_2ADDR: { + Binop_12x<HMul>(instruction, Primitive::kPrimInt); + break; + } + + case Instruction::MUL_LONG_2ADDR: { + Binop_12x<HMul>(instruction, Primitive::kPrimLong); + break; + } + case Instruction::ADD_INT_LIT16: { Binop_22s<HAdd>(instruction, false); break; @@ -748,6 +768,11 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 break; } + case Instruction::MUL_INT_LIT16: { + Binop_22s<HMul>(instruction, false); + break; + } + case Instruction::ADD_INT_LIT8: { Binop_22b<HAdd>(instruction, false); break; @@ -758,6 +783,11 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 break; } + case Instruction::MUL_INT_LIT8: { + Binop_22b<HMul>(instruction, false); + break; + } + case Instruction::NEW_INSTANCE: { current_block_->AddInstruction( new (arena_) HNewInstance(dex_offset, instruction.VRegB_21c())); diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index cdee845343..a2cf670b0f 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -236,19 +236,12 @@ Location CodeGeneratorARM::AllocateFreeRegister(Primitive::Type type) const { size_t reg = FindFreeEntry(blocked_register_pairs_, 
kNumberOfRegisterPairs); ArmManagedRegister pair = ArmManagedRegister::FromRegisterPair(static_cast<RegisterPair>(reg)); + DCHECK(!blocked_core_registers_[pair.AsRegisterPairLow()]); + DCHECK(!blocked_core_registers_[pair.AsRegisterPairHigh()]); + blocked_core_registers_[pair.AsRegisterPairLow()] = true; blocked_core_registers_[pair.AsRegisterPairHigh()] = true; - // Block all other register pairs that share a register with `pair`. - for (int i = 0; i < kNumberOfRegisterPairs; i++) { - ArmManagedRegister current = - ArmManagedRegister::FromRegisterPair(static_cast<RegisterPair>(i)); - if (current.AsRegisterPairLow() == pair.AsRegisterPairLow() - || current.AsRegisterPairLow() == pair.AsRegisterPairHigh() - || current.AsRegisterPairHigh() == pair.AsRegisterPairLow() - || current.AsRegisterPairHigh() == pair.AsRegisterPairHigh()) { - blocked_register_pairs_[i] = true; - } - } + UpdateBlockedPairRegisters(); return Location::RegisterPairLocation(pair.AsRegisterPairLow(), pair.AsRegisterPairHigh()); } @@ -294,7 +287,6 @@ void CodeGeneratorARM::SetupBlockedRegisters() const { // Reserve R4 for suspend check. blocked_core_registers_[R4] = true; - blocked_register_pairs_[R4_R5] = true; // Reserve thread register. 
blocked_core_registers_[TR] = true; @@ -318,6 +310,19 @@ void CodeGeneratorARM::SetupBlockedRegisters() const { blocked_fpu_registers_[D13] = true; blocked_fpu_registers_[D14] = true; blocked_fpu_registers_[D15] = true; + + UpdateBlockedPairRegisters(); +} + +void CodeGeneratorARM::UpdateBlockedPairRegisters() const { + for (int i = 0; i < kNumberOfRegisterPairs; i++) { + ArmManagedRegister current = + ArmManagedRegister::FromRegisterPair(static_cast<RegisterPair>(i)); + if (blocked_core_registers_[current.AsRegisterPairLow()] + || blocked_core_registers_[current.AsRegisterPairHigh()]) { + blocked_register_pairs_[i] = true; + } + } } InstructionCodeGeneratorARM::InstructionCodeGeneratorARM(HGraph* graph, CodeGeneratorARM* codegen) @@ -1139,6 +1144,82 @@ void InstructionCodeGeneratorARM::VisitSub(HSub* sub) { } } +void LocationsBuilderARM::VisitMul(HMul* mul) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(mul, LocationSummary::kNoCall); + switch (mul->GetResultType()) { + case Primitive::kPrimInt: + case Primitive::kPrimLong: { + locations->SetInAt(0, Location::RequiresRegister(), Location::kDiesAtEntry); + locations->SetInAt(1, Location::RequiresRegister(), Location::kDiesAtEntry); + locations->SetOut(Location::RequiresRegister()); + break; + } + + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + LOG(FATAL) << "Unexpected mul type " << mul->GetResultType(); + break; + + default: + LOG(FATAL) << "Unimplemented mul type " << mul->GetResultType(); + } +} + +void InstructionCodeGeneratorARM::VisitMul(HMul* mul) { + LocationSummary* locations = mul->GetLocations(); + Location out = locations->Out(); + Location first = locations->InAt(0); + Location second = locations->InAt(1); + switch (mul->GetResultType()) { + case Primitive::kPrimInt: { + __ mul(out.As<Register>(), first.As<Register>(), second.As<Register>()); + break; + } + case Primitive::kPrimLong: { + Register 
out_hi = out.AsRegisterPairHigh<Register>(); + Register out_lo = out.AsRegisterPairLow<Register>(); + Register in1_hi = first.AsRegisterPairHigh<Register>(); + Register in1_lo = first.AsRegisterPairLow<Register>(); + Register in2_hi = second.AsRegisterPairHigh<Register>(); + Register in2_lo = second.AsRegisterPairLow<Register>(); + + // Extra checks to protect caused by the existence of R1_R2. + // The algorithm is wrong if out.hi is either in1.lo or in2.lo: + // (e.g. in1=r0_r1, in2=r2_r3 and out=r1_r2); + DCHECK_NE(out_hi, in1_lo); + DCHECK_NE(out_hi, in2_lo); + + // input: in1 - 64 bits, in2 - 64 bits + // output: out + // formula: out.hi : out.lo = (in1.lo * in2.hi + in1.hi * in2.lo)* 2^32 + in1.lo * in2.lo + // parts: out.hi = in1.lo * in2.hi + in1.hi * in2.lo + (in1.lo * in2.lo)[63:32] + // parts: out.lo = (in1.lo * in2.lo)[31:0] + + // IP <- in1.lo * in2.hi + __ mul(IP, in1_lo, in2_hi); + // out.hi <- in1.lo * in2.hi + in1.hi * in2.lo + __ mla(out_hi, in1_hi, in2_lo, IP); + // out.lo <- (in1.lo * in2.lo)[31:0]; + __ umull(out_lo, IP, in1_lo, in2_lo); + // out.hi <- in2.hi * in1.lo + in2.lo * in1.hi + (in1.lo * in2.lo)[63:32] + __ add(out_hi, out_hi, ShifterOperand(IP)); + break; + } + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + LOG(FATAL) << "Unexpected mul type " << mul->GetResultType(); + break; + + default: + LOG(FATAL) << "Unimplemented mul type " << mul->GetResultType(); + } +} + void LocationsBuilderARM::VisitNewInstance(HNewInstance* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall); diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index 7c063f1728..57b289c801 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -164,6 +164,7 @@ class CodeGeneratorARM : public CodeGenerator { } virtual void 
SetupBlockedRegisters() const OVERRIDE; + virtual Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE; virtual Location GetStackLocation(HLoadLocal* load) const OVERRIDE; @@ -171,6 +172,9 @@ class CodeGeneratorARM : public CodeGenerator { virtual void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE; virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE; + // Blocks all register pairs made out of blocked core registers. + void UpdateBlockedPairRegisters() const; + ParallelMoveResolverARM* GetMoveResolver() { return &move_resolver_; } diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 98d3ad4185..041acdf91e 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -207,19 +207,11 @@ Location CodeGeneratorX86::AllocateFreeRegister(Primitive::Type type) const { size_t reg = FindFreeEntry(blocked_register_pairs_, kNumberOfRegisterPairs); X86ManagedRegister pair = X86ManagedRegister::FromRegisterPair(static_cast<RegisterPair>(reg)); + DCHECK(!blocked_core_registers_[pair.AsRegisterPairLow()]); + DCHECK(!blocked_core_registers_[pair.AsRegisterPairHigh()]); blocked_core_registers_[pair.AsRegisterPairLow()] = true; blocked_core_registers_[pair.AsRegisterPairHigh()] = true; - // Block all other register pairs that share a register with `pair`. 
- for (int i = 0; i < kNumberOfRegisterPairs; i++) { - X86ManagedRegister current = - X86ManagedRegister::FromRegisterPair(static_cast<RegisterPair>(i)); - if (current.AsRegisterPairLow() == pair.AsRegisterPairLow() - || current.AsRegisterPairLow() == pair.AsRegisterPairHigh() - || current.AsRegisterPairHigh() == pair.AsRegisterPairLow() - || current.AsRegisterPairHigh() == pair.AsRegisterPairHigh()) { - blocked_register_pairs_[i] = true; - } - } + UpdateBlockedPairRegisters(); return Location::RegisterPairLocation(pair.AsRegisterPairLow(), pair.AsRegisterPairHigh()); } @@ -266,10 +258,19 @@ void CodeGeneratorX86::SetupBlockedRegisters() const { blocked_core_registers_[EBP] = true; blocked_core_registers_[ESI] = true; blocked_core_registers_[EDI] = true; - blocked_register_pairs_[EAX_EDI] = true; - blocked_register_pairs_[EDX_EDI] = true; - blocked_register_pairs_[ECX_EDI] = true; - blocked_register_pairs_[EBX_EDI] = true; + + UpdateBlockedPairRegisters(); +} + +void CodeGeneratorX86::UpdateBlockedPairRegisters() const { + for (int i = 0; i < kNumberOfRegisterPairs; i++) { + X86ManagedRegister current = + X86ManagedRegister::FromRegisterPair(static_cast<RegisterPair>(i)); + if (blocked_core_registers_[current.AsRegisterPairLow()] + || blocked_core_registers_[current.AsRegisterPairHigh()]) { + blocked_register_pairs_[i] = true; + } + } } InstructionCodeGeneratorX86::InstructionCodeGeneratorX86(HGraph* graph, CodeGeneratorX86* codegen) @@ -1118,6 +1119,113 @@ void InstructionCodeGeneratorX86::VisitSub(HSub* sub) { } } +void LocationsBuilderX86::VisitMul(HMul* mul) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(mul, LocationSummary::kNoCall); + switch (mul->GetResultType()) { + case Primitive::kPrimInt: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::Any()); + locations->SetOut(Location::SameAsFirstInput()); + break; + case Primitive::kPrimLong: { + locations->SetInAt(0, 
Location::RequiresRegister()); + // TODO: Currently this handles only stack operands: + // - we don't have enough registers because we currently use Quick ABI. + // - by the time we have a working register allocator we will probably change the ABI + // and fix the above. + // - we don't have a way yet to request operands on stack but the base line compiler + // will leave the operands on the stack with Any(). + locations->SetInAt(1, Location::Any()); + locations->SetOut(Location::SameAsFirstInput()); + // Needed for imul on 32bits with 64bits output. + locations->AddTemp(Location::RegisterLocation(EAX)); + locations->AddTemp(Location::RegisterLocation(EDX)); + break; + } + + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + LOG(FATAL) << "Unexpected mul type " << mul->GetResultType(); + break; + + default: + LOG(FATAL) << "Unimplemented mul type " << mul->GetResultType(); + } +} + +void InstructionCodeGeneratorX86::VisitMul(HMul* mul) { + LocationSummary* locations = mul->GetLocations(); + Location first = locations->InAt(0); + Location second = locations->InAt(1); + DCHECK(first.Equals(locations->Out())); + + switch (mul->GetResultType()) { + case Primitive::kPrimInt: { + if (second.IsRegister()) { + __ imull(first.As<Register>(), second.As<Register>()); + } else if (second.IsConstant()) { + Immediate imm(second.GetConstant()->AsIntConstant()->GetValue()); + __ imull(first.As<Register>(), imm); + } else { + DCHECK(second.IsStackSlot()); + __ imull(first.As<Register>(), Address(ESP, second.GetStackIndex())); + } + break; + } + + case Primitive::kPrimLong: { + DCHECK(second.IsDoubleStackSlot()); + + Register in1_hi = first.AsRegisterPairHigh<Register>(); + Register in1_lo = first.AsRegisterPairLow<Register>(); + Address in2_hi(ESP, second.GetHighStackIndex(kX86WordSize)); + Address in2_lo(ESP, second.GetStackIndex()); + Register eax = locations->GetTemp(0).As<Register>(); + Register edx = 
locations->GetTemp(1).As<Register>(); + + DCHECK_EQ(EAX, eax); + DCHECK_EQ(EDX, edx); + + // input: in1 - 64 bits, in2 - 64 bits + // output: in1 + // formula: in1.hi : in1.lo = (in1.lo * in2.hi + in1.hi * in2.lo)* 2^32 + in1.lo * in2.lo + // parts: in1.hi = in1.lo * in2.hi + in1.hi * in2.lo + (in1.lo * in2.lo)[63:32] + // parts: in1.lo = (in1.lo * in2.lo)[31:0] + + __ movl(eax, in2_hi); + // eax <- in1.lo * in2.hi + __ imull(eax, in1_lo); + // in1.hi <- in1.hi * in2.lo + __ imull(in1_hi, in2_lo); + // in1.hi <- in1.lo * in2.hi + in1.hi * in2.lo + __ addl(in1_hi, eax); + // move in1_lo to eax to prepare for double precision + __ movl(eax, in1_lo); + // edx:eax <- in1.lo * in2.lo + __ mull(in2_lo); + // in1.hi <- in2.hi * in1.lo + in2.lo * in1.hi + (in1.lo * in2.lo)[63:32] + __ addl(in1_hi, edx); + // in1.lo <- (in1.lo * in2.lo)[31:0]; + __ movl(in1_lo, eax); + + break; + } + + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + LOG(FATAL) << "Unexpected mul type " << mul->GetResultType(); + break; + + default: + LOG(FATAL) << "Unimplemented mul type " << mul->GetResultType(); + } +} + void LocationsBuilderX86::VisitNewInstance(HNewInstance* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall); diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index aa5fee00e0..db8b9abd91 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -166,6 +166,7 @@ class CodeGeneratorX86 : public CodeGenerator { } virtual void SetupBlockedRegisters() const OVERRIDE; + virtual Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE; virtual Location GetStackLocation(HLoadLocal* load) const OVERRIDE; @@ -173,6 +174,9 @@ class CodeGeneratorX86 : public CodeGenerator { virtual void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE; 
virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE; + // Blocks all register pairs made out of blocked core registers. + void UpdateBlockedPairRegisters() const; + ParallelMoveResolverX86* GetMoveResolver() { return &move_resolver_; } diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 059ff3fa79..5fa930512b 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -1080,6 +1080,70 @@ void InstructionCodeGeneratorX86_64::VisitSub(HSub* sub) { } } +void LocationsBuilderX86_64::VisitMul(HMul* mul) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(mul, LocationSummary::kNoCall); + switch (mul->GetResultType()) { + case Primitive::kPrimInt: { + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::Any()); + locations->SetOut(Location::SameAsFirstInput()); + break; + } + case Primitive::kPrimLong: { + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + break; + } + + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + LOG(FATAL) << "Unexpected mul type " << mul->GetResultType(); + break; + + default: + LOG(FATAL) << "Unimplemented mul type " << mul->GetResultType(); + } +} + +void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) { + LocationSummary* locations = mul->GetLocations(); + Location first = locations->InAt(0); + Location second = locations->InAt(1); + DCHECK(first.Equals(locations->Out())); + switch (mul->GetResultType()) { + case Primitive::kPrimInt: { + if (second.IsRegister()) { + __ imull(first.As<CpuRegister>(), second.As<CpuRegister>()); + } else if (second.IsConstant()) { + Immediate imm(second.GetConstant()->AsIntConstant()->GetValue()); + __ 
imull(first.As<CpuRegister>(), imm); + } else { + DCHECK(second.IsStackSlot()); + __ imull(first.As<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex())); + } + break; + } + case Primitive::kPrimLong: { + __ imulq(first.As<CpuRegister>(), second.As<CpuRegister>()); + break; + } + + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + LOG(FATAL) << "Unexpected mul type " << mul->GetResultType(); + break; + + default: + LOG(FATAL) << "Unimplemented mul type " << mul->GetResultType(); + } +} + void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall); diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc index 3037f1c2e8..8bb12de387 100644 --- a/compiler/optimizing/codegen_test.cc +++ b/compiler/optimizing/codegen_test.cc @@ -349,4 +349,49 @@ TEST(CodegenTest, NonMaterializedCondition) { RunCodeOptimized(graph, hook_before_codegen, true, 0); } +#define MUL_TEST(TYPE, TEST_NAME) \ + TEST(CodegenTest, Return ## TEST_NAME) { \ + const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( \ + Instruction::CONST_4 | 3 << 12 | 0, \ + Instruction::CONST_4 | 4 << 12 | 1 << 8, \ + Instruction::MUL_ ## TYPE, 1 << 8 | 0, \ + Instruction::RETURN); \ + \ + TestCode(data, true, 12); \ + } \ + \ + TEST(CodegenTest, Return ## TEST_NAME ## 2addr) { \ + const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( \ + Instruction::CONST_4 | 3 << 12 | 0, \ + Instruction::CONST_4 | 4 << 12 | 1 << 8, \ + Instruction::MUL_ ## TYPE ## _2ADDR | 1 << 12, \ + Instruction::RETURN); \ + \ + TestCode(data, true, 12); \ + } + +MUL_TEST(INT, MulInt); +MUL_TEST(LONG, MulLong); +// MUL_TEST(FLOAT, Float); +// MUL_TEST(DOUBLE, Double); + +TEST(CodegenTest, ReturnMulIntLit8) { + const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + Instruction::CONST_4 | 4 << 12 | 0 << 8, + 
Instruction::MUL_INT_LIT8, 3 << 8 | 0, + Instruction::RETURN); + + TestCode(data, true, 12); +} + +TEST(CodegenTest, ReturnMulIntLit16) { + const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + Instruction::CONST_4 | 4 << 12 | 0 << 8, + Instruction::MUL_INT_LIT16, 3, + Instruction::RETURN); + + TestCode(data, true, 12); +} + + } // namespace art diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index e60a7e62db..ec26c4a4dc 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -502,11 +502,12 @@ class HBasicBlock : public ArenaObject { M(NullCheck, Instruction) \ M(Temporary, Instruction) \ M(SuspendCheck, Instruction) \ + M(Mul, BinaryOperation) \ #define FOR_EACH_INSTRUCTION(M) \ FOR_EACH_CONCRETE_INSTRUCTION(M) \ M(Constant, Instruction) \ - M(BinaryOperation, Instruction) \ + M(BinaryOperation, Instruction) \ M(Invoke, Instruction) #define FORWARD_DECLARATION(type, super) class H##type; @@ -1556,6 +1557,22 @@ class HSub : public HBinaryOperation { DISALLOW_COPY_AND_ASSIGN(HSub); }; +class HMul : public HBinaryOperation { + public: + HMul(Primitive::Type result_type, HInstruction* left, HInstruction* right) + : HBinaryOperation(result_type, left, right) {} + + virtual bool IsCommutative() { return true; } + + virtual int32_t Evaluate(int32_t x, int32_t y) const { return x * y; } + virtual int64_t Evaluate(int64_t x, int64_t y) const { return x * y; } + + DECLARE_INSTRUCTION(Mul); + + private: + DISALLOW_COPY_AND_ASSIGN(HMul); +}; + // The value of a parameter in this method. Its location depends on // the calling convention. 
class HParameterValue : public HExpression<0> { diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc index 75823e336b..db7151c3c1 100644 --- a/compiler/utils/x86_64/assembler_x86_64.cc +++ b/compiler/utils/x86_64/assembler_x86_64.cc @@ -1238,6 +1238,34 @@ void X86_64Assembler::imull(CpuRegister reg, const Address& address) { } +void X86_64Assembler::imulq(CpuRegister dst, CpuRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitRex64(dst, src); + EmitUint8(0x0F); + EmitUint8(0xAF); + EmitRegisterOperand(dst.LowBits(), src.LowBits()); +} + + +void X86_64Assembler::imulq(CpuRegister reg, const Immediate& imm) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + CHECK(imm.is_int32()); // imulq only supports 32b immediate. + EmitRex64(reg); + EmitUint8(0x69); + EmitOperand(reg.LowBits(), Operand(reg)); + EmitImmediate(imm); +} + + +void X86_64Assembler::imulq(CpuRegister reg, const Address& address) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitRex64(reg, address); + EmitUint8(0x0F); + EmitUint8(0xAF); + EmitOperand(reg.LowBits(), address); +} + + void X86_64Assembler::imull(CpuRegister reg) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitOptionalRex32(reg); @@ -1270,7 +1298,6 @@ void X86_64Assembler::mull(const Address& address) { } - void X86_64Assembler::shll(CpuRegister reg, const Immediate& imm) { EmitGenericShift(false, 4, reg, imm); } diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h index 1d9eba446a..4ffb6b5663 100644 --- a/compiler/utils/x86_64/assembler_x86_64.h +++ b/compiler/utils/x86_64/assembler_x86_64.h @@ -436,6 +436,10 @@ class X86_64Assembler FINAL : public Assembler { void imull(CpuRegister reg, const Immediate& imm); void imull(CpuRegister reg, const Address& address); + void imulq(CpuRegister dst, CpuRegister src); + void imulq(CpuRegister reg, const Immediate& imm); + void imulq(CpuRegister reg, const 
Address& address); + void imull(CpuRegister reg); void imull(const Address& address); diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc index 7a48b638e7..69a5fa0dba 100644 --- a/compiler/utils/x86_64/assembler_x86_64_test.cc +++ b/compiler/utils/x86_64/assembler_x86_64_test.cc @@ -112,6 +112,9 @@ TEST_F(AssemblerX86_64Test, AddqImm) { DriverStr(RepeatRI(&x86_64::X86_64Assembler::addq, 4U, "addq ${imm}, %{reg}"), "addqi"); } +TEST_F(AssemblerX86_64Test, ImulqRegs) { + DriverStr(RepeatRR(&x86_64::X86_64Assembler::imulq, "imulq %{reg2}, %{reg1}"), "imulq"); +} TEST_F(AssemblerX86_64Test, SubqRegs) { DriverStr(RepeatRR(&x86_64::X86_64Assembler::subq, "subq %{reg2}, %{reg1}"), "subq"); diff --git a/test/411-optimizing-arith/expected.txt b/test/411-optimizing-arith/expected.txt new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/test/411-optimizing-arith/expected.txt diff --git a/test/411-optimizing-arith/info.txt b/test/411-optimizing-arith/info.txt new file mode 100644 index 0000000000..10155512f0 --- /dev/null +++ b/test/411-optimizing-arith/info.txt @@ -0,0 +1 @@ +Tests for basic arithmetic operations. diff --git a/test/411-optimizing-arith/src/Main.java b/test/411-optimizing-arith/src/Main.java new file mode 100644 index 0000000000..74c47a606c --- /dev/null +++ b/test/411-optimizing-arith/src/Main.java @@ -0,0 +1,64 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Note that $opt$ is a marker for the optimizing compiler to ensure +// it does compile the method. + +public class Main { + + public static void expectEquals(int expected, int result) { + if (expected != result) { + throw new Error("Expected: " + expected + ", found: " + result); + } + } + + public static void expectEquals(long expected, long result) { + if (expected != result) { + throw new Error("Expected: " + expected + ", found: " + result); + } + } + + public static void main(String[] args) { + mul(); + } + + public static void mul() { + expectEquals(15, $opt$Mul(5, 3)); + expectEquals(0, $opt$Mul(0, 3)); + expectEquals(0, $opt$Mul(3, 0)); + expectEquals(-3, $opt$Mul(1, -3)); + expectEquals(36, $opt$Mul(-12, -3)); + expectEquals(33, $opt$Mul(1, 3) * 11); + expectEquals(671088645, $opt$Mul(134217729, 5)); // (2^27 + 1) * 5 + + expectEquals(15L, $opt$Mul(5L, 3L)); + expectEquals(0L, $opt$Mul(0L, 3L)); + expectEquals(0L, $opt$Mul(3L, 0L)); + expectEquals(-3L, $opt$Mul(1L, -3L)); + expectEquals(36L, $opt$Mul(-12L, -3L)); + expectEquals(33L, $opt$Mul(1L, 3L) * 11); + expectEquals(240518168583L, $opt$Mul(34359738369L, 7L)); // (2^35 + 1) * 7 + } + + static int $opt$Mul(int a, int b) { + return a * b; + } + + static long $opt$Mul(long a, long b) { + return a * b; + } + +} |