| /* |
| * Copyright (C) 2015 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #include "instruction_simplifier_shared.h" |
| |
| #include "code_generator.h" |
| #include "mirror/array-inl.h" |
| |
| namespace art HIDDEN { |
| |
| namespace { |
| |
| bool TrySimpleMultiplyAccumulatePatterns(HMul* mul, |
| HBinaryOperation* input_binop, |
| HInstruction* input_other) { |
| DCHECK(DataType::IsIntOrLongType(mul->GetType())); |
| DCHECK(input_binop->IsAdd() || input_binop->IsSub()); |
| DCHECK_NE(input_binop, input_other); |
| if (!input_binop->HasOnlyOneNonEnvironmentUse()) { |
| return false; |
| } |
| |
| // Try to interpret patterns like |
| // a * (b <+/-> 1) |
| // as |
| // (a * b) <+/-> a |
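  // For instance, on ARM64 the backend can then emit a single `madd`
  // (multiply-add) instead of separate `mul` and `add` instructions
  // (illustrative; the exact encoding is up to the code generator):
  //     madd w0, w1, w2, w1    // w0 = (w1 * w2) + w1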
| HInstruction* input_a = input_other; |
| HInstruction* input_b = nullptr; // Set to a non-null value if we found a pattern to optimize. |
| HInstruction::InstructionKind op_kind; |
| |
| if (input_binop->IsAdd()) { |
| if ((input_binop->GetConstantRight() != nullptr) && input_binop->GetConstantRight()->IsOne()) { |
| // Interpret |
| // a * (b + 1) |
| // as |
| // (a * b) + a |
| input_b = input_binop->GetLeastConstantLeft(); |
| op_kind = HInstruction::kAdd; |
| } |
| } else { |
| DCHECK(input_binop->IsSub()); |
| if (input_binop->GetRight()->IsConstant() && |
| input_binop->GetRight()->AsConstant()->IsMinusOne()) { |
| // Interpret |
| // a * (b - (-1)) |
| // as |
| // a + (a * b) |
| input_b = input_binop->GetLeft(); |
| op_kind = HInstruction::kAdd; |
| } else if (input_binop->GetLeft()->IsConstant() && |
| input_binop->GetLeft()->AsConstant()->IsOne()) { |
| // Interpret |
| // a * (1 - b) |
| // as |
| // a - (a * b) |
| input_b = input_binop->GetRight(); |
| op_kind = HInstruction::kSub; |
| } |
| } |
| |
| if (input_b == nullptr) { |
| // We did not find a pattern we can optimize. |
| return false; |
| } |
| |
| ArenaAllocator* allocator = mul->GetBlock()->GetGraph()->GetAllocator(); |
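  // Note that the accumulator and one multiply operand are both `input_a`:
  // the result computed is `(input_a * input_b) <+/-> input_a`.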
| HMultiplyAccumulate* mulacc = new (allocator) HMultiplyAccumulate( |
| mul->GetType(), op_kind, input_a, input_a, input_b, mul->GetDexPc()); |
| |
| mul->GetBlock()->ReplaceAndRemoveInstructionWith(mul, mulacc); |
| input_binop->GetBlock()->RemoveInstruction(input_binop); |
| |
| return true; |
| } |
| |
| } // namespace |
| |
| bool TryCombineMultiplyAccumulate(HMul* mul, InstructionSet isa) { |
| DataType::Type type = mul->GetType(); |
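  // ARM (Thumb2) multiply-accumulate instructions (MLA/MLS) only operate on
  // 32-bit values, while ARM64 MADD/MSUB handle both 32- and 64-bit operands.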
| switch (isa) { |
| case InstructionSet::kArm: |
| case InstructionSet::kThumb2: |
| if (type != DataType::Type::kInt32) { |
| return false; |
| } |
| break; |
| case InstructionSet::kArm64: |
| if (!DataType::IsIntOrLongType(type)) { |
| return false; |
| } |
| break; |
| default: |
| return false; |
| } |
| |
| ArenaAllocator* allocator = mul->GetBlock()->GetGraph()->GetAllocator(); |
| |
| if (mul->HasOnlyOneNonEnvironmentUse()) { |
| HInstruction* use = mul->GetUses().front().GetUser(); |
| if (use->IsAdd() || use->IsSub()) { |
      // Replace code looking like
      //    MUL tmp, x, y
      //    SUB dst, acc, tmp    (respectively ADD)
      // with
      //    MULSUB dst, acc, x, y    (respectively MULADD)
| // Note that we do not want to (unconditionally) perform the merge when the |
| // multiplication has multiple uses and it can be merged in all of them. |
| // Multiple uses could happen on the same control-flow path, and we would |
| // then increase the amount of work. In the future we could try to evaluate |
| // whether all uses are on different control-flow paths (using dominance and |
| // reverse-dominance information) and only perform the merge when they are. |
| HInstruction* accumulator = nullptr; |
| HBinaryOperation* binop = use->AsBinaryOperation(); |
| HInstruction* binop_left = binop->GetLeft(); |
| HInstruction* binop_right = binop->GetRight(); |
      // Be careful after GVN: it could have commoned the two inputs of `binop`
      // into the same instruction, but that cannot happen here since the
      // `HMul` has only one use.
| DCHECK_NE(binop_left, binop_right); |
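      // For `HSub` the multiply must be the right input: `acc - (x * y)` maps
      // to a multiply-subtract, but `(x * y) - acc` does not. For `HAdd`
      // either side works, as addition is commutative.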
| if (binop_right == mul) { |
| accumulator = binop_left; |
| } else if (use->IsAdd()) { |
| DCHECK_EQ(binop_left, mul); |
| accumulator = binop_right; |
| } |
| |
| if (accumulator != nullptr) { |
| HMultiplyAccumulate* mulacc = |
| new (allocator) HMultiplyAccumulate(type, |
| binop->GetKind(), |
| accumulator, |
| mul->GetLeft(), |
| mul->GetRight()); |
| |
| binop->GetBlock()->ReplaceAndRemoveInstructionWith(binop, mulacc); |
| DCHECK(!mul->HasUses()); |
| mul->GetBlock()->RemoveInstruction(mul); |
| return true; |
| } |
| } else if (use->IsNeg() && isa != InstructionSet::kArm) { |
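      // Replace code looking like
      //    MUL tmp, x, y
      //    NEG dst, tmp
      // with a multiply-accumulate subtracting from a zero accumulator (on
      // ARM64 this can be emitted as a single `msub`/`mneg`).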
| HMultiplyAccumulate* mulacc = |
| new (allocator) HMultiplyAccumulate(type, |
| HInstruction::kSub, |
| mul->GetBlock()->GetGraph()->GetConstant(type, 0), |
| mul->GetLeft(), |
| mul->GetRight()); |
| |
| use->GetBlock()->ReplaceAndRemoveInstructionWith(use, mulacc); |
| DCHECK(!mul->HasUses()); |
| mul->GetBlock()->RemoveInstruction(mul); |
| return true; |
| } |
| } |
| |
  // Use a multiply-accumulate instruction for a few simple patterns.
  // We prefer not to apply the following transformations if the left and
  // right inputs perform the same operation.
  // We rely on GVN having squashed the inputs when appropriate; however, the
  // results are still correct even if that did not happen.
| if (mul->GetLeft() == mul->GetRight()) { |
| return false; |
| } |
| |
| HInstruction* left = mul->GetLeft(); |
| HInstruction* right = mul->GetRight(); |
| if ((right->IsAdd() || right->IsSub()) && |
| TrySimpleMultiplyAccumulatePatterns(mul, right->AsBinaryOperation(), left)) { |
| return true; |
| } |
| if ((left->IsAdd() || left->IsSub()) && |
| TrySimpleMultiplyAccumulatePatterns(mul, left->AsBinaryOperation(), right)) { |
| return true; |
| } |
| return false; |
| } |
| |
| |
| bool TryMergeNegatedInput(HBinaryOperation* op) { |
| DCHECK(op->IsAnd() || op->IsOr() || op->IsXor()) << op->DebugName(); |
| HInstruction* left = op->GetLeft(); |
| HInstruction* right = op->GetRight(); |
| |
  // Only consider the case where exactly one of the inputs is a Not; with two
  // Nots, De Morgan's laws should be applied instead.
| if (left->IsNot() ^ right->IsNot()) { |
| HInstruction* hnot = (left->IsNot() ? left : right); |
| HInstruction* hother = (left->IsNot() ? right : left); |
| |
| // Only do the simplification if the Not has only one use and can thus be |
| // safely removed. Even though ARM64 negated bitwise operations do not have |
| // an immediate variant (only register), we still do the simplification when |
| // `hother` is a constant, because it removes an instruction if the constant |
| // cannot be encoded as an immediate: |
| // mov r0, #large_constant |
| // neg r2, r1 |
| // and r0, r0, r2 |
| // becomes: |
| // mov r0, #large_constant |
| // bic r0, r0, r1 |
| if (hnot->HasOnlyOneNonEnvironmentUse()) { |
| // Replace code looking like |
| // NOT tmp, mask |
| // AND dst, src, tmp (respectively ORR, EOR) |
| // with |
| // BIC dst, src, mask (respectively ORN, EON) |
| HInstruction* src = hnot->AsNot()->GetInput(); |
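      // `HBitwiseNegatedRight` negates its right input, so the former `HNot`
      // input goes on the right and `hother` on the left.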
| |
| HBitwiseNegatedRight* neg_op = new (hnot->GetBlock()->GetGraph()->GetAllocator()) |
| HBitwiseNegatedRight(op->GetType(), op->GetKind(), hother, src, op->GetDexPc()); |
| |
| op->GetBlock()->ReplaceAndRemoveInstructionWith(op, neg_op); |
| hnot->GetBlock()->RemoveInstruction(hnot); |
| return true; |
| } |
| } |
| |
| return false; |
| } |
| |
| |
| bool TryExtractArrayAccessAddress(CodeGenerator* codegen, |
| HInstruction* access, |
| HInstruction* array, |
| HInstruction* index, |
| size_t data_offset) { |
| if (index->IsConstant() || |
| (index->IsBoundsCheck() && index->AsBoundsCheck()->GetIndex()->IsConstant())) { |
    // When the index is a constant, all the addressing can be fitted into the
    // memory access instruction, so do not split the access.
| return false; |
| } |
| if (access->IsArraySet() && |
| access->AsArraySet()->GetValue()->GetType() == DataType::Type::kReference) { |
| // The access may require a runtime call or the original array pointer. |
| return false; |
| } |
| if (codegen->EmitNonBakerReadBarrier() && |
| access->IsArrayGet() && |
| access->GetType() == DataType::Type::kReference) { |
| // For object arrays, the non-Baker read barrier instrumentation requires |
| // the original array pointer. |
| return false; |
| } |
| |
| // Proceed to extract the base address computation. |
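  // At the HIR level the transformation looks like (illustrative):
  //    ArrayGet [array, index]
  // becomes
  //    IntermediateAddress [array, data_offset]
  //    ArrayGet [address, index]
  // letting the backend fold `array + data_offset` into the addressing mode.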
| HGraph* graph = access->GetBlock()->GetGraph(); |
| ArenaAllocator* allocator = graph->GetAllocator(); |
| |
| HIntConstant* offset = graph->GetIntConstant(data_offset); |
| HIntermediateAddress* address = new (allocator) HIntermediateAddress(array, offset, kNoDexPc); |
| // TODO: Is it ok to not have this on the intermediate address? |
| // address->SetReferenceTypeInfo(array->GetReferenceTypeInfo()); |
| access->GetBlock()->InsertInstructionBefore(address, access); |
| access->ReplaceInput(address, 0); |
  // Both instructions must depend on GC to prevent any instruction that can
  // trigger GC from being inserted between the two: a moving collector could
  // relocate the array and leave the intermediate address dangling.
| access->AddSideEffects(SideEffects::DependsOnGC()); |
| DCHECK(address->GetSideEffects().Includes(SideEffects::DependsOnGC())); |
| DCHECK(access->GetSideEffects().Includes(SideEffects::DependsOnGC())); |
| // TODO: Code generation for HArrayGet and HArraySet will check whether the input address |
| // is an HIntermediateAddress and generate appropriate code. |
  // We would like to replace the `HArrayGet` and `HArraySet` with custom instructions (maybe
  // `HArm64Load` and `HArm64Store`, `HArmLoad` and `HArmStore`). We defer these changes
| // because these new instructions would not bring any advantages yet. |
| // Also see the comments in |
| // `InstructionCodeGeneratorARMVIXL::VisitArrayGet()` |
| // `InstructionCodeGeneratorARMVIXL::VisitArraySet()` |
| // `InstructionCodeGeneratorARM64::VisitArrayGet()` |
| // `InstructionCodeGeneratorARM64::VisitArraySet()`. |
| return true; |
| } |
| |
| bool TryExtractVecArrayAccessAddress(HVecMemoryOperation* access, HInstruction* index) { |
| if (index->IsConstant()) { |
    // If the index is constant, the whole address calculation can often be done
    // by the LDR/STR instructions themselves.
    // TODO: Treat the case of a non-embeddable constant.
| return false; |
| } |
| |
| HGraph* graph = access->GetBlock()->GetGraph(); |
| ArenaAllocator* allocator = graph->GetAllocator(); |
| DataType::Type packed_type = access->GetPackedType(); |
| uint32_t data_offset = mirror::Array::DataOffset( |
| DataType::Size(packed_type)).Uint32Value(); |
| size_t component_shift = DataType::SizeShift(packed_type); |
| |
| bool is_extracting_beneficial = false; |
  // Extracting an intermediate address from the index is beneficial only if it
  // has at least two users that share the same data offset and component shift.
| for (const HUseListNode<HInstruction*>& use : index->GetUses()) { |
| HInstruction* user = use.GetUser(); |
| if (user->IsVecMemoryOperation() && user != access) { |
| HVecMemoryOperation* another_access = user->AsVecMemoryOperation(); |
| DataType::Type another_packed_type = another_access->GetPackedType(); |
| uint32_t another_data_offset = mirror::Array::DataOffset( |
| DataType::Size(another_packed_type)).Uint32Value(); |
| size_t another_component_shift = DataType::SizeShift(another_packed_type); |
| if (another_data_offset == data_offset && another_component_shift == component_shift) { |
| is_extracting_beneficial = true; |
| break; |
| } |
| } else if (user->IsIntermediateAddressIndex()) { |
| HIntermediateAddressIndex* another_access = user->AsIntermediateAddressIndex(); |
| uint32_t another_data_offset = another_access->GetOffset()->AsIntConstant()->GetValue(); |
| size_t another_component_shift = another_access->GetShift()->AsIntConstant()->GetValue(); |
| if (another_data_offset == data_offset && another_component_shift == component_shift) { |
| is_extracting_beneficial = true; |
| break; |
| } |
| } |
| } |
| |
| if (!is_extracting_beneficial) { |
| return false; |
| } |
| |
| // Proceed to extract the index + data_offset address computation. |
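  // The extracted value is `data_offset + (index << component_shift)`
  // (illustrative), which all the matching accesses found above can share.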
| HIntConstant* offset = graph->GetIntConstant(data_offset); |
| HIntConstant* shift = graph->GetIntConstant(component_shift); |
| HIntermediateAddressIndex* address = |
| new (allocator) HIntermediateAddressIndex(index, offset, shift, kNoDexPc); |
| |
| access->GetBlock()->InsertInstructionBefore(address, access); |
| access->ReplaceInput(address, 1); |
| |
| return true; |
| } |
| |
| bool TryReplaceSubSubWithSubAdd(HSub* last_sub) { |
| DCHECK(last_sub->GetRight()->IsSub()); |
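  // Transform `c - (a - b)` into `c + (b - a)`. Reordering the operands of the
  // inner Sub in place is only safe because it has exactly one use (checked
  // below).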
| HBasicBlock* basic_block = last_sub->GetBlock(); |
| ArenaAllocator* allocator = basic_block->GetGraph()->GetAllocator(); |
| HInstruction* last_sub_right = last_sub->GetRight(); |
| HInstruction* last_sub_left = last_sub->GetLeft(); |
| if (last_sub_right->GetUses().HasExactlyOneElement()) { |
| // Reorder operands of last_sub_right: Sub(a, b) -> Sub(b, a). |
| HInstruction* a = last_sub_right->InputAt(0); |
| HInstruction* b = last_sub_right->InputAt(1); |
| last_sub_right->ReplaceInput(b, 0); |
| last_sub_right->ReplaceInput(a, 1); |
| |
    // Replace Sub(c, Sub(a, b)) with Add(c, Sub(b, a)).
| HAdd* add = new (allocator) HAdd(last_sub->GetType(), last_sub_left, last_sub_right); |
| basic_block->ReplaceAndRemoveInstructionWith(last_sub, add); |
| return true; |
| } else { |
| return false; |
| } |
| } |
| |
| } // namespace art |