Diffstat (limited to 'compiler/optimizing')
-rw-r--r--   compiler/optimizing/code_generator.cc          |  42
-rw-r--r--   compiler/optimizing/instruction_simplifier.cc  |  59
-rw-r--r--   compiler/optimizing/load_store_elimination.cc  |  91
-rw-r--r--   compiler/optimizing/loop_optimization.cc       |   3
-rw-r--r--   compiler/optimizing/nodes.h                    |   1
-rw-r--r--   compiler/optimizing/nodes_vector.h             |  15
-rw-r--r--   compiler/optimizing/ssa_liveness_analysis.cc   |   7
7 files changed, 183 insertions, 35 deletions
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index aff6f9f64f..45eec6d744 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -981,21 +981,6 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction,
     }
   }
 
-  uint32_t outer_dex_pc = dex_pc;
-  uint32_t outer_environment_size = 0;
-  uint32_t inlining_depth = 0;
-  if (instruction != nullptr) {
-    for (HEnvironment* environment = instruction->GetEnvironment();
-         environment != nullptr;
-         environment = environment->GetParent()) {
-      outer_dex_pc = environment->GetDexPc();
-      outer_environment_size = environment->Size();
-      if (environment != instruction->GetEnvironment()) {
-        inlining_depth++;
-      }
-    }
-  }
-
   // Collect PC infos for the mapping table.
   uint32_t native_pc = GetAssembler()->CodePosition();
 
@@ -1003,12 +988,12 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction,
   if (instruction == nullptr) {
     // For stack overflow checks and native-debug-info entries without dex register
     // mapping (i.e. start of basic block or start of slow path).
-    stack_map_stream->BeginStackMapEntry(outer_dex_pc, native_pc, 0, 0, 0, 0);
+    stack_map_stream->BeginStackMapEntry(dex_pc, native_pc, 0, 0, 0, 0);
     stack_map_stream->EndStackMapEntry();
     return;
   }
 
-  LocationSummary* locations = instruction->GetLocations();
+  LocationSummary* locations = instruction->GetLocations();
   uint32_t register_mask = locations->GetRegisterMask();
   DCHECK_EQ(register_mask & ~locations->GetLiveRegisters()->GetCoreRegisters(), 0u);
   if (locations->OnlyCallsOnSlowPath()) {
@@ -1023,22 +1008,33 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction,
     // The register mask must be a subset of callee-save registers.
     DCHECK_EQ(register_mask & core_callee_save_mask_, register_mask);
   }
+
+  uint32_t outer_dex_pc = dex_pc;
+  uint32_t outer_environment_size = 0u;
+  uint32_t inlining_depth = 0;
+  HEnvironment* const environment = instruction->GetEnvironment();
+  if (environment != nullptr) {
+    HEnvironment* outer_environment = environment;
+    while (outer_environment->GetParent() != nullptr) {
+      outer_environment = outer_environment->GetParent();
+      ++inlining_depth;
+    }
+    outer_dex_pc = outer_environment->GetDexPc();
+    outer_environment_size = outer_environment->Size();
+  }
   stack_map_stream->BeginStackMapEntry(outer_dex_pc,
                                        native_pc,
                                        register_mask,
                                        locations->GetStackMask(),
                                        outer_environment_size,
                                        inlining_depth);
-
-  HEnvironment* const environment = instruction->GetEnvironment();
   EmitEnvironment(environment, slow_path);
   // Record invoke info, the common case for the trampoline is super and static invokes. Only
   // record these to reduce oat file size.
   if (kEnableDexLayoutOptimizations) {
-    if (environment != nullptr &&
-        instruction->IsInvoke() &&
-        instruction->IsInvokeStaticOrDirect()) {
-      HInvoke* const invoke = instruction->AsInvoke();
+    if (instruction->IsInvokeStaticOrDirect()) {
+      HInvoke* const invoke = instruction->AsInvokeStaticOrDirect();
+      DCHECK(environment != nullptr);
       stack_map_stream->AddInvoke(invoke->GetInvokeType(), invoke->GetDexMethodIndex());
     }
   }
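The code_generator.cc change above replaces the per-environment loop with a single walk up the parent chain: the outermost environment supplies the dex pc and environment size for the stack map, and the number of parents crossed is the inlining depth. A standalone toy model of that walk (plain C++, not ART code; the struct and the dex pc values are made up for illustration):

    #include <cstdio>

    // Toy model of the environment walk in RecordPcInfo(): follow the parent
    // chain to the outermost environment, counting how many inlined frames are
    // crossed on the way.
    struct Environment {
      Environment* parent;
      unsigned dex_pc;
    };

    int main() {
      Environment outer{nullptr, 10};   // outermost (caller) environment
      Environment middle{&outer, 20};
      Environment inner{&middle, 30};   // environment attached to the instruction

      unsigned inlining_depth = 0;
      Environment* e = &inner;
      while (e->parent != nullptr) {
        e = e->parent;
        ++inlining_depth;
      }
      // Prints: outer dex_pc=10 inlining_depth=2
      std::printf("outer dex_pc=%u inlining_depth=%u\n", e->dex_pc, inlining_depth);
      return 0;
    }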
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index a65d7b1984..a42a85dc1d 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -1081,6 +1081,58 @@ static inline bool TryReplaceFieldOrArrayGetType(HInstruction* maybe_get, DataTy
   }
 }
 
+// The type conversion is only used for storing into a field/element of the
+// same/narrower size.
+static bool IsTypeConversionForStoringIntoNoWiderFieldOnly(HTypeConversion* type_conversion) {
+  if (type_conversion->HasEnvironmentUses()) {
+    return false;
+  }
+  DataType::Type input_type = type_conversion->GetInputType();
+  DataType::Type result_type = type_conversion->GetResultType();
+  if (!DataType::IsIntegralType(input_type) ||
+      !DataType::IsIntegralType(result_type) ||
+      input_type == DataType::Type::kInt64 ||
+      result_type == DataType::Type::kInt64) {
+    // Type conversion is needed if non-integer types are involved, or 64-bit
+    // types are involved, which may use different number of registers.
+    return false;
+  }
+  if (DataType::Size(input_type) >= DataType::Size(result_type)) {
+    // Type conversion is not necessary when storing to a field/element of the
+    // same/smaller size.
+  } else {
+    // We do not handle this case here.
+    return false;
+  }
+
+  // Check if the converted value is only used for storing into heap.
+  for (const HUseListNode<HInstruction*>& use : type_conversion->GetUses()) {
+    HInstruction* instruction = use.GetUser();
+    if (instruction->IsInstanceFieldSet() &&
+        instruction->AsInstanceFieldSet()->GetFieldType() == result_type) {
+      DCHECK_EQ(instruction->AsInstanceFieldSet()->GetValue(), type_conversion);
+      continue;
+    }
+    if (instruction->IsStaticFieldSet() &&
+        instruction->AsStaticFieldSet()->GetFieldType() == result_type) {
+      DCHECK_EQ(instruction->AsStaticFieldSet()->GetValue(), type_conversion);
+      continue;
+    }
+    if (instruction->IsArraySet() &&
+        instruction->AsArraySet()->GetComponentType() == result_type &&
+        // not index use.
+        instruction->AsArraySet()->GetIndex() != type_conversion) {
+      DCHECK_EQ(instruction->AsArraySet()->GetValue(), type_conversion);
+      continue;
+    }
+    // The use is not as a store value, or the field/element type is not the
+    // same as the result_type, keep the type conversion.
+    return false;
+  }
+  // Codegen automatically handles the type conversion during the store.
+  return true;
+}
+
 void InstructionSimplifierVisitor::VisitTypeConversion(HTypeConversion* instruction) {
   HInstruction* input = instruction->GetInput();
   DataType::Type input_type = input->GetType();
@@ -1169,6 +1221,13 @@ void InstructionSimplifierVisitor::VisitTypeConversion(HTypeConversion* instruct
       return;
     }
   }
+
+  if (IsTypeConversionForStoringIntoNoWiderFieldOnly(instruction)) {
+    instruction->ReplaceWith(input);
+    instruction->GetBlock()->RemoveInstruction(instruction);
+    RecordSimplification();
+    return;
+  }
 }
 
 void InstructionSimplifierVisitor::VisitAdd(HAdd* instruction) {
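The new simplifier check in instruction_simplifier.cc relies on a bit-level fact: an integral conversion to a same-or-narrower type that only feeds stores into fields/elements of exactly that result type is redundant, because the narrow store itself keeps only the low bits of the value. A standalone sketch of that fact (plain C++, not ART code; the variable names are made up):

    #include <cstdint>
    #include <cstdio>

    int main() {
      int value = 0x12345;  // 32-bit value being stored into an 8-bit "field"
      int8_t field_with_conversion = static_cast<int8_t>(value);  // explicit conversion, then store
      int8_t field_without_conversion = value;                    // the narrow store alone truncates
      // Both fields hold the same bits (0x45 == 69), so the conversion can be dropped.
      std::printf("%d %d\n", field_with_conversion, field_without_conversion);
      return field_with_conversion == field_without_conversion ? 0 : 1;
    }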
diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc
index f03fffabe2..88326d321b 100644
--- a/compiler/optimizing/load_store_elimination.cc
+++ b/compiler/optimizing/load_store_elimination.cc
@@ -74,6 +74,20 @@ class LSEVisitor : public HGraphDelegateVisitor {
     HGraphVisitor::VisitBasicBlock(block);
   }
 
+  HTypeConversion* AddTypeConversionIfNecessary(HInstruction* instruction,
+                                                HInstruction* value,
+                                                DataType::Type expected_type) {
+    HTypeConversion* type_conversion = nullptr;
+    // Should never add type conversion into boolean value.
+    if (expected_type != DataType::Type::kBool &&
+        !DataType::IsTypeConversionImplicit(value->GetType(), expected_type)) {
+      type_conversion = new (GetGraph()->GetAllocator()) HTypeConversion(
+          expected_type, value, instruction->GetDexPc());
+      instruction->GetBlock()->InsertInstructionBefore(type_conversion, instruction);
+    }
+    return type_conversion;
+  }
+
   // Find an instruction's substitute if it should be removed.
   // Return the same instruction if it should not be removed.
   HInstruction* FindSubstitute(HInstruction* instruction) {
@@ -86,13 +100,59 @@ class LSEVisitor : public HGraphDelegateVisitor {
     return instruction;
   }
 
+  void AddRemovedLoad(HInstruction* load, HInstruction* heap_value) {
+    DCHECK_EQ(FindSubstitute(heap_value), heap_value) <<
+        "Unexpected heap_value that has a substitute " << heap_value->DebugName();
+    removed_loads_.push_back(load);
+    substitute_instructions_for_loads_.push_back(heap_value);
+  }
+
+  // Scan the list of removed loads to see if we can reuse `type_conversion`, if
+  // the other removed load has the same substitute and type and is dominated
+  // by `type_conversion`.
+  void TryToReuseTypeConversion(HInstruction* type_conversion, size_t index) {
+    size_t size = removed_loads_.size();
+    HInstruction* load = removed_loads_[index];
+    HInstruction* substitute = substitute_instructions_for_loads_[index];
+    for (size_t j = index + 1; j < size; j++) {
+      HInstruction* load2 = removed_loads_[j];
+      HInstruction* substitute2 = substitute_instructions_for_loads_[j];
+      if (load2 == nullptr) {
+        DCHECK(substitute2->IsTypeConversion());
+        continue;
+      }
+      DCHECK(load2->IsInstanceFieldGet() ||
+             load2->IsStaticFieldGet() ||
+             load2->IsArrayGet());
+      DCHECK(substitute2 != nullptr);
+      if (substitute2 == substitute &&
+          load2->GetType() == load->GetType() &&
+          type_conversion->GetBlock()->Dominates(load2->GetBlock()) &&
+          // Don't share across irreducible loop headers.
+          // TODO: can be more fine-grained than this by testing each dominator.
+          (load2->GetBlock() == type_conversion->GetBlock() ||
+           !GetGraph()->HasIrreducibleLoops())) {
+        // The removed_loads_ are added in reverse post order.
+        DCHECK(type_conversion->StrictlyDominates(load2));
+        load2->ReplaceWith(type_conversion);
+        load2->GetBlock()->RemoveInstruction(load2);
+        removed_loads_[j] = nullptr;
+        substitute_instructions_for_loads_[j] = type_conversion;
+      }
+    }
+  }
+
   // Remove recorded instructions that should be eliminated.
   void RemoveInstructions() {
     size_t size = removed_loads_.size();
     DCHECK_EQ(size, substitute_instructions_for_loads_.size());
     for (size_t i = 0; i < size; i++) {
       HInstruction* load = removed_loads_[i];
-      DCHECK(load != nullptr);
+      if (load == nullptr) {
+        // The load has been handled in the scan for type conversion below.
+        DCHECK(substitute_instructions_for_loads_[i]->IsTypeConversion());
+        continue;
+      }
       DCHECK(load->IsInstanceFieldGet() ||
              load->IsStaticFieldGet() ||
              load->IsArrayGet());
@@ -102,7 +162,28 @@ class LSEVisitor : public HGraphDelegateVisitor {
       // a load that has a substitute should not be observed as a heap
       // location value.
       DCHECK_EQ(FindSubstitute(substitute), substitute);
-      load->ReplaceWith(substitute);
+
+      // The load expects to load the heap value as type load->GetType().
+      // However the tracked heap value may not be of that type. An explicit
+      // type conversion may be needed.
+      // There are actually three types involved here:
+      // (1) tracked heap value's type (type A)
+      // (2) heap location (field or element)'s type (type B)
+      // (3) load's type (type C)
+      // We guarantee that type A stored as type B and then fetched out as
+      // type C is the same as casting from type A to type C directly, since
+      // type B and type C will have the same size which is guaranteed in
+      // HInstanceFieldGet/HStaticFieldGet/HArrayGet's SetType().
+      // So we only need one type conversion from type A to type C.
+      HTypeConversion* type_conversion = AddTypeConversionIfNecessary(
+          load, substitute, load->GetType());
+      if (type_conversion != nullptr) {
+        TryToReuseTypeConversion(type_conversion, i);
+        load->ReplaceWith(type_conversion);
+        substitute_instructions_for_loads_[i] = type_conversion;
+      } else {
+        load->ReplaceWith(substitute);
+      }
       load->GetBlock()->RemoveInstruction(load);
     }
 
@@ -338,8 +419,7 @@ class LSEVisitor : public HGraphDelegateVisitor {
     HInstruction* heap_value = heap_values[idx];
     if (heap_value == kDefaultHeapValue) {
      HInstruction* constant = GetDefaultValue(instruction->GetType());
-      removed_loads_.push_back(instruction);
-      substitute_instructions_for_loads_.push_back(constant);
+      AddRemovedLoad(instruction, constant);
      heap_values[idx] = constant;
      return;
    }
@@ -374,8 +454,7 @@ class LSEVisitor : public HGraphDelegateVisitor {
           }
           return;
         }
-        removed_loads_.push_back(instruction);
-        substitute_instructions_for_loads_.push_back(heap_value);
+        AddRemovedLoad(instruction, heap_value);
         TryRemovingNullCheck(instruction);
       }
     }
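The long comment in RemoveInstructions() argues that only one conversion is ever needed, because a value of type A stored into a location of type B and read back as type C equals one direct A-to-C cast whenever B and C have the same size. A standalone illustration of that argument (plain C++, not ART internals; here A = int32_t, B = int16_t, C = uint16_t, all names made up):

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main() {
      int32_t a = 0x54321;                          // type A: the tracked heap value
      int16_t field = static_cast<int16_t>(a);      // type B: what the store truncates into
      uint16_t c_via_memory;                        // type C: the type the removed load expects
      std::memcpy(&c_via_memory, &field, sizeof(field));  // read the same 16 bits back as C
      uint16_t c_direct = static_cast<uint16_t>(a);        // the single conversion LSE inserts
      // Prints: 17185 17185 -- the round trip through memory and the direct cast agree.
      std::printf("%u %u\n",
                  static_cast<unsigned>(c_via_memory),
                  static_cast<unsigned>(c_direct));
      return c_via_memory == c_direct ? 0 : 1;
    }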
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 1ca096035e..3dc1ef7534 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -1749,7 +1749,8 @@ void HLoopOptimization::GenerateVecReductionPhiInputs(HPhi* phi, HInstruction* r
 HInstruction* HLoopOptimization::ReduceAndExtractIfNeeded(HInstruction* instruction) {
   if (instruction->IsPhi()) {
     HInstruction* input = instruction->InputAt(1);
-    if (input->IsVecOperation() && !input->IsVecExtractScalar()) {
+    if (HVecOperation::ReturnsSIMDValue(input)) {
+      DCHECK(!input->IsPhi());
       HVecOperation* input_vector = input->AsVecOperation();
       uint32_t vector_length = input_vector->GetVectorLength();
       DataType::Type type = input_vector->GetPackedType();
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 58030df9d4..7fbd7f48a2 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -4614,7 +4614,6 @@ class HInvokeInterface FINAL : public HInvoke {
   }
 
   uint32_t GetImtIndex() const { return imt_index_; }
-  uint32_t GetDexMethodIndex() const { return dex_method_index_; }
 
   DECLARE_INSTRUCTION(InvokeInterface);
 
diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h
index 59d5b9f847..096349fd73 100644
--- a/compiler/optimizing/nodes_vector.h
+++ b/compiler/optimizing/nodes_vector.h
@@ -150,6 +150,19 @@ class HVecOperation : public HVariableInputSizeInstruction {
     }
   }
 
+  // Helper method to determine if an instruction returns a SIMD value.
+  // TODO: This method is needed until we introduce SIMD as proper type.
+  static bool ReturnsSIMDValue(HInstruction* instruction) {
+    if (instruction->IsVecOperation()) {
+      return !instruction->IsVecExtractScalar();  // only scalar returning vec op
+    } else if (instruction->IsPhi()) {
+      return
+          instruction->GetType() == kSIMDType &&
+          instruction->InputAt(1)->IsVecOperation();  // vectorizer does not go deeper
+    }
+    return false;
+  }
+
   DECLARE_ABSTRACT_INSTRUCTION(VecOperation);
 
 protected:
@@ -879,7 +892,7 @@ class HVecSetScalars FINAL : public HVecOperation {
                       vector_length,
                       dex_pc) {
     for (size_t i = 0; i < number_of_scalars; i++) {
-      DCHECK(!scalars[i]->IsVecOperation() || scalars[i]->IsVecExtractScalar());
+      DCHECK(!ReturnsSIMDValue(scalars[i]));
       SetRawInputAt(0, scalars[i]);
     }
   }
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index 9ab7a89b33..f6bd05269e 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -474,9 +474,10 @@ size_t LiveInterval::NumberOfSpillSlotsNeeded() const {
   // For a SIMD operation, compute the number of needed spill slots.
   // TODO: do through vector type?
   HInstruction* definition = GetParent()->GetDefinedBy();
-  if (definition != nullptr &&
-      definition->IsVecOperation() &&
-      !definition->IsVecExtractScalar()) {
+  if (definition != nullptr && HVecOperation::ReturnsSIMDValue(definition)) {
+    if (definition->IsPhi()) {
+      definition = definition->InputAt(1);  // SIMD always appears on back-edge
+    }
     return definition->AsVecOperation()->GetVectorNumberOfBytes() / kVRegSize;
   }
   // Return number of needed spill slots based on type.
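For the ssa_liveness_analysis.cc change, the spill-slot count for a SIMD value is simply its byte size divided by the virtual-register size: assuming kVRegSize is 4 bytes (as in current ART), a 128-bit vector needs 16 / 4 = 4 slots. A toy sketch of that arithmetic (illustrative constants only, not ART code):

    #include <cstddef>
    #include <cstdio>

    int main() {
      const std::size_t kVRegSize = 4;      // bytes per dex virtual register (assumption)
      const std::size_t vector_bytes = 16;  // e.g. a 128-bit vector register
      // Mirrors GetVectorNumberOfBytes() / kVRegSize; prints: spill slots = 4
      std::printf("spill slots = %zu\n", vector_bytes / kVRegSize);
      return 0;
    }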