20 files changed, 48 insertions, 734 deletions
diff --git a/compiler/Android.bp b/compiler/Android.bp index e1d382f6f4..eff4955d44 100644 --- a/compiler/Android.bp +++ b/compiler/Android.bp @@ -161,7 +161,6 @@ art_cc_defaults { "utils/x86/assembler_x86.cc", "utils/x86/jni_macro_assembler_x86.cc", "utils/x86/managed_register_x86.cc", - "optimizing/instruction_simplifier_x86.cc", ], }, x86_64: { diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc index 58808769e2..086ae07a06 100644 --- a/compiler/optimizing/code_generator_vector_x86.cc +++ b/compiler/optimizing/code_generator_vector_x86.cc @@ -1125,59 +1125,13 @@ static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* in } } -void LocationsBuilderX86::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { - LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instr); - switch (instr->GetPackedType()) { - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - locations->SetInAt( - HVecMultiplyAccumulate::kInputAccumulatorIndex, Location::RequiresFpuRegister()); - locations->SetInAt( - HVecMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresFpuRegister()); - locations->SetInAt( - HVecMultiplyAccumulate::kInputMulRightIndex, Location::RequiresFpuRegister()); - DCHECK_EQ(HVecMultiplyAccumulate::kInputAccumulatorIndex, 0); - locations->SetOut(Location::SameAsFirstInput()); - break; - default: - // VecMultiplyAccumulate is supported only for single and - // double precision floating points. Hence integral types - // are still not converted. - LOG(FATAL) << "Unsupported SIMD Type"; - } +void LocationsBuilderX86::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) { + CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction); } -void InstructionCodeGeneratorX86::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { - LocationSummary* locations = instr->GetLocations(); - DCHECK(locations->InAt(0).Equals(locations->Out())); - XmmRegister accumulator = locations->InAt( - HVecMultiplyAccumulate::kInputAccumulatorIndex).AsFpuRegister<XmmRegister>(); - XmmRegister mul_left = locations->InAt( - HVecMultiplyAccumulate::kInputMulLeftIndex).AsFpuRegister<XmmRegister>(); - XmmRegister mul_right = locations->InAt( - HVecMultiplyAccumulate::kInputMulRightIndex).AsFpuRegister<XmmRegister>(); - switch (instr->GetPackedType()) { - case DataType::Type::kFloat32: - DCHECK_EQ(4u, instr->GetVectorLength()); - if (instr->GetOpKind() == HInstruction::InstructionKind::kAdd) - __ vfmadd231ps(accumulator, mul_left, mul_right); - else - __ vfmsub231ps(accumulator, mul_left, mul_right); - break; - case DataType::Type::kFloat64: - DCHECK_EQ(2u, instr->GetVectorLength()); - if (instr->GetOpKind() == HInstruction::InstructionKind::kAdd) - __ vfmadd231pd(accumulator, mul_left, mul_right); - else - __ vfmsub231pd(accumulator, mul_left, mul_right); - break; - default: - - // VecMultiplyAccumulate is supported only for single and - // double precision floating points. Hence integral types - // are still not converted. - LOG(FATAL) << "Unsupported SIMD Type"; - } +void InstructionCodeGeneratorX86::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) { + // TODO: pmaddwd? 
+ LOG(FATAL) << "No SIMD for " << instruction->GetId(); } void LocationsBuilderX86::VisitVecSADAccumulate(HVecSADAccumulate* instruction) { diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc index 4795e86933..4d31ab68d1 100644 --- a/compiler/optimizing/code_generator_vector_x86_64.cc +++ b/compiler/optimizing/code_generator_vector_x86_64.cc @@ -1098,61 +1098,13 @@ static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* in } } -void LocationsBuilderX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { - LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instr); - switch (instr->GetPackedType()) { - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - locations->SetInAt( - HVecMultiplyAccumulate::kInputAccumulatorIndex, Location::RequiresFpuRegister()); - locations->SetInAt( - HVecMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresFpuRegister()); - locations->SetInAt( - HVecMultiplyAccumulate::kInputMulRightIndex, Location::RequiresFpuRegister()); - DCHECK_EQ(HVecMultiplyAccumulate::kInputAccumulatorIndex, 0); - locations->SetOut(Location::SameAsFirstInput()); - break; - default: - // VecMultiplyAccumulate is supported only for single and - // double precision floating points. Hence integral types - // are still not converted. - LOG(FATAL) << "Unsupported SIMD type"; - } +void LocationsBuilderX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) { + CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction); } - -void InstructionCodeGeneratorX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { - LocationSummary* locations = instr->GetLocations(); - DCHECK(locations->InAt(0).Equals(locations->Out())); - XmmRegister accumulator = locations->InAt( - HVecMultiplyAccumulate::kInputAccumulatorIndex).AsFpuRegister<XmmRegister>(); - XmmRegister mul_left = locations->InAt( - HVecMultiplyAccumulate::kInputMulLeftIndex).AsFpuRegister<XmmRegister>(); - XmmRegister mul_right = locations->InAt( - HVecMultiplyAccumulate::kInputMulRightIndex).AsFpuRegister<XmmRegister>(); - - switch (instr->GetPackedType()) { - case DataType::Type::kFloat32: - DCHECK_EQ(4u, instr->GetVectorLength()); - if (instr->GetOpKind() == HInstruction::InstructionKind::kAdd) - __ vfmadd231ps(accumulator, mul_left, mul_right); - else - __ vfmsub231ps(accumulator, mul_left, mul_right); - break; - case DataType::Type::kFloat64: - DCHECK_EQ(2u, instr->GetVectorLength()); - if (instr->GetOpKind() == HInstruction::InstructionKind::kAdd) - __ vfmadd231pd(accumulator, mul_left, mul_right); - else - __ vfmsub231pd(accumulator, mul_left, mul_right); - break; - default: - - // VecMultiplyAccumulate is supported only for single and - // double precision floating points. Hence integral types - // are still not converted. - LOG(FATAL) << "Unsupported SIMD Type"; - } +void InstructionCodeGeneratorX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) { + // TODO: pmaddwd? 
+ LOG(FATAL) << "No SIMD for " << instruction->GetId(); } void LocationsBuilderX86_64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) { diff --git a/compiler/optimizing/instruction_simplifier_x86.cc b/compiler/optimizing/instruction_simplifier_x86.cc deleted file mode 100644 index b3f67d6e84..0000000000 --- a/compiler/optimizing/instruction_simplifier_x86.cc +++ /dev/null @@ -1,149 +0,0 @@ -/* - * Copyright (C) 2018 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "instruction_simplifier_x86.h" -#include "arch/x86/instruction_set_features_x86.h" -#include "mirror/array-inl.h" -#include "code_generator.h" - - -namespace art { - -namespace x86 { - -class InstructionSimplifierX86Visitor : public HGraphVisitor { - public: - InstructionSimplifierX86Visitor(HGraph* graph, - CodeGeneratorX86 *codegen, - OptimizingCompilerStats* stats) - : HGraphVisitor(graph), codegen_(codegen), stats_(stats) {} - - private: - void RecordSimplification() { - MaybeRecordStat(stats_, MethodCompilationStat::kInstructionSimplificationsArch); - } - - bool HasCpuFeatureFlag() { - return (codegen_->GetInstructionSetFeatures().HasAVX2()); - } - - /** - * This simplifier uses a special-purpose BB visitor. - * (1) No need to visit Phi nodes. - * (2) Since statements can be removed in a "forward" fashion, - * the visitor should test if each statement is still there. - */ - void VisitBasicBlock(HBasicBlock* block) OVERRIDE { - // TODO: fragile iteration, provide more robust iterators? - for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { - HInstruction* instruction = it.Current(); - if (instruction->IsInBlock()) { - instruction->Accept(this); - } - } - } - - bool TryGenerateVecMultiplyAccumulate(HVecMul* mul); - void VisitVecMul(HVecMul* instruction) OVERRIDE; - - CodeGeneratorX86* codegen_; - OptimizingCompilerStats* stats_; -}; - -/* generic expressions for FMA -a = (b * c) + a -a = (b * c) – a -*/ -bool InstructionSimplifierX86Visitor::TryGenerateVecMultiplyAccumulate(HVecMul* mul) { - if (!(mul->GetPackedType() == DataType::Type::kFloat32 || - mul->GetPackedType() == DataType::Type::kFloat64)) { - return false; - } - ArenaAllocator* allocator = mul->GetBlock()->GetGraph()->GetAllocator(); - if (mul->HasOnlyOneNonEnvironmentUse()) { - HInstruction* use = mul->GetUses().front().GetUser(); - if (use->IsVecAdd() || use->IsVecSub()) { - // Replace code looking like - // VECMUL tmp, x, y - // VECADD dst, acc, tmp or VECADD dst, tmp, acc - // or - // VECSUB dst, tmp, acc - // with - // VECMULACC dst, acc, x, y - - // Note that we do not want to (unconditionally) perform the merge when the - // multiplication has multiple uses and it can be merged in all of them. - // Multiple uses could happen on the same control-flow path, and we would - // then increase the amount of work. 
In the future we could try to evaluate - // whether all uses are on different control-flow paths (using dominance and - // reverse-dominance information) and only perform the merge when they are. - HInstruction* accumulator = nullptr; - HVecBinaryOperation* binop = use->AsVecBinaryOperation(); - HInstruction* binop_left = binop->GetLeft(); - HInstruction* binop_right = binop->GetRight(); - DCHECK_NE(binop_left, binop_right); - if (use->IsVecSub()) { - if (binop_left == mul) { - accumulator = binop_right; - } - } else { - // VecAdd - if (binop_right == mul) { - accumulator = binop_left; - } else { - DCHECK_EQ(binop_left, mul); - accumulator = binop_right; - } - } - HInstruction::InstructionKind kind = - use->IsVecAdd() ? HInstruction::kAdd : HInstruction::kSub; - - if (accumulator != nullptr) { - HVecMultiplyAccumulate* mulacc = - new (allocator) HVecMultiplyAccumulate(allocator, - kind, - accumulator, - mul->GetLeft(), - mul->GetRight(), - binop->GetPackedType(), - binop->GetVectorLength(), - binop->GetDexPc()); - binop->GetBlock()->ReplaceAndRemoveInstructionWith(binop, mulacc); - DCHECK(!mul->HasUses()); - mul->GetBlock()->RemoveInstruction(mul); - return true; - } - } - } - return false; -} - -void InstructionSimplifierX86Visitor::VisitVecMul(HVecMul* instruction) { - if (HasCpuFeatureFlag()) { - if (TryGenerateVecMultiplyAccumulate(instruction)) { - RecordSimplification(); - } - } -} - -bool InstructionSimplifierX86::Run() { - InstructionSimplifierX86Visitor visitor(graph_, codegen_, stats_); - visitor.VisitReversePostOrder(); - return true; -} - -} // namespace x86 -} // namespace art diff --git a/compiler/optimizing/instruction_simplifier_x86.h b/compiler/optimizing/instruction_simplifier_x86.h deleted file mode 100644 index 1fb199f728..0000000000 --- a/compiler/optimizing/instruction_simplifier_x86.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (C) 2018 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_H_ -#define ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_H_ - -#include "nodes.h" -#include "optimization.h" -#include "code_generator_x86.h" - -namespace art { -namespace x86 { - -class InstructionSimplifierX86 : public HOptimization { - public: - InstructionSimplifierX86(HGraph* graph, CodeGenerator* codegen, OptimizingCompilerStats* stats) - : HOptimization(graph, kInstructionSimplifierX86PassName, stats), - codegen_(down_cast<CodeGeneratorX86*>(codegen)) {} - - static constexpr const char* kInstructionSimplifierX86PassName = "instruction_simplifier_x86"; - - bool Run() OVERRIDE; - - private: - CodeGeneratorX86* codegen_; -}; - -} // namespace x86 -} // namespace art - -#endif // ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_H_ diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h index b4f9993ad6..95fb5ab76a 100644 --- a/compiler/optimizing/nodes_vector.h +++ b/compiler/optimizing/nodes_vector.h @@ -931,6 +931,9 @@ class HVecSetScalars FINAL : public HVecOperation { // Multiplies every component in the two vectors, adds the result vector to the accumulator vector, // viz. [ a1, .. , an ] + [ x1, .. , xn ] * [ y1, .. , yn ] = [ a1 + x1 * y1, .. , an + xn * yn ]. +// For floating point types, Java rounding behavior must be preserved; the products are rounded to +// the proper precision before being added. "Fused" multiply-add operations available on several +// architectures are not usable since they would violate Java language rules. class HVecMultiplyAccumulate FINAL : public HVecOperation { public: HVecMultiplyAccumulate(ArenaAllocator* allocator, @@ -953,15 +956,14 @@ class HVecMultiplyAccumulate FINAL : public HVecOperation { DCHECK(HasConsistentPackedTypes(accumulator, packed_type)); DCHECK(HasConsistentPackedTypes(mul_left, packed_type)); DCHECK(HasConsistentPackedTypes(mul_right, packed_type)); + // Remove the following if we add an architecture that supports floating point multiply-add + // with Java-compatible rounding. 
+ DCHECK(DataType::IsIntegralType(packed_type)); SetRawInputAt(0, accumulator); SetRawInputAt(1, mul_left); SetRawInputAt(2, mul_right); } - static constexpr int kInputAccumulatorIndex = 0; - static constexpr int kInputMulLeftIndex = 1; - static constexpr int kInputMulRightIndex = 2; - bool CanBeMoved() const OVERRIDE { return true; } bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { diff --git a/compiler/optimizing/optimization.cc b/compiler/optimizing/optimization.cc index 3c803ab627..142ddb5fbb 100644 --- a/compiler/optimizing/optimization.cc +++ b/compiler/optimizing/optimization.cc @@ -28,7 +28,6 @@ #endif #ifdef ART_ENABLE_CODEGEN_x86 #include "pc_relative_fixups_x86.h" -#include "instruction_simplifier_x86.h" #endif #if defined(ART_ENABLE_CODEGEN_x86) || defined(ART_ENABLE_CODEGEN_x86_64) #include "x86_memory_gen.h" @@ -122,8 +121,6 @@ const char* OptimizationPassName(OptimizationPass pass) { #if defined(ART_ENABLE_CODEGEN_x86) || defined(ART_ENABLE_CODEGEN_x86_64) case OptimizationPass::kX86MemoryOperandGeneration: return x86::X86MemoryOperandGeneration::kX86MemoryOperandGenerationPassName; - case OptimizationPass::kInstructionSimplifierX86: - return x86::InstructionSimplifierX86::kInstructionSimplifierX86PassName; #endif case OptimizationPass::kNone: LOG(FATAL) << "kNone does not represent an actual pass"; @@ -166,7 +163,6 @@ OptimizationPass OptimizationPassByName(const std::string& pass_name) { #ifdef ART_ENABLE_CODEGEN_x86 X(OptimizationPass::kPcRelativeFixupsX86); X(OptimizationPass::kX86MemoryOperandGeneration); - X(OptimizationPass::kInstructionSimplifierX86); #endif LOG(FATAL) << "Cannot find optimization " << pass_name; UNREACHABLE(); @@ -327,10 +323,6 @@ ArenaVector<HOptimization*> ConstructOptimizations( DCHECK(alt_name == nullptr) << "arch-specific pass does not support alternative name"; opt = new (allocator) x86::X86MemoryOperandGeneration(graph, codegen, stats); break; - case OptimizationPass::kInstructionSimplifierX86: - DCHECK(alt_name == nullptr) << "arch-specific pass does not support alternative name"; - opt = new (allocator) x86::InstructionSimplifierX86(graph, codegen, stats); - break; #endif case OptimizationPass::kNone: LOG(FATAL) << "kNone does not represent an actual pass"; diff --git a/compiler/optimizing/optimization.h b/compiler/optimizing/optimization.h index a9fafa0864..88b283cebf 100644 --- a/compiler/optimizing/optimization.h +++ b/compiler/optimizing/optimization.h @@ -101,7 +101,6 @@ enum class OptimizationPass { #endif #if defined(ART_ENABLE_CODEGEN_x86) || defined(ART_ENABLE_CODEGEN_x86_64) kX86MemoryOperandGeneration, - kInstructionSimplifierX86, #endif kNone, kLast = kNone diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index f4bafcbef0..2f530a911a 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -531,8 +531,7 @@ bool OptimizingCompiler::RunArchOptimizations(HGraph* graph, OptDef(OptimizationPass::kSideEffectsAnalysis), OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"), OptDef(OptimizationPass::kPcRelativeFixupsX86), - OptDef(OptimizationPass::kX86MemoryOperandGeneration), - OptDef(OptimizationPass::kInstructionSimplifierX86) + OptDef(OptimizationPass::kX86MemoryOperandGeneration) }; return RunOptimizations(graph, codegen, @@ -547,8 +546,7 @@ bool OptimizingCompiler::RunArchOptimizations(HGraph* graph, OptimizationDef x86_64_optimizations[] = { OptDef(OptimizationPass::kSideEffectsAnalysis), 
OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"), - OptDef(OptimizationPass::kX86MemoryOperandGeneration), - OptDef(OptimizationPass::kInstructionSimplifierX86) + OptDef(OptimizationPass::kX86MemoryOperandGeneration) }; return RunOptimizations(graph, codegen, diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc index c2ce03b1f2..86f9010ea3 100644 --- a/compiler/utils/x86/assembler_x86.cc +++ b/compiler/utils/x86/assembler_x86.cc @@ -525,58 +525,6 @@ void X86Assembler::divss(XmmRegister dst, const Address& src) { EmitOperand(dst, src); } -void X86Assembler::vfmadd231ps(XmmRegister acc, XmmRegister mul_left, XmmRegister mul_right) { - AssemblerBuffer::EnsureCapacity ensured(&buffer_); - uint8_t byte_zero = EmitVexByteZero(false /*is_two_byte*/); - uint8_t byte_one = EmitVexByte1(false, false, false, 2); - uint8_t byte_two = EmitVexByte2(false, 128, X86ManagedRegister::FromXmmRegister(mul_left), 1); - EmitUint8(byte_zero); - EmitUint8(byte_one); - EmitUint8(byte_two); - // Opcode field. - EmitUint8(0xB8); - EmitXmmRegisterOperand(acc, mul_right); -} - -void X86Assembler::vfmsub231ps(XmmRegister acc, XmmRegister mul_left, XmmRegister mul_right) { - AssemblerBuffer::EnsureCapacity ensured(&buffer_); - uint8_t byte_zero = EmitVexByteZero(false /*is_two_byte*/); - uint8_t byte_one = EmitVexByte1(false, false, false, 2); - uint8_t byte_two = EmitVexByte2(false, 128, X86ManagedRegister::FromXmmRegister(mul_left), 1); - EmitUint8(byte_zero); - EmitUint8(byte_one); - EmitUint8(byte_two); - // Opcode field. - EmitUint8(0xBA); - EmitXmmRegisterOperand(acc, mul_right); -} - -void X86Assembler::vfmadd231pd(XmmRegister acc, XmmRegister mul_left, XmmRegister mul_right) { - AssemblerBuffer::EnsureCapacity ensured(&buffer_); - uint8_t byte_zero = EmitVexByteZero(false /*is_two_byte*/); - uint8_t byte_one = EmitVexByte1(false, false, false, 2); - uint8_t byte_two = EmitVexByte2(true, 128, X86ManagedRegister::FromXmmRegister(mul_left), 1); - EmitUint8(byte_zero); - EmitUint8(byte_one); - EmitUint8(byte_two); - // Opcode field. - EmitUint8(0xB8); - EmitXmmRegisterOperand(acc, mul_right); -} - -void X86Assembler::vfmsub231pd(XmmRegister acc, XmmRegister mul_left, XmmRegister mul_right) { - AssemblerBuffer::EnsureCapacity ensured(&buffer_); - uint8_t byte_zero = EmitVexByteZero(false /*is_two_byte*/); - uint8_t byte_one = EmitVexByte1(false, false, false, 2); - uint8_t byte_two = EmitVexByte2(true, 128, X86ManagedRegister::FromXmmRegister(mul_left), 1); - EmitUint8(byte_zero); - EmitUint8(byte_one); - EmitUint8(byte_two); - // Opcode field. - EmitUint8(0xBA); - EmitXmmRegisterOperand(acc, mul_right); -} - void X86Assembler::addps(XmmRegister dst, XmmRegister src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); @@ -2950,99 +2898,6 @@ void X86Assembler::EmitLabelLink(NearLabel* label) { } -uint8_t X86Assembler::EmitVexByteZero(bool is_two_byte) { - uint8_t vex_zero = 0xC0; - if (!is_two_byte) { - vex_zero |= 0xC4; - } else { - vex_zero |= 0xC5; - } - return vex_zero; -} - -uint8_t X86Assembler::EmitVexByte1(bool r, bool x, bool b, int mmmmm ) { - // VEX Byte 1. - uint8_t vex_prefix = 0; - if (!r) { - vex_prefix |= 0x80; // VEX.R . - } - if (!x) { - vex_prefix |= 0x40; // VEX.X . - } - if (!b) { - vex_prefix |= 0x20; // VEX.B . - } - - // VEX.mmmmm. - switch (mmmmm) { - case 1: - // Implied 0F leading opcode byte. - vex_prefix |= 0x01; - break; - case 2: - // Implied leading 0F 38 opcode byte. 
- vex_prefix |= 0x02; - break; - case 3: - // Implied leading OF 3A opcode byte. - vex_prefix |= 0x03; - break; - default: - LOG(FATAL) << "unknown opcode bytes"; - } - return vex_prefix; -} - -uint8_t X86Assembler::EmitVexByte2(bool w, int l, X86ManagedRegister operand, int pp) { - uint8_t vex_prefix = 0; - // VEX Byte 2. - if (w) { - vex_prefix |= 0x80; - } - - // VEX.vvvv. - if (operand.IsXmmRegister()) { - XmmRegister vvvv = operand.AsXmmRegister(); - int inverted_reg = 15-static_cast<int>(vvvv); - uint8_t reg = static_cast<uint8_t>(inverted_reg); - vex_prefix |= ((reg & 0x0F) << 3); - } else if (operand.IsCpuRegister()) { - Register vvvv = operand.AsCpuRegister(); - int inverted_reg = 15 - static_cast<int>(vvvv); - uint8_t reg = static_cast<uint8_t>(inverted_reg); - vex_prefix |= ((reg & 0x0F) << 3); - } - - // VEX.L. - if (l == 256) { - vex_prefix |= 0x04; - } - - // VEX.pp. - switch (pp) { - case 0: - // SIMD Pefix - None. - vex_prefix |= 0x00; - break; - case 1: - // SIMD Prefix - 66. - vex_prefix |= 0x01; - break; - case 2: - // SIMD Prefix - F3. - vex_prefix |= 0x02; - break; - case 3: - // SIMD Prefix - F2. - vex_prefix |= 0x03; - break; - default: - LOG(FATAL) << "unknown SIMD Prefix"; - } - - return vex_prefix; -} - void X86Assembler::EmitGenericShift(int reg_or_opcode, const Operand& operand, const Immediate& imm) { diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h index 8c9ce82687..e42c4c986a 100644 --- a/compiler/utils/x86/assembler_x86.h +++ b/compiler/utils/x86/assembler_x86.h @@ -397,12 +397,6 @@ class X86Assembler FINAL : public Assembler { void divss(XmmRegister dst, XmmRegister src); void divss(XmmRegister dst, const Address& src); - // FMA Mac Instructions - void vfmadd231ps(XmmRegister dst, XmmRegister src1, XmmRegister src2); - void vfmadd231pd(XmmRegister dst, XmmRegister src1, XmmRegister src2); - void vfmsub231ps(XmmRegister dst, XmmRegister src1, XmmRegister src2); - void vfmsub231pd(XmmRegister dst, XmmRegister src1, XmmRegister src2); - void addps(XmmRegister dst, XmmRegister src); // no addr variant (for now) void subps(XmmRegister dst, XmmRegister src); void mulps(XmmRegister dst, XmmRegister src); @@ -840,11 +834,6 @@ class X86Assembler FINAL : public Assembler { void EmitLabelLink(Label* label); void EmitLabelLink(NearLabel* label); - // Emit a 3 byte VEX Prefix - uint8_t EmitVexByteZero(bool is_two_byte); - uint8_t EmitVexByte1(bool r, bool x, bool b, int mmmmm); - uint8_t EmitVexByte2(bool w , int l , X86ManagedRegister vvv, int pp); - void EmitGenericShift(int rm, const Operand& operand, const Immediate& imm); void EmitGenericShift(int rm, const Operand& operand, Register shifter); diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc index 9983eaeeea..bd31561937 100644 --- a/compiler/utils/x86_64/assembler_x86_64.cc +++ b/compiler/utils/x86_64/assembler_x86_64.cc @@ -603,56 +603,6 @@ void X86_64Assembler::divss(XmmRegister dst, const Address& src) { } -void X86_64Assembler::vfmadd231ps(XmmRegister acc, XmmRegister mul_left, XmmRegister mul_right) { - AssemblerBuffer::EnsureCapacity ensured(&buffer_); - uint8_t byte_zero = EmitVexByteZero(false /*is_two_byte*/); - uint8_t byte_one = EmitVexByte1(acc.NeedsRex(), false, mul_right.NeedsRex(), 2); - uint8_t byte_two = EmitVexByte2(false, 128, X86_64ManagedRegister::FromXmmRegister(mul_left.AsFloatRegister()), 1); - EmitUint8(byte_zero); - EmitUint8(byte_one); - EmitUint8(byte_two); - // Opcode field. 
- EmitUint8(0xB8); - EmitXmmRegisterOperand(acc.LowBits(), mul_right); -} - - -void X86_64Assembler::vfmsub231ps(XmmRegister acc, XmmRegister mul_left, XmmRegister mul_right) { - AssemblerBuffer::EnsureCapacity ensured(&buffer_); - uint8_t byte_zero = EmitVexByteZero(false /*is_two_byte*/); - uint8_t byte_one = EmitVexByte1(acc.NeedsRex(), false, mul_right.NeedsRex(), 2); - uint8_t byte_two = EmitVexByte2(false, 128, X86_64ManagedRegister::FromXmmRegister(mul_left.AsFloatRegister()), 1); - EmitUint8(byte_zero); - EmitUint8(byte_one); - EmitUint8(byte_two); - // Opcode field - EmitUint8(0xBA); - EmitXmmRegisterOperand(acc.LowBits(), mul_right); -} - -void X86_64Assembler::vfmadd231pd(XmmRegister acc, XmmRegister mul_left, XmmRegister mul_right) { - AssemblerBuffer::EnsureCapacity ensured(&buffer_); - uint8_t byte_zero = EmitVexByteZero(false /*is_two_byte*/); - uint8_t byte_one = EmitVexByte1(acc.NeedsRex(), false, mul_right.NeedsRex(), 2); - uint8_t byte_two = EmitVexByte2(true, 128, X86_64ManagedRegister::FromXmmRegister(mul_left.AsFloatRegister()), 1); - EmitUint8(byte_zero); - EmitUint8(byte_one); - EmitUint8(byte_two); - EmitUint8(0xB8); - EmitXmmRegisterOperand(acc.LowBits(), mul_right); -} - -void X86_64Assembler::vfmsub231pd(XmmRegister acc, XmmRegister mul_left, XmmRegister mul_right) { - AssemblerBuffer::EnsureCapacity ensured(&buffer_); - uint8_t byte_zero = EmitVexByteZero(false /*is_two_byte*/); - uint8_t byte_one = EmitVexByte1(acc.NeedsRex(), false, mul_right.NeedsRex(), 2); - uint8_t byte_two = EmitVexByte2(true, 128, X86_64ManagedRegister::FromXmmRegister(mul_left.AsFloatRegister()), 1); - EmitUint8(byte_zero); - EmitUint8(byte_one); - EmitUint8(byte_two); - EmitUint8(0xBA); - EmitXmmRegisterOperand(acc.LowBits(), mul_right); -} void X86_64Assembler::addps(XmmRegister dst, XmmRegister src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitOptionalRex32(dst, src); @@ -3594,98 +3544,6 @@ void X86_64Assembler::EmitLabelLink(NearLabel* label) { label->LinkTo(position); } -uint8_t X86_64Assembler::EmitVexByteZero(bool is_two_byte) { - uint8_t vex_zero = 0xC0; - if (!is_two_byte) { - vex_zero |= 0xC4; - } else { - vex_zero |= 0xC5; - } - return vex_zero; -} - -uint8_t X86_64Assembler::EmitVexByte1(bool r, bool x, bool b, int mmmmm) { - // VEX Byte 1. - uint8_t vex_prefix = 0; - if (!r) { - vex_prefix |= 0x80; // VEX.R . - } - if (!x) { - vex_prefix |= 0x40; // VEX.X . - } - if (!b) { - vex_prefix |= 0x20; // VEX.B . - } - - // VEX.mmmmm. - switch (mmmmm) { - case 1: - // Implied 0F leading opcode byte. - vex_prefix |= 0x01; - break; - case 2: - // Implied leading 0F 38 opcode byte. - vex_prefix |= 0x02; - break; - case 3: - // Implied leading OF 3A opcode byte. - vex_prefix |= 0x03; - break; - default: - LOG(FATAL) << "unknown opcode bytes"; - } - - return vex_prefix; -} - -uint8_t X86_64Assembler::EmitVexByte2(bool w, int l, X86_64ManagedRegister operand, int pp) { - // VEX Byte 2. - uint8_t vex_prefix = 0; - if (w) { - vex_prefix |= 0x80; - } - // VEX.vvvv. - if (operand.IsXmmRegister()) { - XmmRegister vvvv = operand.AsXmmRegister(); - int inverted_reg = 15-static_cast<int>(vvvv.AsFloatRegister()); - uint8_t reg = static_cast<uint8_t>(inverted_reg); - vex_prefix |= ((reg & 0x0F) << 3); - } else if (operand.IsCpuRegister()) { - CpuRegister vvvv = operand.AsCpuRegister(); - int inverted_reg = 15 - static_cast<int>(vvvv.AsRegister()); - uint8_t reg = static_cast<uint8_t>(inverted_reg); - vex_prefix |= ((reg & 0x0F) << 3); - } - - // VEX.L. 
- if (l == 256) { - vex_prefix |= 0x04; - } - - // VEX.pp. - switch (pp) { - case 0: - // SIMD Pefix - None. - vex_prefix |= 0x00; - break; - case 1: - // SIMD Prefix - 66. - vex_prefix |= 0x01; - break; - case 2: - // SIMD Prefix - F3. - vex_prefix |= 0x02; - break; - case 3: - // SIMD Prefix - F2. - vex_prefix |= 0x03; - break; - default: - LOG(FATAL) << "unknown SIMD Prefix"; - } - - return vex_prefix; -} void X86_64Assembler::EmitGenericShift(bool wide, int reg_or_opcode, diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h index d5779aa786..e4d72a7ba2 100644 --- a/compiler/utils/x86_64/assembler_x86_64.h +++ b/compiler/utils/x86_64/assembler_x86_64.h @@ -436,16 +436,6 @@ class X86_64Assembler FINAL : public Assembler { void divss(XmmRegister dst, XmmRegister src); void divss(XmmRegister dst, const Address& src); - // Mac Instructions - // For reference look at the Instruction reference volume 2C. - // The below URL is broken down in two lines. - // https://www.intel.com/content/www/us/en/architecture-and-technology/ - // 64-ia-32-architectures-software-developer-vol-2c-manual.html - void vfmadd231ps(XmmRegister acc, XmmRegister left, XmmRegister right); - void vfmadd231pd(XmmRegister acc, XmmRegister left, XmmRegister right); - void vfmsub231ps(XmmRegister acc, XmmRegister left, XmmRegister right); - void vfmsub231pd(XmmRegister acc, XmmRegister left, XmmRegister right); - void addps(XmmRegister dst, XmmRegister src); // no addr variant (for now) void subps(XmmRegister dst, XmmRegister src); void mulps(XmmRegister dst, XmmRegister src); @@ -931,11 +921,6 @@ class X86_64Assembler FINAL : public Assembler { void EmitLabelLink(Label* label); void EmitLabelLink(NearLabel* label); - // Emit a 3 byte VEX Prefix. 
- uint8_t EmitVexByteZero(bool is_two_byte); - uint8_t EmitVexByte1(bool r, bool x, bool b, int mmmmm); - uint8_t EmitVexByte2(bool w , int l , X86_64ManagedRegister operand, int pp); - void EmitGenericShift(bool wide, int rm, CpuRegister reg, const Immediate& imm); void EmitGenericShift(bool wide, int rm, CpuRegister operand, CpuRegister shifter); diff --git a/runtime/Android.bp b/runtime/Android.bp index 6ec626591a..8411982b30 100644 --- a/runtime/Android.bp +++ b/runtime/Android.bp @@ -31,7 +31,6 @@ libart_cc_defaults { "aot_class_linker.cc", "art_field.cc", "art_method.cc", - "backtrace_helper.cc", "barrier.cc", "base/mem_map_arena_pool.cc", "base/mutex.cc", diff --git a/runtime/arch/x86/instruction_set_features_x86.cc b/runtime/arch/x86/instruction_set_features_x86.cc index 745e925611..98462512da 100644 --- a/runtime/arch/x86/instruction_set_features_x86.cc +++ b/runtime/arch/x86/instruction_set_features_x86.cc @@ -35,7 +35,6 @@ static constexpr const char* x86_known_variants[] = { "atom", "sandybridge", "silvermont", - "kabylake" }; static constexpr const char* x86_variants_with_ssse3[] = { @@ -47,27 +46,16 @@ static constexpr const char* x86_variants_with_ssse3[] = { static constexpr const char* x86_variants_with_sse4_1[] = { "sandybridge", "silvermont", - "kabylake" }; static constexpr const char* x86_variants_with_sse4_2[] = { "sandybridge", "silvermont", - "kabylake" }; static constexpr const char* x86_variants_with_popcnt[] = { "sandybridge", "silvermont", - "kabylake" -}; - -static constexpr const char* x86_variants_with_avx[] = { - "kabylake", -}; - -static constexpr const char* x86_variants_with_avx2[] = { - "kabylake", }; X86FeaturesUniquePtr X86InstructionSetFeatures::Create(bool x86_64, @@ -105,12 +93,9 @@ X86FeaturesUniquePtr X86InstructionSetFeatures::FromVariant( bool has_SSE4_2 = FindVariantInArray(x86_variants_with_sse4_2, arraysize(x86_variants_with_sse4_2), variant); - bool has_AVX = FindVariantInArray(x86_variants_with_avx, - arraysize(x86_variants_with_avx), - variant); - bool has_AVX2 = FindVariantInArray(x86_variants_with_avx2, - arraysize(x86_variants_with_avx2), - variant); + bool has_AVX = false; + bool has_AVX2 = false; + bool has_POPCNT = FindVariantInArray(x86_variants_with_popcnt, arraysize(x86_variants_with_popcnt), variant); diff --git a/runtime/arch/x86/instruction_set_features_x86.h b/runtime/arch/x86/instruction_set_features_x86.h index f5974cc2e1..57cf4b2741 100644 --- a/runtime/arch/x86/instruction_set_features_x86.h +++ b/runtime/arch/x86/instruction_set_features_x86.h @@ -67,8 +67,6 @@ class X86InstructionSetFeatures : public InstructionSetFeatures { bool HasPopCnt() const { return has_POPCNT_; } - bool HasAVX2() const { return has_AVX2_; } - protected: // Parse a string of the form "ssse3" adding these to a new InstructionSetFeatures. virtual std::unique_ptr<const InstructionSetFeatures> diff --git a/runtime/backtrace_helper.cc b/runtime/backtrace_helper.cc deleted file mode 100644 index c2c0ceeaee..0000000000 --- a/runtime/backtrace_helper.cc +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright (C) 2018 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "backtrace_helper.h" - -#if defined(__linux__) - -#include <backtrace/Backtrace.h> -#include <backtrace/BacktraceMap.h> - -#include <unistd.h> -#include <sys/types.h> - -#else - -// For UNUSED -#include "base/macros.h" - -#endif - -namespace art { - -// We only really support libbacktrace on linux which is unfortunate but since this is only for -// gcstress this isn't a huge deal. -#if defined(__linux__) - -void BacktraceCollector::Collect() { - std::unique_ptr<BacktraceMap> map(BacktraceMap::Create(getpid())); - // We don't care about the function names. Turning this off makes everything significantly faster. - map->SetResolveNames(false); - std::unique_ptr<Backtrace> backtrace(Backtrace::Create(BACKTRACE_CURRENT_PROCESS, - BACKTRACE_CURRENT_THREAD, - map.get())); - backtrace->SetSkipFrames(true); - if (!backtrace->Unwind(skip_count_, nullptr)) { - return; - } - for (Backtrace::const_iterator it = backtrace->begin(); - max_depth_ > num_frames_ && it != backtrace->end(); - ++it) { - out_frames_[num_frames_++] = static_cast<uintptr_t>(it->pc); - } -} - -#else - -#pragma clang diagnostic push -#pragma clang diagnostic warning "-W#warnings" -#warning "Backtrace collector is not implemented. GCStress cannot be used." -#pragma clang diagnostic pop - -// We only have an implementation for linux. On other plaforms just return nothing. This is not -// really correct but we only use this for hashing and gcstress so it's not too big a deal. -void BacktraceCollector::Collect() { - UNUSED(skip_count_); - UNUSED(out_frames_); - UNUSED(max_depth_); - num_frames_ = 0; -} - -#endif - -} // namespace art diff --git a/runtime/backtrace_helper.h b/runtime/backtrace_helper.h index 8eda3fa0a1..ace118c50b 100644 --- a/runtime/backtrace_helper.h +++ b/runtime/backtrace_helper.h @@ -17,12 +17,11 @@ #ifndef ART_RUNTIME_BACKTRACE_HELPER_H_ #define ART_RUNTIME_BACKTRACE_HELPER_H_ -#include <stddef.h> -#include <stdint.h> +#include <unwind.h> namespace art { -// Using libbacktrace +// Based on debug malloc logic from libc/bionic/debug_stacktrace.cpp. class BacktraceCollector { public: BacktraceCollector(uintptr_t* out_frames, size_t max_depth, size_t skip_count) @@ -33,9 +32,25 @@ class BacktraceCollector { } // Collect the backtrace, do not call more than once. - void Collect(); + void Collect() { + _Unwind_Backtrace(&Callback, this); + } private: + static _Unwind_Reason_Code Callback(_Unwind_Context* context, void* arg) { + auto* const state = reinterpret_cast<BacktraceCollector*>(arg); + const uintptr_t ip = _Unwind_GetIP(context); + // The first stack frame is get_backtrace itself. Skip it. + if (ip != 0 && state->skip_count_ > 0) { + --state->skip_count_; + return _URC_NO_REASON; + } + // ip may be off for ARM but it shouldn't matter since we only use it for hashing. + state->out_frames_[state->num_frames_] = ip; + state->num_frames_++; + return state->num_frames_ >= state->max_depth_ ? 
_URC_END_OF_STACK : _URC_NO_REASON; + } + uintptr_t* const out_frames_ = nullptr; size_t num_frames_ = 0u; const size_t max_depth_ = 0u; diff --git a/tools/ahat/src/main/com/android/ahat/heapdump/AhatInstance.java b/tools/ahat/src/main/com/android/ahat/heapdump/AhatInstance.java index 20f368f4ff..a321ec0785 100644 --- a/tools/ahat/src/main/com/android/ahat/heapdump/AhatInstance.java +++ b/tools/ahat/src/main/com/android/ahat/heapdump/AhatInstance.java @@ -82,7 +82,6 @@ public abstract class AhatInstance implements Diffable<AhatInstance>, void initialize(AhatHeap heap, Site site, AhatClassObj classObj) { mHeap = heap; mSite = site; - site.addInstance(this); mClassObj = classObj; } diff --git a/tools/ahat/src/main/com/android/ahat/heapdump/AhatSnapshot.java b/tools/ahat/src/main/com/android/ahat/heapdump/AhatSnapshot.java index d9c7a19431..12d3755784 100644 --- a/tools/ahat/src/main/com/android/ahat/heapdump/AhatSnapshot.java +++ b/tools/ahat/src/main/com/android/ahat/heapdump/AhatSnapshot.java @@ -47,15 +47,19 @@ public class AhatSnapshot implements Diffable<AhatSnapshot> { mHeaps = heaps; mRootSite = rootSite; - // Update registered native allocation size. - for (AhatInstance cleaner : mInstances) { - AhatInstance.RegisteredNativeAllocation nra = cleaner.asRegisteredNativeAllocation(); + AhatInstance.computeReachability(mSuperRoot, progress, mInstances.size()); + + for (AhatInstance inst : mInstances) { + // Add this instance to its site. + inst.getSite().addInstance(inst); + + // Update registered native allocation size. + AhatInstance.RegisteredNativeAllocation nra = inst.asRegisteredNativeAllocation(); if (nra != null) { nra.referent.addRegisteredNativeSize(nra.size); } } - AhatInstance.computeReachability(mSuperRoot, progress, mInstances.size()); DominatorsComputation.computeDominators(mSuperRoot, progress, mInstances.size()); AhatInstance.computeRetainedSize(mSuperRoot, mHeaps.size()); |
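A note on the "TODO: pmaddwd?" comments left in the two x86 VisitVecMultiplyAccumulate bodies: pmaddwd is the SSE2 instruction that multiplies packed signed 16-bit elements and adds adjacent pair products into 32-bit lanes, a plausible future building block for an integral multiply-accumulate, though not a drop-in one since it halves the element count. A standalone sketch via the compiler intrinsic (illustrative only, not part of this patch):

#include <emmintrin.h>  // SSE2
#include <cstdint>
#include <cstdio>

int main() {
  // Eight signed 16-bit lanes per operand.
  __m128i x = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
  __m128i y = _mm_setr_epi16(1, 1, 1, 1, 2, 2, 2, 2);
  // pmaddwd: result lane i = x[2i]*y[2i] + x[2i+1]*y[2i+1], widened to 32 bits.
  __m128i r = _mm_madd_epi16(x, y);
  alignas(16) int32_t out[4];
  _mm_store_si128(reinterpret_cast<__m128i*>(out), r);
  std::printf("%d %d %d %d\n", out[0], out[1], out[2], out[3]);  // 3 7 22 30
  return 0;
}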
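The comment added to HVecMultiplyAccumulate in nodes_vector.h is the heart of this change: Java requires each float/double product to be rounded to its own precision before the addition, while the removed vfmadd231ps/vfmadd231pd instructions round only once after the fused multiply-add, so the node is now restricted to integral packed types. A minimal C++ illustration of the observable difference (not part of the patch; compile with -ffp-contract=off so the compiler does not itself fuse the plain expression):

#include <cmath>
#include <cstdio>

int main() {
  float a = 1.0f + 0x1.0p-12f;         // exactly representable
  float c = -(1.0f + 0x1.0p-11f);      // exactly representable
  // a*a == 1 + 2^-11 + 2^-24; rounding the product to float drops the 2^-24.
  float separate = a * a + c;          // Java semantics: exactly 0.0f
  float fused = std::fmaf(a, a, c);    // single rounding: the 2^-24 survives
  std::printf("separate=%g fused=%g\n", separate, fused);  // separate=0 fused=5.96046e-08
  return 0;
}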
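The EmitVexByteZero/EmitVexByte1/EmitVexByte2 helpers deleted from both assemblers built the standard three-byte VEX prefix (Intel SDM Vol. 2, section 2.3.5): a 0xC4 escape byte, then the inverted R/X/B bits plus the mmmmm opcode-map field, then W, the inverted vvvv register specifier, L, and the pp implied-prefix field. A sketch (illustrative, not part of the patch) assembling the byte sequence those helpers produced for vfmadd231ps xmm1, xmm2, xmm3, i.e. VEX.128.66.0F38.W0 B8 /r:

#include <cstdint>
#include <cstdio>

int main() {
  uint8_t dst = 1, vvvv = 2, src = 3;             // xmm1, xmm2, xmm3
  uint8_t byte0 = 0xC4;                           // three-byte VEX escape
  // Byte 1: ~R, ~X, ~B all set (no extended registers), mmmmm = 2 (0F 38 map).
  uint8_t byte1 = 0x80 | 0x40 | 0x20 | 0x02;      // 0xE2
  // Byte 2: W = 0, ~vvvv in bits 6..3, L = 0 (128-bit), pp = 1 (66 prefix).
  uint8_t byte2 = ((~vvvv & 0x0F) << 3) | 0x01;   // 0x69
  uint8_t opcode = 0xB8;                          // vfmadd231ps
  uint8_t modrm = 0xC0 | (dst << 3) | src;        // mod = 11, reg = dst, rm = src
  std::printf("%02X %02X %02X %02X %02X\n", byte0, byte1, byte2, opcode, modrm);
  // Prints: C4 E2 69 B8 CB
  return 0;
}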
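The backtrace_helper change drops the libbacktrace dependency and walks the stack directly with _Unwind_Backtrace from <unwind.h>: register a callback, read each frame's PC with _Unwind_GetIP, and stop by returning _URC_END_OF_STACK. A standalone sketch of the same pattern (illustrative names; the in-tree version additionally honors skip_count_, as shown in the hunk above):

#include <unwind.h>
#include <cstddef>
#include <cstdint>
#include <cstdio>

struct TraceState {
  static constexpr size_t kMaxDepth = 16;
  uintptr_t frames[kMaxDepth];
  size_t count = 0;
};

static _Unwind_Reason_Code Callback(_Unwind_Context* context, void* arg) {
  TraceState* state = static_cast<TraceState*>(arg);
  const uintptr_t ip = _Unwind_GetIP(context);
  if (ip != 0) {
    state->frames[state->count++] = ip;
  }
  return state->count >= TraceState::kMaxDepth ? _URC_END_OF_STACK : _URC_NO_REASON;
}

int main() {
  TraceState state;
  _Unwind_Backtrace(&Callback, &state);
  for (size_t i = 0; i < state.count; ++i) {
    std::printf("#%zu pc %p\n", i, reinterpret_cast<void*>(state.frames[i]));
  }
  return 0;
}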