Diffstat (limited to 'compiler/optimizing')
-rw-r--r--  compiler/optimizing/code_generator_arm64.h            9
-rw-r--r--  compiler/optimizing/code_generator_vector_arm64.cc   77
-rw-r--r--  compiler/optimizing/code_generator_vector_x86.cc     57
-rw-r--r--  compiler/optimizing/code_generator_vector_x86_64.cc  57
-rw-r--r--  compiler/optimizing/loop_optimization.cc              6
5 files changed, 143 insertions, 63 deletions
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 332ab49153..3ded3e4412 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -318,12 +318,13 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator {
   void GenerateDivRemIntegral(HBinaryOperation* instruction);
   void HandleGoto(HInstruction* got, HBasicBlock* successor);
 
-  vixl::aarch64::MemOperand CreateVecMemRegisters(
+  vixl::aarch64::MemOperand VecAddress(
       HVecMemoryOperation* instruction,
-      Location* reg_loc,
-      bool is_load,
       // This function may acquire a scratch register.
-      vixl::aarch64::UseScratchRegisterScope* temps_scope);
+      vixl::aarch64::UseScratchRegisterScope* temps_scope,
+      size_t size,
+      bool is_string_char_at,
+      /*out*/ vixl::aarch64::Register* scratch);
 
   Arm64Assembler* const assembler_;
   CodeGeneratorARM64* const codegen_;
diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64.cc
index 93befa439c..57f7e6b25c 100644
--- a/compiler/optimizing/code_generator_vector_arm64.cc
+++ b/compiler/optimizing/code_generator_vector_arm64.cc
@@ -22,6 +22,7 @@ using namespace vixl::aarch64;  // NOLINT(build/namespaces)
 namespace art {
 namespace arm64 {
 
+using helpers::DRegisterFrom;
 using helpers::VRegisterFrom;
 using helpers::HeapOperand;
 using helpers::InputRegisterAt;
@@ -771,20 +772,22 @@ static void CreateVecMemLocations(ArenaAllocator* arena,
   }
 }
 
-// Helper to set up registers and address for vector memory operations.
-MemOperand InstructionCodeGeneratorARM64::CreateVecMemRegisters(
+// Helper to set up locations for vector memory operations. Returns the memory operand and,
+// if used, sets the output parameter scratch to a temporary register used in this operand,
+// so that the client can release it right after the memory operand use.
+MemOperand InstructionCodeGeneratorARM64::VecAddress(
     HVecMemoryOperation* instruction,
-    Location* reg_loc,
-    bool is_load,
-    UseScratchRegisterScope* temps_scope) {
+    UseScratchRegisterScope* temps_scope,
+    size_t size,
+    bool is_string_char_at,
+    /*out*/ Register* scratch) {
   LocationSummary* locations = instruction->GetLocations();
   Register base = InputRegisterAt(instruction, 0);
   Location index = locations->InAt(1);
-  *reg_loc = is_load ? locations->Out() : locations->InAt(2);
-
-  Primitive::Type packed_type = instruction->GetPackedType();
-  uint32_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(packed_type)).Uint32Value();
-  size_t shift = Primitive::ComponentSizeShift(packed_type);
+  uint32_t offset = is_string_char_at
+      ? mirror::String::ValueOffset().Uint32Value()
+      : mirror::Array::DataOffset(size).Uint32Value();
+  size_t shift = ComponentSizeShiftWidth(size);
 
   // HIntermediateAddress optimization is only applied for scalar ArrayGet and ArraySet.
   DCHECK(!instruction->InputAt(0)->IsIntermediateAddress());
@@ -793,10 +796,9 @@ MemOperand InstructionCodeGeneratorARM64::CreateVecMemRegisters(
     offset += Int64ConstantFrom(index) << shift;
     return HeapOperand(base, offset);
   } else {
-    Register temp = temps_scope->AcquireSameSizeAs(base);
-    __ Add(temp, base, Operand(WRegisterFrom(index), LSL, shift));
-
-    return HeapOperand(temp, offset);
+    *scratch = temps_scope->AcquireSameSizeAs(base);
+    __ Add(*scratch, base, Operand(WRegisterFrom(index), LSL, shift));
+    return HeapOperand(*scratch, offset);
   }
 }
 
@@ -805,15 +807,43 @@ void LocationsBuilderARM64::VisitVecLoad(HVecLoad* instruction) {
 }
 
 void InstructionCodeGeneratorARM64::VisitVecLoad(HVecLoad* instruction) {
-  Location reg_loc = Location::NoLocation();
+  LocationSummary* locations = instruction->GetLocations();
+  size_t size = Primitive::ComponentSize(instruction->GetPackedType());
+  VRegister reg = VRegisterFrom(locations->Out());
   UseScratchRegisterScope temps(GetVIXLAssembler());
-  MemOperand mem = CreateVecMemRegisters(instruction, &reg_loc, /*is_load*/ true, &temps);
-  VRegister reg = VRegisterFrom(reg_loc);
+  Register scratch;
 
   switch (instruction->GetPackedType()) {
+    case Primitive::kPrimChar:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      // Special handling of compressed/uncompressed string load.
+      if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
+        vixl::aarch64::Label uncompressed_load, done;
+        // Test compression bit.
+        static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
+                      "Expecting 0=compressed, 1=uncompressed");
+        uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
+        Register length = temps.AcquireW();
+        __ Ldr(length, HeapOperand(InputRegisterAt(instruction, 0), count_offset));
+        __ Tbnz(length.W(), 0, &uncompressed_load);
+        temps.Release(length);  // no longer needed
+        // Zero extend 8 compressed bytes into 8 chars.
+        __ Ldr(DRegisterFrom(locations->Out()).V8B(),
+               VecAddress(instruction, &temps, 1, /*is_string_char_at*/ true, &scratch));
+        __ Uxtl(reg.V8H(), reg.V8B());
+        __ B(&done);
+        if (scratch.IsValid()) {
+          temps.Release(scratch);  // if used, no longer needed
+        }
+        // Load 8 direct uncompressed chars.
+        __ Bind(&uncompressed_load);
+        __ Ldr(reg, VecAddress(instruction, &temps, size, /*is_string_char_at*/ true, &scratch));
+        __ Bind(&done);
+        return;
+      }
+      FALLTHROUGH_INTENDED;
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
     case Primitive::kPrimShort:
     case Primitive::kPrimInt:
     case Primitive::kPrimFloat:
@@ -821,7 +851,7 @@ void InstructionCodeGeneratorARM64::VisitVecLoad(HVecLoad* instruction) {
    case Primitive::kPrimDouble:
       DCHECK_LE(2u, instruction->GetVectorLength());
       DCHECK_LE(instruction->GetVectorLength(), 16u);
-      __ Ldr(reg, mem);
+      __ Ldr(reg, VecAddress(instruction, &temps, size, instruction->IsStringCharAt(), &scratch));
       break;
     default:
       LOG(FATAL) << "Unsupported SIMD type";
@@ -834,10 +864,11 @@ void LocationsBuilderARM64::VisitVecStore(HVecStore* instruction) {
 }
 
 void InstructionCodeGeneratorARM64::VisitVecStore(HVecStore* instruction) {
-  Location reg_loc = Location::NoLocation();
+  LocationSummary* locations = instruction->GetLocations();
+  size_t size = Primitive::ComponentSize(instruction->GetPackedType());
+  VRegister reg = VRegisterFrom(locations->InAt(2));
   UseScratchRegisterScope temps(GetVIXLAssembler());
-  MemOperand mem = CreateVecMemRegisters(instruction, &reg_loc, /*is_load*/ false, &temps);
-  VRegister reg = VRegisterFrom(reg_loc);
+  Register scratch;
 
   switch (instruction->GetPackedType()) {
     case Primitive::kPrimBoolean:
@@ -850,7 +881,7 @@ void InstructionCodeGeneratorARM64::VisitVecStore(HVecStore* instruction) {
     case Primitive::kPrimDouble:
       DCHECK_LE(2u, instruction->GetVectorLength());
       DCHECK_LE(instruction->GetVectorLength(), 16u);
-      __ Str(reg, mem);
+      __ Str(reg, VecAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
       break;
     default:
       LOG(FATAL) << "Unsupported SIMD type";
diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc
index 013b092b5a..5bb19c193c 100644
--- a/compiler/optimizing/code_generator_vector_x86.cc
+++ b/compiler/optimizing/code_generator_vector_x86.cc
@@ -201,6 +201,7 @@ void InstructionCodeGeneratorX86::VisitVecNeg(HVecNeg* instruction) {
 
 void LocationsBuilderX86::VisitVecAbs(HVecAbs* instruction) {
   CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+  // Integral-abs requires a temporary for the comparison.
   if (instruction->GetPackedType() == Primitive::kPrimInt) {
     instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
   }
@@ -766,16 +767,10 @@ static void CreateVecMemLocations(ArenaAllocator* arena,
   }
 }
 
-// Helper to set up registers and address for vector memory operations.
-static Address CreateVecMemRegisters(HVecMemoryOperation* instruction,
-                                     Location* reg_loc,
-                                     bool is_load) {
-  LocationSummary* locations = instruction->GetLocations();
+// Helper to construct address for vector memory operations.
+static Address VecAddress(LocationSummary* locations, size_t size, bool is_string_char_at) {
   Location base = locations->InAt(0);
   Location index = locations->InAt(1);
-  *reg_loc = is_load ? locations->Out() : locations->InAt(2);
-  size_t size = Primitive::ComponentSize(instruction->GetPackedType());
-  uint32_t offset = mirror::Array::DataOffset(size).Uint32Value();
   ScaleFactor scale = TIMES_1;
   switch (size) {
     case 2: scale = TIMES_2; break;
@@ -783,22 +778,53 @@ static Address CreateVecMemRegisters(HVecMemoryOperation* instruction,
     case 8: scale = TIMES_8; break;
     default: break;
   }
+  uint32_t offset = is_string_char_at
+      ? mirror::String::ValueOffset().Uint32Value()
+      : mirror::Array::DataOffset(size).Uint32Value();
   return CodeGeneratorX86::ArrayAddress(base.AsRegister<Register>(), index, scale, offset);
 }
 
 void LocationsBuilderX86::VisitVecLoad(HVecLoad* instruction) {
   CreateVecMemLocations(GetGraph()->GetArena(), instruction, /*is_load*/ true);
+  // String load requires a temporary for the compressed load.
+  if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
+    instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
+  }
 }
 
 void InstructionCodeGeneratorX86::VisitVecLoad(HVecLoad* instruction) {
-  Location reg_loc = Location::NoLocation();
-  Address address = CreateVecMemRegisters(instruction, &reg_loc, /*is_load*/ true);
-  XmmRegister reg = reg_loc.AsFpuRegister<XmmRegister>();
+  LocationSummary* locations = instruction->GetLocations();
+  size_t size = Primitive::ComponentSize(instruction->GetPackedType());
+  Address address = VecAddress(locations, size, instruction->IsStringCharAt());
+  XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>();
   bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
   switch (instruction->GetPackedType()) {
+    case Primitive::kPrimChar:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      // Special handling of compressed/uncompressed string load.
+      if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
+        NearLabel done, not_compressed;
+        XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+        // Test compression bit.
+        static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
+                      "Expecting 0=compressed, 1=uncompressed");
+        uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
+        __ testb(Address(locations->InAt(0).AsRegister<Register>(), count_offset), Immediate(1));
+        __ j(kNotZero, &not_compressed);
+        // Zero extend 8 compressed bytes into 8 chars.
+        __ movsd(reg, VecAddress(locations, 1, /*is_string_char_at*/ true));
+        __ pxor(tmp, tmp);
+        __ punpcklbw(reg, tmp);
+        __ jmp(&done);
+        // Load 4 direct uncompressed chars.
+        __ Bind(&not_compressed);
+        is_aligned16 ? __ movdqa(reg, address) : __ movdqu(reg, address);
+        __ Bind(&done);
+        return;
+      }
+      FALLTHROUGH_INTENDED;
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
     case Primitive::kPrimShort:
     case Primitive::kPrimInt:
     case Primitive::kPrimLong:
@@ -825,9 +851,10 @@ void LocationsBuilderX86::VisitVecStore(HVecStore* instruction) {
 }
 
 void InstructionCodeGeneratorX86::VisitVecStore(HVecStore* instruction) {
-  Location reg_loc = Location::NoLocation();
-  Address address = CreateVecMemRegisters(instruction, &reg_loc, /*is_load*/ false);
-  XmmRegister reg = reg_loc.AsFpuRegister<XmmRegister>();
+  LocationSummary* locations = instruction->GetLocations();
+  size_t size = Primitive::ComponentSize(instruction->GetPackedType());
+  Address address = VecAddress(locations, size, /*is_string_char_at*/ false);
+  XmmRegister reg = locations->InAt(2).AsFpuRegister<XmmRegister>();
   bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
   switch (instruction->GetPackedType()) {
     case Primitive::kPrimBoolean:
diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc
index 66f19a4376..6d4aae86e6 100644
--- a/compiler/optimizing/code_generator_vector_x86_64.cc
+++ b/compiler/optimizing/code_generator_vector_x86_64.cc
@@ -194,6 +194,7 @@ void InstructionCodeGeneratorX86_64::VisitVecNeg(HVecNeg* instruction) {
 
 void LocationsBuilderX86_64::VisitVecAbs(HVecAbs* instruction) {
   CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+  // Integral-abs requires a temporary for the comparison.
   if (instruction->GetPackedType() == Primitive::kPrimInt) {
     instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
   }
@@ -755,16 +756,10 @@ static void CreateVecMemLocations(ArenaAllocator* arena,
   }
 }
 
-// Helper to set up registers and address for vector memory operations.
-static Address CreateVecMemRegisters(HVecMemoryOperation* instruction,
-                                     Location* reg_loc,
-                                     bool is_load) {
-  LocationSummary* locations = instruction->GetLocations();
+// Helper to construct address for vector memory operations.
+static Address VecAddress(LocationSummary* locations, size_t size, bool is_string_char_at) {
   Location base = locations->InAt(0);
   Location index = locations->InAt(1);
-  *reg_loc = is_load ? locations->Out() : locations->InAt(2);
-  size_t size = Primitive::ComponentSize(instruction->GetPackedType());
-  uint32_t offset = mirror::Array::DataOffset(size).Uint32Value();
   ScaleFactor scale = TIMES_1;
   switch (size) {
     case 2: scale = TIMES_2; break;
@@ -772,22 +767,53 @@ static Address CreateVecMemRegisters(HVecMemoryOperation* instruction,
     case 8: scale = TIMES_8; break;
     default: break;
   }
+  uint32_t offset = is_string_char_at
+      ? mirror::String::ValueOffset().Uint32Value()
+      : mirror::Array::DataOffset(size).Uint32Value();
   return CodeGeneratorX86_64::ArrayAddress(base.AsRegister<CpuRegister>(), index, scale, offset);
 }
 
 void LocationsBuilderX86_64::VisitVecLoad(HVecLoad* instruction) {
   CreateVecMemLocations(GetGraph()->GetArena(), instruction, /*is_load*/ true);
+  // String load requires a temporary for the compressed load.
+  if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
+    instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
+  }
 }
 
 void InstructionCodeGeneratorX86_64::VisitVecLoad(HVecLoad* instruction) {
-  Location reg_loc = Location::NoLocation();
-  Address address = CreateVecMemRegisters(instruction, &reg_loc, /*is_load*/ true);
-  XmmRegister reg = reg_loc.AsFpuRegister<XmmRegister>();
+  LocationSummary* locations = instruction->GetLocations();
+  size_t size = Primitive::ComponentSize(instruction->GetPackedType());
+  Address address = VecAddress(locations, size, instruction->IsStringCharAt());
+  XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>();
   bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
   switch (instruction->GetPackedType()) {
+    case Primitive::kPrimChar:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      // Special handling of compressed/uncompressed string load.
+      if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
+        NearLabel done, not_compressed;
+        XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+        // Test compression bit.
+        static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
+                      "Expecting 0=compressed, 1=uncompressed");
+        uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
+        __ testb(Address(locations->InAt(0).AsRegister<CpuRegister>(), count_offset), Immediate(1));
+        __ j(kNotZero, &not_compressed);
+        // Zero extend 8 compressed bytes into 8 chars.
+        __ movsd(reg, VecAddress(locations, 1, /*is_string_char_at*/ true));
+        __ pxor(tmp, tmp);
+        __ punpcklbw(reg, tmp);
+        __ jmp(&done);
+        // Load 8 direct uncompressed chars.
+        __ Bind(&not_compressed);
+        is_aligned16 ? __ movdqa(reg, address) : __ movdqu(reg, address);
+        __ Bind(&done);
+        return;
+      }
+      FALLTHROUGH_INTENDED;
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
     case Primitive::kPrimShort:
     case Primitive::kPrimInt:
     case Primitive::kPrimLong:
@@ -814,9 +840,10 @@ void LocationsBuilderX86_64::VisitVecStore(HVecStore* instruction) {
 }
 
 void InstructionCodeGeneratorX86_64::VisitVecStore(HVecStore* instruction) {
-  Location reg_loc = Location::NoLocation();
-  Address address = CreateVecMemRegisters(instruction, &reg_loc, /*is_load*/ false);
-  XmmRegister reg = reg_loc.AsFpuRegister<XmmRegister>();
+  LocationSummary* locations = instruction->GetLocations();
+  size_t size = Primitive::ComponentSize(instruction->GetPackedType());
+  Address address = VecAddress(locations, size, /*is_string_char_at*/ false);
+  XmmRegister reg = locations->InAt(2).AsFpuRegister<XmmRegister>();
   bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
   switch (instruction->GetPackedType()) {
     case Primitive::kPrimBoolean:
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index da2acd1fd3..c783ddecf5 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -733,12 +733,6 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node,
     }
     return true;
   } else if (instruction->IsArrayGet()) {
-    // Strings are different, with a different offset to the actual data
-    // and some compressed to save memory. For now, all cases are rejected
-    // to avoid the complexity.
-    if (instruction->AsArrayGet()->IsStringCharAt()) {
-      return false;
-    }
     // Accept a right-hand-side array base[index] for
     // (1) exact matching vector type,
     // (2) loop-invariant base,
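
The new VecAddress helpers above pick the payload offset by access kind (the String value payload for charAt, the array data payload otherwise) and scale the loop index by the element size. The following is a standalone sketch of that arithmetic in plain C++, not ART code; the two offset constants are placeholders rather than the real object layout.

// Standalone sketch (not ART code) of the address computation in the new VecAddress helpers.
#include <cstddef>
#include <cstdint>

// Placeholder offsets; ART obtains the real values from mirror::String::ValueOffset()
// and mirror::Array::DataOffset(size).
constexpr uint32_t kStringValueOffsetPlaceholder = 16;
constexpr uint32_t kArrayDataOffsetPlaceholder = 12;

inline const uint8_t* VecElementAddress(const uint8_t* base,
                                        size_t index,
                                        size_t element_size,
                                        bool is_string_char_at) {
  uint32_t offset = is_string_char_at ? kStringValueOffsetPlaceholder
                                      : kArrayDataOffsetPlaceholder;
  // arm64 computes base + (index << ComponentSizeShiftWidth(size)) + offset; the x86
  // backends fold the same scaling into the addressing mode via ScaleFactor.
  return base + offset + index * element_size;
}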
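
The arm64 kPrimChar path tests the low bit of String.count with Tbnz (clear means compressed), then either loads 8 bytes and widens them with Uxtl or loads 8 chars directly. Below is a minimal NEON-intrinsics sketch of the same decision, assuming a hypothetical PackedString stand-in for the string layout; this is illustrative code, not the ART implementation.

#include <arm_neon.h>
#include <cstddef>
#include <cstdint>

// Hypothetical stand-in for the string layout assumed by the diff: the low bit of
// 'count' set means uncompressed 16-bit chars, clear means compressed 8-bit chars.
struct PackedString {
  uint32_t count;
  const void* value;
};

// Load 8 consecutive chars starting at 'index' into a 128-bit vector.
inline uint16x8_t LoadEightChars(const PackedString& s, size_t index) {
  if ((s.count & 1u) == 0u) {
    // Compressed: load 8 bytes (LDR d-register in the diff) and zero-extend
    // them to 8 chars (UXTL v.8h, v.8b).
    const uint8_t* p = static_cast<const uint8_t*>(s.value) + index;
    return vmovl_u8(vld1_u8(p));
  }
  // Uncompressed: load 8 x 16-bit chars directly (LDR q-register).
  const uint16_t* p = static_cast<const uint16_t*>(s.value) + index;
  return vld1q_u16(p);
}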
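
The x86 and x86-64 paths make the same choice with testb on the count field, then either movsd + pxor + punpcklbw (zero-extend 8 compressed bytes) or movdqa/movdqu (direct 16-byte load). An SSE2-intrinsics sketch under the same assumptions, reusing the PackedString stand-in from the previous example:

#include <emmintrin.h>
#include <cstddef>
#include <cstdint>

inline __m128i LoadEightCharsSse2(const PackedString& s, size_t index) {
  if ((s.count & 1u) == 0u) {
    // Compressed: 64-bit load of 8 bytes (movsd/movq in the diff), then interleave
    // with zero (pxor + punpcklbw) to produce 8 zero-extended 16-bit chars.
    const uint8_t* p = static_cast<const uint8_t*>(s.value) + index;
    __m128i bytes = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(p));
    return _mm_unpacklo_epi8(bytes, _mm_setzero_si128());
  }
  // Uncompressed: unaligned 128-bit load of 8 x 16-bit chars (movdqu).
  const uint16_t* p = static_cast<const uint16_t*>(s.value) + index;
  return _mm_loadu_si128(reinterpret_cast<const __m128i*>(p));
}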