diff options
Diffstat (limited to 'compiler/optimizing')
-rw-r--r-- | compiler/optimizing/code_generator_x86.cc | 24 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_x86_64.cc | 22 | ||||
-rw-r--r-- | compiler/optimizing/intrinsics_x86.cc | 201 | ||||
-rw-r--r-- | compiler/optimizing/intrinsics_x86_64.cc | 189 |
4 files changed, 385 insertions, 51 deletions
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index dc5c86efc6..9713d6a9c9 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -1312,7 +1312,7 @@ void InstructionCodeGeneratorX86::VisitCondition(HCondition* cond) { } // Convert the jumps into the result. - Label done_label; + NearLabel done_label; // False case: result = 0. __ Bind(&false_label); @@ -1968,7 +1968,7 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio XmmRegister input = in.AsFpuRegister<XmmRegister>(); Register output = out.AsRegister<Register>(); XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); - Label done, nan; + NearLabel done, nan; __ movl(output, Immediate(kPrimIntMax)); // temp = int-to-float(output) @@ -1993,7 +1993,7 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio XmmRegister input = in.AsFpuRegister<XmmRegister>(); Register output = out.AsRegister<Register>(); XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); - Label done, nan; + NearLabel done, nan; __ movl(output, Immediate(kPrimIntMax)); // temp = int-to-double(output) @@ -2652,7 +2652,7 @@ void InstructionCodeGeneratorX86::GenerateRemFP(HRem *rem) { PushOntoFPStack(first, 0, 2 * elem_size, /* is_fp */ true, is_wide); // Loop doing FPREM until we stabilize. - Label retry; + NearLabel retry; __ Bind(&retry); __ fprem(); @@ -2766,8 +2766,8 @@ void InstructionCodeGeneratorX86::GenerateDivRemWithAnyConstant(HBinaryOperation int shift; CalculateMagicAndShiftForDivRem(imm, false /* is_long */, &magic, &shift); - Label ndiv; - Label end; + NearLabel ndiv; + NearLabel end; // If numerator is 0, the result is 0, no computation needed. 
__ testl(eax, eax); __ j(kNotEqual, &ndiv); @@ -3243,7 +3243,7 @@ void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, int shift } void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, Register shifter) { - Label done; + NearLabel done; __ shld(loc.AsRegisterPairHigh<Register>(), loc.AsRegisterPairLow<Register>(), shifter); __ shll(loc.AsRegisterPairLow<Register>(), shifter); __ testl(shifter, Immediate(32)); @@ -3275,7 +3275,7 @@ void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, int shift } void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, Register shifter) { - Label done; + NearLabel done; __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter); __ sarl(loc.AsRegisterPairHigh<Register>(), shifter); __ testl(shifter, Immediate(32)); @@ -3310,7 +3310,7 @@ void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, int shif } void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, Register shifter) { - Label done; + NearLabel done; __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter); __ shrl(loc.AsRegisterPairHigh<Register>(), shifter); __ testl(shifter, Immediate(32)); @@ -3485,7 +3485,7 @@ void InstructionCodeGeneratorX86::VisitCompare(HCompare* compare) { Location left = locations->InAt(0); Location right = locations->InAt(1); - Label less, greater, done; + NearLabel less, greater, done; switch (compare->InputAt(0)->GetType()) { case Primitive::kPrimLong: { Register left_low = left.AsRegisterPairLow<Register>(); @@ -3709,7 +3709,7 @@ void CodeGeneratorX86::MarkGCCard(Register temp, Register object, Register value, bool value_can_be_null) { - Label is_null; + NearLabel is_null; if (value_can_be_null) { __ testl(value, value); __ j(kEqual, &is_null); @@ -4946,7 +4946,7 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { Location cls = locations->InAt(1); Register out 
= locations->Out().AsRegister<Register>(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); - Label done, zero; + NearLabel done, zero; SlowPathCodeX86* slow_path = nullptr; // Return 0 if `obj` is null. diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 0cf1089cf8..43a3e52a7f 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -1324,7 +1324,7 @@ void InstructionCodeGeneratorX86_64::VisitCondition(HCondition* cond) { } // Convert the jumps into the result. - Label done_label; + NearLabel done_label; // False case: result = 0. __ Bind(&false_label); @@ -1413,7 +1413,7 @@ void InstructionCodeGeneratorX86_64::VisitCompare(HCompare* compare) { Location left = locations->InAt(0); Location right = locations->InAt(1); - Label less, greater, done; + NearLabel less, greater, done; Primitive::Type type = compare->InputAt(0)->GetType(); switch (type) { case Primitive::kPrimLong: { @@ -2123,7 +2123,7 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver // Processing a Dex `float-to-int' instruction. XmmRegister input = in.AsFpuRegister<XmmRegister>(); CpuRegister output = out.AsRegister<CpuRegister>(); - Label done, nan; + NearLabel done, nan; __ movl(output, Immediate(kPrimIntMax)); // if input >= (float)INT_MAX goto done @@ -2145,7 +2145,7 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver // Processing a Dex `double-to-int' instruction. XmmRegister input = in.AsFpuRegister<XmmRegister>(); CpuRegister output = out.AsRegister<CpuRegister>(); - Label done, nan; + NearLabel done, nan; __ movl(output, Immediate(kPrimIntMax)); // if input >= (double)INT_MAX goto done @@ -2187,7 +2187,7 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver // Processing a Dex `float-to-long' instruction. 
XmmRegister input = in.AsFpuRegister<XmmRegister>(); CpuRegister output = out.AsRegister<CpuRegister>(); - Label done, nan; + NearLabel done, nan; codegen_->Load64BitValue(output, kPrimLongMax); // if input >= (float)LONG_MAX goto done @@ -2209,7 +2209,7 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver // Processing a Dex `double-to-long' instruction. XmmRegister input = in.AsFpuRegister<XmmRegister>(); CpuRegister output = out.AsRegister<CpuRegister>(); - Label done, nan; + NearLabel done, nan; codegen_->Load64BitValue(output, kPrimLongMax); // if input >= (double)LONG_MAX goto done @@ -2772,7 +2772,7 @@ void InstructionCodeGeneratorX86_64::GenerateRemFP(HRem *rem) { PushOntoFPStack(first, 0, 2 * elem_size, is_float); // Loop doing FPREM until we stabilize. - Label retry; + NearLabel retry; __ Bind(&retry); __ fprem(); @@ -2926,8 +2926,8 @@ void InstructionCodeGeneratorX86_64::GenerateDivRemWithAnyConstant(HBinaryOperat __ movl(numerator, eax); - Label no_div; - Label end; + NearLabel no_div; + NearLabel end; __ testl(eax, eax); __ j(kNotEqual, &no_div); @@ -4247,7 +4247,7 @@ void CodeGeneratorX86_64::MarkGCCard(CpuRegister temp, CpuRegister object, CpuRegister value, bool value_can_be_null) { - Label is_null; + NearLabel is_null; if (value_can_be_null) { __ testl(value, value); __ j(kEqual, &is_null); @@ -4674,7 +4674,7 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { Location cls = locations->InAt(1); CpuRegister out = locations->Out().AsRegister<CpuRegister>(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); - Label done, zero; + NearLabel done, zero; SlowPathCodeX86_64* slow_path = nullptr; // Return 0 if `obj` is null. 
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index c5d88d2b25..e302317d14 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -507,7 +507,7 @@ static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double, XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>(); - Label nan, done, op2_label; + NearLabel nan, done, op2_label; if (is_double) { __ ucomisd(out, op2); } else { @@ -841,7 +841,7 @@ void IntrinsicCodeGeneratorX86::VisitMathRoundFloat(HInvoke* invoke) { Register out = locations->Out().AsRegister<Register>(); XmmRegister maxInt = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); XmmRegister inPlusPointFive = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); - Label done, nan; + NearLabel done, nan; X86Assembler* assembler = GetAssembler(); // Generate 0.5 into inPlusPointFive. @@ -888,9 +888,9 @@ void IntrinsicLocationsBuilderX86::VisitStringCharAt(HInvoke* invoke) { void IntrinsicCodeGeneratorX86::VisitStringCharAt(HInvoke* invoke) { LocationSummary* locations = invoke->GetLocations(); - // Location of reference to data array + // Location of reference to data array. const int32_t value_offset = mirror::String::ValueOffset().Int32Value(); - // Location of count + // Location of count. const int32_t count_offset = mirror::String::CountOffset().Int32Value(); Register obj = locations->InAt(0).AsRegister<Register>(); @@ -917,6 +917,183 @@ void IntrinsicCodeGeneratorX86::VisitStringCharAt(HInvoke* invoke) { __ Bind(slow_path->GetExitLabel()); } +void IntrinsicLocationsBuilderX86::VisitSystemArrayCopyChar(HInvoke* invoke) { + // We need at least two of the positions or length to be an integer constant, + // or else we won't have enough free registers. 
+ HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant(); + HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant(); + HIntConstant* length = invoke->InputAt(4)->AsIntConstant(); + + int num_constants = + ((src_pos != nullptr) ? 1 : 0) + + ((dest_pos != nullptr) ? 1 : 0) + + ((length != nullptr) ? 1 : 0); + + if (num_constants < 2) { + // Not enough free registers. + return; + } + + // As long as we are checking, we might as well check to see if the src and dest + // positions are >= 0. + if ((src_pos != nullptr && src_pos->GetValue() < 0) || + (dest_pos != nullptr && dest_pos->GetValue() < 0)) { + // We will have to fail anyways. + return; + } + + // And since we are already checking, check the length too. + if (length != nullptr) { + int32_t len = length->GetValue(); + if (len < 0) { + // Just call as normal. + return; + } + } + + // Okay, it is safe to generate inline code. + LocationSummary* locations = + new (arena_) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified); + // arraycopy(Object src, int srcPos, Object dest, int destPos, int length). + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1))); + locations->SetInAt(2, Location::RequiresRegister()); + locations->SetInAt(3, Location::RegisterOrConstant(invoke->InputAt(3))); + locations->SetInAt(4, Location::RegisterOrConstant(invoke->InputAt(4))); + + // And we need some temporaries. We will use REP MOVSW, so we need fixed registers. + locations->AddTemp(Location::RegisterLocation(ESI)); + locations->AddTemp(Location::RegisterLocation(EDI)); + locations->AddTemp(Location::RegisterLocation(ECX)); +} + +static void CheckPosition(X86Assembler* assembler, + Location pos, + Register input, + Register length, + SlowPathCodeX86* slow_path, + Register input_len, + Register temp) { + // Where is the length in the array? 
+ const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value(); + + if (pos.IsConstant()) { + int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue(); + if (pos_const == 0) { + // Check that length(input) >= length. + __ cmpl(Address(input, length_offset), length); + __ j(kLess, slow_path->GetEntryLabel()); + } else { + // Check that length(input) >= pos. + __ movl(input_len, Address(input, length_offset)); + __ cmpl(input_len, Immediate(pos_const)); + __ j(kLess, slow_path->GetEntryLabel()); + + // Check that (length(input) - pos) >= length. + __ leal(temp, Address(input_len, -pos_const)); + __ cmpl(temp, length); + __ j(kLess, slow_path->GetEntryLabel()); + } + } else { + // Check that pos >= 0. + Register pos_reg = pos.AsRegister<Register>(); + __ testl(pos_reg, pos_reg); + __ j(kLess, slow_path->GetEntryLabel()); + + // Check that pos <= length(input). + __ cmpl(Address(input, length_offset), pos_reg); + __ j(kLess, slow_path->GetEntryLabel()); + + // Check that (length(input) - pos) >= length. + __ movl(temp, Address(input, length_offset)); + __ subl(temp, pos_reg); + __ cmpl(temp, length); + __ j(kLess, slow_path->GetEntryLabel()); + } +} + +void IntrinsicCodeGeneratorX86::VisitSystemArrayCopyChar(HInvoke* invoke) { + X86Assembler* assembler = GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + Register src = locations->InAt(0).AsRegister<Register>(); + Location srcPos = locations->InAt(1); + Register dest = locations->InAt(2).AsRegister<Register>(); + Location destPos = locations->InAt(3); + Location length = locations->InAt(4); + + // Temporaries that we need for MOVSW. 
+ Register src_base = locations->GetTemp(0).AsRegister<Register>(); + DCHECK_EQ(src_base, ESI); + Register dest_base = locations->GetTemp(1).AsRegister<Register>(); + DCHECK_EQ(dest_base, EDI); + Register count = locations->GetTemp(2).AsRegister<Register>(); + DCHECK_EQ(count, ECX); + + SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke); + codegen_->AddSlowPath(slow_path); + + // Bail out if the source and destination are the same (to handle overlap). + __ cmpl(src, dest); + __ j(kEqual, slow_path->GetEntryLabel()); + + // Bail out if the source is null. + __ testl(src, src); + __ j(kEqual, slow_path->GetEntryLabel()); + + // Bail out if the destination is null. + __ testl(dest, dest); + __ j(kEqual, slow_path->GetEntryLabel()); + + // If the length is negative, bail out. + // We have already checked in the LocationsBuilder for the constant case. + if (!length.IsConstant()) { + // Use TEST so the sign flag reflects the length itself; comparing a register + // with itself always yields "equal", so the kLess branch would never be taken. + __ testl(length.AsRegister<Register>(), length.AsRegister<Register>()); + __ j(kLess, slow_path->GetEntryLabel()); + } + + // We need the count in ECX. + if (length.IsConstant()) { + __ movl(count, Immediate(length.GetConstant()->AsIntConstant()->GetValue())); + } else { + __ movl(count, length.AsRegister<Register>()); + } + + // Validity checks: source. + CheckPosition(assembler, srcPos, src, count, slow_path, src_base, dest_base); + + // Validity checks: dest. + CheckPosition(assembler, destPos, dest, count, slow_path, src_base, dest_base); + + // Okay, everything checks out. Finally time to do the copy. + // Check assumption that sizeof(Char) is 2 (used in scaling below). 
+ const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar); + DCHECK_EQ(char_size, 2u); + + const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value(); + + if (srcPos.IsConstant()) { + int32_t srcPos_const = srcPos.GetConstant()->AsIntConstant()->GetValue(); + __ leal(src_base, Address(src, char_size * srcPos_const + data_offset)); + } else { + __ leal(src_base, Address(src, srcPos.AsRegister<Register>(), + ScaleFactor::TIMES_2, data_offset)); + } + if (destPos.IsConstant()) { + int32_t destPos_const = destPos.GetConstant()->AsIntConstant()->GetValue(); + + __ leal(dest_base, Address(dest, char_size * destPos_const + data_offset)); + } else { + __ leal(dest_base, Address(dest, destPos.AsRegister<Register>(), + ScaleFactor::TIMES_2, data_offset)); + } + + // Do the move. + __ rep_movsw(); + + __ Bind(slow_path->GetExitLabel()); +} + void IntrinsicLocationsBuilderX86::VisitStringCompareTo(HInvoke* invoke) { // The inputs plus one temp. LocationSummary* locations = new (arena_) LocationSummary(invoke, @@ -970,9 +1147,7 @@ void IntrinsicCodeGeneratorX86::VisitStringEquals(HInvoke* invoke) { Register edi = locations->GetTemp(1).AsRegister<Register>(); Register esi = locations->Out().AsRegister<Register>(); - Label end; - Label return_true; - Label return_false; + NearLabel end, return_true, return_false; // Get offsets of count, value, and class fields within a string object. const uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); @@ -1004,8 +1179,7 @@ void IntrinsicCodeGeneratorX86::VisitStringEquals(HInvoke* invoke) { __ cmpl(ecx, Address(arg, count_offset)); __ j(kNotEqual, &return_false); // Return true if both strings are empty. - __ testl(ecx, ecx); - __ j(kEqual, &return_true); + __ jecxz(&return_true); // Load starting addresses of string values into ESI/EDI as required for repe_cmpsl instruction. 
__ leal(esi, Address(str, value_offset)); @@ -1115,7 +1289,7 @@ static void GenerateStringIndexOf(HInvoke* invoke, // Do a zero-length check. // TODO: Support jecxz. - Label not_found_label; + NearLabel not_found_label; __ testl(string_length, string_length); __ j(kEqual, &not_found_label); @@ -1158,7 +1332,7 @@ static void GenerateStringIndexOf(HInvoke* invoke, __ subl(string_length, counter); __ leal(out, Address(string_length, -1)); - Label done; + NearLabel done; __ jmp(&done); // Failed to match; return -1. @@ -1878,7 +2052,7 @@ static void GenLeadingZeros(X86Assembler* assembler, HInvoke* invoke, bool is_lo } // BSR sets ZF if the input was zero, and the output is undefined. - Label all_zeroes, done; + NearLabel all_zeroes, done; __ j(kEqual, &all_zeroes); // Correct the result from BSR to get the final CLZ result. @@ -1897,7 +2071,7 @@ static void GenLeadingZeros(X86Assembler* assembler, HInvoke* invoke, bool is_lo DCHECK(src.IsRegisterPair()); Register src_lo = src.AsRegisterPairLow<Register>(); Register src_hi = src.AsRegisterPairHigh<Register>(); - Label handle_low, done, all_zeroes; + NearLabel handle_low, done, all_zeroes; // Is the high word zero? 
__ testl(src_hi, src_hi); @@ -1954,7 +2128,6 @@ void IntrinsicCodeGeneratorX86::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) UNIMPLEMENTED_INTRINSIC(MathRoundDouble) UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck) -UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar) UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent) UNIMPLEMENTED_INTRINSIC(IntegerNumberOfTrailingZeros) UNIMPLEMENTED_INTRINSIC(LongNumberOfTrailingZeros) diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index 258ae9a55f..51980af36d 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -405,7 +405,7 @@ static void GenMinMaxFP(LocationSummary* locations, XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>(); - Label nan, done, op2_label; + NearLabel nan, done, op2_label; if (is_double) { __ ucomisd(out, op2); } else { @@ -702,7 +702,7 @@ void IntrinsicCodeGeneratorX86_64::VisitMathRoundFloat(HInvoke* invoke) { XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>(); CpuRegister out = locations->Out().AsRegister<CpuRegister>(); XmmRegister inPlusPointFive = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); - Label done, nan; + NearLabel done, nan; X86_64Assembler* assembler = GetAssembler(); // Load 0.5 into inPlusPointFive. @@ -750,7 +750,7 @@ void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) { XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>(); CpuRegister out = locations->Out().AsRegister<CpuRegister>(); XmmRegister inPlusPointFive = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); - Label done, nan; + NearLabel done, nan; X86_64Assembler* assembler = GetAssembler(); // Load 0.5 into inPlusPointFive. 
@@ -797,9 +797,9 @@ void IntrinsicLocationsBuilderX86_64::VisitStringCharAt(HInvoke* invoke) { void IntrinsicCodeGeneratorX86_64::VisitStringCharAt(HInvoke* invoke) { LocationSummary* locations = invoke->GetLocations(); - // Location of reference to data array + // Location of reference to data array. const int32_t value_offset = mirror::String::ValueOffset().Int32Value(); - // Location of count + // Location of count. const int32_t count_offset = mirror::String::CountOffset().Int32Value(); CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>(); @@ -826,6 +826,171 @@ void IntrinsicCodeGeneratorX86_64::VisitStringCharAt(HInvoke* invoke) { __ Bind(slow_path->GetExitLabel()); } +void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopyChar(HInvoke* invoke) { + // Check to see if we have known failures that will cause us to have to bail out + // to the runtime, and just generate the runtime call directly. + HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant(); + HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant(); + + // The positions must be non-negative. + if ((src_pos != nullptr && src_pos->GetValue() < 0) || + (dest_pos != nullptr && dest_pos->GetValue() < 0)) { + // We will have to fail anyways. + return; + } + + // The length must be >= 0. + HIntConstant* length = invoke->InputAt(4)->AsIntConstant(); + if (length != nullptr) { + int32_t len = length->GetValue(); + if (len < 0) { + // Just call as normal. + return; + } + } + + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCallOnSlowPath, + kIntrinsified); + // arraycopy(Object src, int srcPos, Object dest, int destPos, int length). 
+ locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1))); + locations->SetInAt(2, Location::RequiresRegister()); + locations->SetInAt(3, Location::RegisterOrConstant(invoke->InputAt(3))); + locations->SetInAt(4, Location::RegisterOrConstant(invoke->InputAt(4))); + + // And we need some temporaries. We will use REP MOVSW, so we need fixed registers. + locations->AddTemp(Location::RegisterLocation(RSI)); + locations->AddTemp(Location::RegisterLocation(RDI)); + locations->AddTemp(Location::RegisterLocation(RCX)); +} + +static void CheckPosition(X86_64Assembler* assembler, + Location pos, + CpuRegister input, + CpuRegister length, + SlowPathCodeX86_64* slow_path, + CpuRegister input_len, + CpuRegister temp) { + // Where is the length in the array? + const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value(); + + if (pos.IsConstant()) { + int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue(); + if (pos_const == 0) { + // Check that length(input) >= length. + __ cmpl(Address(input, length_offset), length); + __ j(kLess, slow_path->GetEntryLabel()); + } else { + // Check that length(input) >= pos. + __ movl(input_len, Address(input, length_offset)); + __ cmpl(input_len, Immediate(pos_const)); + __ j(kLess, slow_path->GetEntryLabel()); + + // Check that (length(input) - pos) >= length. + __ leal(temp, Address(input_len, -pos_const)); + __ cmpl(temp, length); + __ j(kLess, slow_path->GetEntryLabel()); + } + } else { + // Check that pos >= 0. + CpuRegister pos_reg = pos.AsRegister<CpuRegister>(); + __ testl(pos_reg, pos_reg); + __ j(kLess, slow_path->GetEntryLabel()); + + // Check that pos <= length(input). + __ cmpl(Address(input, length_offset), pos_reg); + __ j(kLess, slow_path->GetEntryLabel()); + + // Check that (length(input) - pos) >= length. 
+ __ movl(temp, Address(input, length_offset)); + __ subl(temp, pos_reg); + __ cmpl(temp, length); + __ j(kLess, slow_path->GetEntryLabel()); + } +} + +void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopyChar(HInvoke* invoke) { + X86_64Assembler* assembler = GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + CpuRegister src = locations->InAt(0).AsRegister<CpuRegister>(); + Location srcPos = locations->InAt(1); + CpuRegister dest = locations->InAt(2).AsRegister<CpuRegister>(); + Location destPos = locations->InAt(3); + Location length = locations->InAt(4); + + // Temporaries that we need for MOVSW. + CpuRegister src_base = locations->GetTemp(0).AsRegister<CpuRegister>(); + DCHECK_EQ(src_base.AsRegister(), RSI); + CpuRegister dest_base = locations->GetTemp(1).AsRegister<CpuRegister>(); + DCHECK_EQ(dest_base.AsRegister(), RDI); + CpuRegister count = locations->GetTemp(2).AsRegister<CpuRegister>(); + DCHECK_EQ(count.AsRegister(), RCX); + + SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke); + codegen_->AddSlowPath(slow_path); + + // Bail out if the source and destination are the same. + __ cmpl(src, dest); + __ j(kEqual, slow_path->GetEntryLabel()); + + // Bail out if the source is null. + __ testl(src, src); + __ j(kEqual, slow_path->GetEntryLabel()); + + // Bail out if the destination is null. + __ testl(dest, dest); + __ j(kEqual, slow_path->GetEntryLabel()); + + // If the length is negative, bail out. + // We have already checked in the LocationsBuilder for the constant case. + if (!length.IsConstant()) { + __ testl(length.AsRegister<CpuRegister>(), length.AsRegister<CpuRegister>()); + __ j(kLess, slow_path->GetEntryLabel()); + } + + // We need the count in RCX. + if (length.IsConstant()) { + __ movl(count, Immediate(length.GetConstant()->AsIntConstant()->GetValue())); + } else { + __ movl(count, length.AsRegister<CpuRegister>()); + } + + // Validity checks: source. 
+ CheckPosition(assembler, srcPos, src, count, slow_path, src_base, dest_base); + + // Validity checks: dest. + CheckPosition(assembler, destPos, dest, count, slow_path, src_base, dest_base); + + // Okay, everything checks out. Finally time to do the copy. + // Check assumption that sizeof(Char) is 2 (used in scaling below). + const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar); + DCHECK_EQ(char_size, 2u); + + const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value(); + + if (srcPos.IsConstant()) { + int32_t srcPos_const = srcPos.GetConstant()->AsIntConstant()->GetValue(); + __ leal(src_base, Address(src, char_size * srcPos_const + data_offset)); + } else { + __ leal(src_base, Address(src, srcPos.AsRegister<CpuRegister>(), + ScaleFactor::TIMES_2, data_offset)); + } + if (destPos.IsConstant()) { + int32_t destPos_const = destPos.GetConstant()->AsIntConstant()->GetValue(); + __ leal(dest_base, Address(dest, char_size * destPos_const + data_offset)); + } else { + __ leal(dest_base, Address(dest, destPos.AsRegister<CpuRegister>(), + ScaleFactor::TIMES_2, data_offset)); + } + + // Do the move. + __ rep_movsw(); + + __ Bind(slow_path->GetExitLabel()); +} + void IntrinsicLocationsBuilderX86_64::VisitStringCompareTo(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, LocationSummary::kCall, @@ -879,9 +1044,7 @@ void IntrinsicCodeGeneratorX86_64::VisitStringEquals(HInvoke* invoke) { CpuRegister rdi = locations->GetTemp(1).AsRegister<CpuRegister>(); CpuRegister rsi = locations->Out().AsRegister<CpuRegister>(); - Label end; - Label return_true; - Label return_false; + NearLabel end, return_true, return_false; // Get offsets of count, value, and class fields within a string object. 
const uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); @@ -913,8 +1076,7 @@ void IntrinsicCodeGeneratorX86_64::VisitStringEquals(HInvoke* invoke) { __ cmpl(rcx, Address(arg, count_offset)); __ j(kNotEqual, &return_false); // Return true if both strings are empty. - __ testl(rcx, rcx); - __ j(kEqual, &return_true); + __ jrcxz(&return_true); // Load starting addresses of string values into RSI/RDI as required for repe_cmpsq instruction. __ leal(rsi, Address(str, value_offset)); @@ -1024,7 +1186,7 @@ static void GenerateStringIndexOf(HInvoke* invoke, // Do a length check. // TODO: Support jecxz. - Label not_found_label; + NearLabel not_found_label; __ testl(string_length, string_length); __ j(kEqual, &not_found_label); @@ -1066,7 +1228,7 @@ static void GenerateStringIndexOf(HInvoke* invoke, __ subl(string_length, counter); __ leal(out, Address(string_length, -1)); - Label done; + NearLabel done; __ jmp(&done); // Failed to match; return -1. @@ -1731,7 +1893,7 @@ static void GenLeadingZeros(X86_64Assembler* assembler, HInvoke* invoke, bool is } // BSR sets ZF if the input was zero, and the output is undefined. - Label is_zero, done; + NearLabel is_zero, done; __ j(kEqual, &is_zero); // Correct the result from BSR to get the CLZ result. @@ -1772,7 +1934,6 @@ void IntrinsicCodeGeneratorX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSE } UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck) -UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar) UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent) UNIMPLEMENTED_INTRINSIC(IntegerNumberOfTrailingZeros) UNIMPLEMENTED_INTRINSIC(LongNumberOfTrailingZeros) |