summaryrefslogtreecommitdiff
path: root/compiler/optimizing/instruction_simplifier.cc
diff options
context:
space:
mode:
author Vladimir Marko <vmarko@google.com> 2019-05-21 10:00:15 +0100
committer Vladimír Marko <vmarko@google.com> 2023-01-03 12:31:57 +0000
commit 41de45060710d64b671a0fa001ec187df221359d (patch)
tree acdd772258d4eb671f8aa00876988b3f59cc4d1f /compiler/optimizing/instruction_simplifier.cc
parent 890b19bd625be5d0e4a876e3eb11b8b893fb0c13 (diff)
StringBuilder append pattern for float/double.
Results for added benchmarks on blueline-userdebug with cpu frequencies fixed at 1420800 (cpus 0-3; little) and 1459200 (cpus 4-7; big): 32-bit little (--variant=X32 --invoke-with 'taskset 0f') timeAppendStringAndDouble: ~1260ns -> ~970ns timeAppendStringAndFloat: ~1250ns -> ~940ns timeAppendStringAndHugeDouble: ~4700ns -> ~4690ns (noise) timeAppendStringAndHugeFloat: ~3400ns -> ~3300ns (noise) timeAppendStringDoubleStringAndFloat: ~1980ns -> ~1550ns 64-bit little (--variant=X64 --invoke-with 'taskset 0f') timeAppendStringAndDouble: ~1260ns -> ~970ns timeAppendStringAndFloat: ~1260ns -> ~940ns timeAppendStringAndHugeDouble: ~4700ns -> ~4800ns (noise) timeAppendStringAndHugeFloat: ~3300ns -> ~3400ns (noise) timeAppendStringDoubleStringAndFloat: ~1970ns -> ~1550ns 32-bit big (--variant=X32 --invoke-with 'taskset f0') timeAppendStringAndDouble: ~580ns -> ~450ns timeAppendStringAndFloat: ~590ns -> ~430ns timeAppendStringAndHugeDouble: ~2500ns -> ~2100ns (noise) timeAppendStringAndHugeFloat: ~1500ns -> ~1300ns (noise) timeAppendStringDoubleStringAndFloat: ~880ns -> ~730ns 64-bit big (--variant=X64 --invoke-with 'taskset f0') timeAppendStringAndDouble: ~590ns -> ~450ns timeAppendStringAndFloat: ~590ns -> ~430ns timeAppendStringAndHugeDouble: ~2300ns -> ~2300ns (noise) timeAppendStringAndHugeFloat: ~1500ns -> ~1300ns (noise) timeAppendStringDoubleStringAndFloat: ~870ns -> ~730ns The `timeAppendStringAnd{Double,Float}` benchmarks show very nice improvements, roughly 25% on both little and big cores. The `timeAppendStringDoubleStringAndFloat` also shows decent improvements, over 20% on little and over 15% on big cores. (These benchmarks test the best-case scenario for "before" as the StringBuilder's internal buffer is not reallocated.) The `timeAppendStringAndHuge{Double,Float}` results are too noisy to draw any conclusions (especially on little cores but there is still too much noise on big cores as well). 
There are also small regressions for existing benchmarks `timeAppend{LongStrings,StringAndInt,Strings}` but these non-FP regressions may be mitigated after updating the ThinLTO profile. There is also an opportunity to optimize the calls back to managed code for known shorty (in this change we use "LD" and "LF") by using a dedicated stub instead of going through the generic invoke stub. Boot image size changes are insignificant (few matches). Test: Added tests to 697-checker-string-append Test: m test-art-host-gtest Test: testrunner.py --host --optimizing Test: testrunner.py --target --optimizing Bug: 19575890 Change-Id: I9cf38c2d615a0a2b14255d18588a694d8870aae5
Diffstat (limited to 'compiler/optimizing/instruction_simplifier.cc')
-rw-r--r--compiler/optimizing/instruction_simplifier.cc17
1 files changed, 11 insertions, 6 deletions
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index 82c1f6d3ff..a2e9f69933 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -2652,6 +2652,7 @@ static bool TryReplaceStringBuilderAppend(HInvoke* invoke) {
bool seen_to_string = false;
uint32_t format = 0u;
uint32_t num_args = 0u;
+ bool has_fp_args = false;
HInstruction* args[StringBuilderAppend::kMaxArgs]; // Added in reverse order.
for (HBackwardInstructionIterator iter(block->GetInstructions()); !iter.Done(); iter.Advance()) {
HInstruction* user = iter.Current();
@@ -2697,6 +2698,14 @@ static bool TryReplaceStringBuilderAppend(HInvoke* invoke) {
case Intrinsics::kStringBuilderAppendLong:
arg = StringBuilderAppend::Argument::kLong;
break;
+ case Intrinsics::kStringBuilderAppendFloat:
+ arg = StringBuilderAppend::Argument::kFloat;
+ has_fp_args = true;
+ break;
+ case Intrinsics::kStringBuilderAppendDouble:
+ arg = StringBuilderAppend::Argument::kDouble;
+ has_fp_args = true;
+ break;
case Intrinsics::kStringBuilderAppendCharSequence: {
ReferenceTypeInfo rti = user->AsInvokeVirtual()->InputAt(1)->GetReferenceTypeInfo();
if (!rti.IsValid()) {
@@ -2716,10 +2725,6 @@ static bool TryReplaceStringBuilderAppend(HInvoke* invoke) {
}
break;
}
- case Intrinsics::kStringBuilderAppendFloat:
- case Intrinsics::kStringBuilderAppendDouble:
- // TODO: Unimplemented, needs to call FloatingDecimal.getBinaryToASCIIConverter().
- return false;
default: {
return false;
}
@@ -2772,8 +2777,8 @@ static bool TryReplaceStringBuilderAppend(HInvoke* invoke) {
// Create replacement instruction.
HIntConstant* fmt = block->GetGraph()->GetIntConstant(static_cast<int32_t>(format));
ArenaAllocator* allocator = block->GetGraph()->GetAllocator();
- HStringBuilderAppend* append =
- new (allocator) HStringBuilderAppend(fmt, num_args, allocator, invoke->GetDexPc());
+ HStringBuilderAppend* append = new (allocator) HStringBuilderAppend(
+ fmt, num_args, has_fp_args, allocator, invoke->GetDexPc());
append->SetReferenceTypeInfo(invoke->GetReferenceTypeInfo());
for (size_t i = 0; i != num_args; ++i) {
append->SetArgumentAt(i, args[num_args - 1u - i]);