ART: Optimize StringBuilder append pattern.
Recognize appending with StringBuilder and replace the
entire expression with a runtime call that perfoms the
append in a more efficient manner.
For now, require the entire pattern to be in a single block
and be very strict about the StringBuilder environment uses.
Also, do not accept StringBuilder/char[]/Object/float/double
arguments as they throw non-OOME exceptions and/or require a
call from the entrypoint back to a helper function in Java;
these shall be implemented later.
Boot image size for aosp_taimen-userdebug:
- before:
arm/boot*.oat: 19653872
arm64/boot*.oat: 23292784
oat/arm64/services.odex: 22408664
- after:
arm/boot*.oat: 19432184 (-216KiB)
arm64/boot*.oat: 22992488 (-293KiB)
oat/arm64/services.odex: 22376776 (-31KiB)
Note that const-string in compiled boot image methods cannot
throw, but for apps it can and therefore its environment can
prevent the optimization for apps. We could implement either
a simple carve-out for const-string or generic environment
pruning to allow this pattern to be applied more often.
Results for the new StringBuilderAppendBenchmark on taimen:
timeAppendLongStrings: ~700ns -> ~200ns
timeAppendStringAndInt: ~220ns -> ~140ns
timeAppendStrings: ~200ns -> 130ns
Bug: 19575890
Test: 697-checker-string-append
Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing
Test: aosp_taimen-userdebug boots.
Test: run-gtests.sh
Test: testrunner.py --target --optimizing
Test: vogar --benchmark art/benchmark/stringbuilder-append/src/StringBuilderAppendBenchmark.java
Change-Id: I51789bf299f5219f68ada4c077b6a1d3fe083964
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index e829576..9cc5cad 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -25,6 +25,7 @@
#include "mirror/class-inl.h"
#include "scoped_thread_state_change-inl.h"
#include "sharpening.h"
+#include "string_builder_append.h"
namespace art {
@@ -2479,6 +2480,186 @@
return false;
}
+static bool TryReplaceStringBuilderAppend(HInvoke* invoke) {
+ DCHECK_EQ(invoke->GetIntrinsic(), Intrinsics::kStringBuilderToString);
+ if (invoke->CanThrowIntoCatchBlock()) {
+ return false;
+ }
+
+ HBasicBlock* block = invoke->GetBlock();
+ HInstruction* sb = invoke->InputAt(0);
+
+ // We support only a new StringBuilder, otherwise we cannot ensure that
+ // the StringBuilder data does not need to be populated for other users.
+ if (!sb->IsNewInstance()) {
+ return false;
+ }
+
+ // For now, we support only single-block recognition.
+ // (Ternary operators feeding the append could be implemented.)
+ for (const HUseListNode<HInstruction*>& use : sb->GetUses()) {
+ if (use.GetUser()->GetBlock() != block) {
+ return false;
+ }
+ }
+
+ // Collect args and check for unexpected uses.
+ // We expect one call to a constructor with no arguments, one constructor fence (unless
+ // eliminated), some number of append calls and one call to StringBuilder.toString().
+ bool seen_constructor = false;
+ bool seen_constructor_fence = false;
+ bool seen_to_string = false;
+ uint32_t format = 0u;
+ uint32_t num_args = 0u;
+ HInstruction* args[StringBuilderAppend::kMaxArgs]; // Added in reverse order.
+ for (const HUseListNode<HInstruction*>& use : sb->GetUses()) {
+ // The append pattern uses the StringBuilder only as the first argument.
+ if (use.GetIndex() != 0u) {
+ return false;
+ }
+ // We see the uses in reverse order because they are inserted at the front
+ // of the singly-linked list, so the StringBuilder.append() must come first.
+ HInstruction* user = use.GetUser();
+ if (!seen_to_string) {
+ if (user->IsInvokeVirtual() &&
+ user->AsInvokeVirtual()->GetIntrinsic() == Intrinsics::kStringBuilderToString) {
+ seen_to_string = true;
+ continue;
+ } else {
+ return false;
+ }
+ }
+ // Then we should see the arguments.
+ if (user->IsInvokeVirtual()) {
+ HInvokeVirtual* as_invoke_virtual = user->AsInvokeVirtual();
+ DCHECK(!seen_constructor);
+ DCHECK(!seen_constructor_fence);
+ StringBuilderAppend::Argument arg;
+ switch (as_invoke_virtual->GetIntrinsic()) {
+ case Intrinsics::kStringBuilderAppendObject:
+ // TODO: Unimplemented, needs to call String.valueOf().
+ return false;
+ case Intrinsics::kStringBuilderAppendString:
+ arg = StringBuilderAppend::Argument::kString;
+ break;
+ case Intrinsics::kStringBuilderAppendCharArray:
+ // TODO: Unimplemented, StringBuilder.append(char[]) can throw NPE and we would
+ // not have the correct stack trace for it.
+ return false;
+ case Intrinsics::kStringBuilderAppendBoolean:
+ arg = StringBuilderAppend::Argument::kBoolean;
+ break;
+ case Intrinsics::kStringBuilderAppendChar:
+ arg = StringBuilderAppend::Argument::kChar;
+ break;
+ case Intrinsics::kStringBuilderAppendInt:
+ arg = StringBuilderAppend::Argument::kInt;
+ break;
+ case Intrinsics::kStringBuilderAppendLong:
+ arg = StringBuilderAppend::Argument::kLong;
+ break;
+ case Intrinsics::kStringBuilderAppendCharSequence: {
+ ReferenceTypeInfo rti = user->AsInvokeVirtual()->InputAt(1)->GetReferenceTypeInfo();
+ if (!rti.IsValid()) {
+ return false;
+ }
+ ScopedObjectAccess soa(Thread::Current());
+ Handle<mirror::Class> input_type = rti.GetTypeHandle();
+ DCHECK(input_type != nullptr);
+ if (input_type.Get() == GetClassRoot<mirror::String>()) {
+ arg = StringBuilderAppend::Argument::kString;
+ } else {
+ // TODO: Check and implement for StringBuilder. We could find the StringBuilder's
+ // internal char[] inconsistent with the length, or the string compression
+ // of the result could be compromised with a concurrent modification, and
+ // we would need to throw appropriate exceptions.
+ return false;
+ }
+ break;
+ }
+ case Intrinsics::kStringBuilderAppendFloat:
+ case Intrinsics::kStringBuilderAppendDouble:
+ // TODO: Unimplemented, needs to call FloatingDecimal.getBinaryToASCIIConverter().
+ return false;
+ default: {
+ return false;
+ }
+ }
+ // Uses of the append return value should have been replaced with the first input.
+ DCHECK(!as_invoke_virtual->HasUses());
+ DCHECK(!as_invoke_virtual->HasEnvironmentUses());
+ if (num_args == StringBuilderAppend::kMaxArgs) {
+ return false;
+ }
+ format = (format << StringBuilderAppend::kBitsPerArg) | static_cast<uint32_t>(arg);
+ args[num_args] = as_invoke_virtual->InputAt(1u);
+ ++num_args;
+ } else if (user->IsInvokeStaticOrDirect() &&
+ user->AsInvokeStaticOrDirect()->GetResolvedMethod() != nullptr &&
+ user->AsInvokeStaticOrDirect()->GetResolvedMethod()->IsConstructor() &&
+ user->AsInvokeStaticOrDirect()->GetNumberOfArguments() == 1u) {
+ // After arguments, we should see the constructor.
+ // We accept only the constructor with no extra arguments.
+ DCHECK(!seen_constructor);
+ DCHECK(!seen_constructor_fence);
+ seen_constructor = true;
+ } else if (user->IsConstructorFence()) {
+ // The last use we see is the constructor fence.
+ DCHECK(seen_constructor);
+ DCHECK(!seen_constructor_fence);
+ seen_constructor_fence = true;
+ } else {
+ return false;
+ }
+ }
+
+ if (num_args == 0u) {
+ return false;
+ }
+
+ // Check environment uses.
+ for (const HUseListNode<HEnvironment*>& use : sb->GetEnvUses()) {
+ HInstruction* holder = use.GetUser()->GetHolder();
+ if (holder->GetBlock() != block) {
+ return false;
+ }
+ // Accept only calls on the StringBuilder (which shall all be removed).
+ // TODO: Carve-out for const-string? Or rely on environment pruning (to be implemented)?
+ if (holder->InputCount() == 0 || holder->InputAt(0) != sb) {
+ return false;
+ }
+ }
+
+ // Create replacement instruction.
+ HIntConstant* fmt = block->GetGraph()->GetIntConstant(static_cast<int32_t>(format));
+ ArenaAllocator* allocator = block->GetGraph()->GetAllocator();
+ HStringBuilderAppend* append =
+ new (allocator) HStringBuilderAppend(fmt, num_args, allocator, invoke->GetDexPc());
+ append->SetReferenceTypeInfo(invoke->GetReferenceTypeInfo());
+ for (size_t i = 0; i != num_args; ++i) {
+ append->SetArgumentAt(i, args[num_args - 1u - i]);
+ }
+ block->InsertInstructionBefore(append, invoke);
+ invoke->ReplaceWith(append);
+ // Copy environment, except for the StringBuilder uses.
+ for (size_t i = 0, size = invoke->GetEnvironment()->Size(); i != size; ++i) {
+ if (invoke->GetEnvironment()->GetInstructionAt(i) == sb) {
+ invoke->GetEnvironment()->RemoveAsUserOfInput(i);
+ invoke->GetEnvironment()->SetRawEnvAt(i, nullptr);
+ }
+ }
+ append->CopyEnvironmentFrom(invoke->GetEnvironment());
+ // Remove the old instruction.
+ block->RemoveInstruction(invoke);
+ // Remove the StringBuilder's uses and StringBuilder.
+ while (sb->HasNonEnvironmentUses()) {
+ block->RemoveInstruction(sb->GetUses().front().GetUser());
+ }
+ DCHECK(!sb->HasEnvironmentUses());
+ block->RemoveInstruction(sb);
+ return true;
+}
+
// Certain allocation intrinsics are not removed by dead code elimination
// because of potentially throwing an OOM exception or other side effects.
// This method removes such intrinsics when special circumstances allow.
@@ -2493,6 +2674,9 @@
invoke->GetBlock()->RemoveInstruction(invoke);
RecordSimplification();
}
+ } else if (invoke->GetIntrinsic() == Intrinsics::kStringBuilderToString &&
+ TryReplaceStringBuilderAppend(invoke)) {
+ RecordSimplification();
}
}