ART: Optimize StringBuilder append pattern.
Recognize appending with StringBuilder and replace the
entire expression with a runtime call that perfoms the
append in a more efficient manner.
For now, require the entire pattern to be in a single block
and be very strict about the StringBuilder environment uses.
Also, do not accept StringBuilder/char[]/Object/float/double
arguments as they throw non-OOME exceptions and/or require a
call from the entrypoint back to a helper function in Java;
these shall be implemented later.
Boot image size for aosp_taimen-userdebug:
- before:
arm/boot*.oat: 19653872
arm64/boot*.oat: 23292784
oat/arm64/services.odex: 22408664
- after:
arm/boot*.oat: 19432184 (-216KiB)
arm64/boot*.oat: 22992488 (-293KiB)
oat/arm64/services.odex: 22376776 (-31KiB)
Note that const-string in compiled boot image methods cannot
throw, but for apps it can and therefore its environment can
prevent the optimization for apps. We could implement either
a simple carve-out for const-string or generic environment
pruning to allow this pattern to be applied more often.
Results for the new StringBuilderAppendBenchmark on taimen:
timeAppendLongStrings: ~700ns -> ~200ns
timeAppendStringAndInt: ~220ns -> ~140ns
timeAppendStrings: ~200ns -> 130ns
Bug: 19575890
Test: 697-checker-string-append
Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing
Test: aosp_taimen-userdebug boots.
Test: run-gtests.sh
Test: testrunner.py --target --optimizing
Test: vogar --benchmark art/benchmark/stringbuilder-append/src/StringBuilderAppendBenchmark.java
Change-Id: I51789bf299f5219f68ada4c077b6a1d3fe083964
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 2bbb570..3b5699b 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -64,6 +64,7 @@
#include "ssa_liveness_analysis.h"
#include "stack_map.h"
#include "stack_map_stream.h"
+#include "string_builder_append.h"
#include "thread-current-inl.h"
#include "utils/assembler.h"
@@ -599,6 +600,57 @@
InvokeRuntime(entrypoint, invoke, invoke->GetDexPc(), nullptr);
}
+void CodeGenerator::CreateStringBuilderAppendLocations(HStringBuilderAppend* instruction,
+ Location out) {
+ ArenaAllocator* allocator = GetGraph()->GetAllocator();
+ LocationSummary* locations =
+ new (allocator) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
+ locations->SetOut(out);
+ instruction->GetLocations()->SetInAt(instruction->FormatIndex(),
+ Location::ConstantLocation(instruction->GetFormat()));
+
+ uint32_t format = static_cast<uint32_t>(instruction->GetFormat()->GetValue());
+ uint32_t f = format;
+ PointerSize pointer_size = InstructionSetPointerSize(GetInstructionSet());
+ size_t stack_offset = static_cast<size_t>(pointer_size); // Start after the ArtMethod*.
+ for (size_t i = 0, num_args = instruction->GetNumberOfArguments(); i != num_args; ++i) {
+ StringBuilderAppend::Argument arg_type =
+ static_cast<StringBuilderAppend::Argument>(f & StringBuilderAppend::kArgMask);
+ switch (arg_type) {
+ case StringBuilderAppend::Argument::kStringBuilder:
+ case StringBuilderAppend::Argument::kString:
+ case StringBuilderAppend::Argument::kCharArray:
+ static_assert(sizeof(StackReference<mirror::Object>) == sizeof(uint32_t), "Size check.");
+ FALLTHROUGH_INTENDED;
+ case StringBuilderAppend::Argument::kBoolean:
+ case StringBuilderAppend::Argument::kChar:
+ case StringBuilderAppend::Argument::kInt:
+ case StringBuilderAppend::Argument::kFloat:
+ locations->SetInAt(i, Location::StackSlot(stack_offset));
+ break;
+ case StringBuilderAppend::Argument::kLong:
+ case StringBuilderAppend::Argument::kDouble:
+ stack_offset = RoundUp(stack_offset, sizeof(uint64_t));
+ locations->SetInAt(i, Location::DoubleStackSlot(stack_offset));
+ // Skip the low word, let the common code skip the high word.
+ stack_offset += sizeof(uint32_t);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected arg format: 0x" << std::hex
+ << (f & StringBuilderAppend::kArgMask) << " full format: 0x" << format;
+ UNREACHABLE();
+ }
+ f >>= StringBuilderAppend::kBitsPerArg;
+ stack_offset += sizeof(uint32_t);
+ }
+ DCHECK_EQ(f, 0u);
+
+ size_t param_size = stack_offset - static_cast<size_t>(pointer_size);
+ DCHECK_ALIGNED(param_size, kVRegSize);
+ size_t num_vregs = param_size / kVRegSize;
+ graph_->UpdateMaximumNumberOfOutVRegs(num_vregs);
+}
+
void CodeGenerator::CreateUnresolvedFieldLocationSummary(
HInstruction* field_access,
DataType::Type field_type,
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index f70ecb6..357c4bb 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -546,6 +546,8 @@
void GenerateInvokeCustomCall(HInvokeCustom* invoke);
+ void CreateStringBuilderAppendLocations(HStringBuilderAppend* instruction, Location out);
+
void CreateUnresolvedFieldLocationSummary(
HInstruction* field_access,
DataType::Type field_type,
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 3086882..28a7583 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -5411,6 +5411,15 @@
HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
}
+void LocationsBuilderARM64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
+ codegen_->CreateStringBuilderAppendLocations(instruction, LocationFrom(x0));
+}
+
+void InstructionCodeGeneratorARM64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
+ __ Mov(w0, instruction->GetFormat()->GetValue());
+ codegen_->InvokeRuntime(kQuickStringBuilderAppend, instruction, instruction->GetDexPc());
+}
+
void LocationsBuilderARM64::VisitUnresolvedInstanceFieldGet(
HUnresolvedInstanceFieldGet* instruction) {
FieldAccessCallingConventionARM64 calling_convention;
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 6469c69..7adfe5d 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -5722,6 +5722,15 @@
HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
}
+void LocationsBuilderARMVIXL::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
+ codegen_->CreateStringBuilderAppendLocations(instruction, LocationFrom(r0));
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
+ __ Mov(r0, instruction->GetFormat()->GetValue());
+ codegen_->InvokeRuntime(kQuickStringBuilderAppend, instruction, instruction->GetDexPc());
+}
+
void LocationsBuilderARMVIXL::VisitUnresolvedInstanceFieldGet(
HUnresolvedInstanceFieldGet* instruction) {
FieldAccessCallingConventionARMVIXL calling_convention;
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 72334af..b3bce78 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -9529,6 +9529,15 @@
instruction->GetValueCanBeNull());
}
+void LocationsBuilderMIPS::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
+ codegen_->CreateStringBuilderAppendLocations(instruction, Location::RegisterLocation(V0));
+}
+
+void InstructionCodeGeneratorMIPS::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
+ __ LoadConst32(A0, instruction->GetFormat()->GetValue());
+ codegen_->InvokeRuntime(kQuickStringBuilderAppend, instruction, instruction->GetDexPc());
+}
+
void LocationsBuilderMIPS::VisitUnresolvedInstanceFieldGet(
HUnresolvedInstanceFieldGet* instruction) {
FieldAccessCallingConventionMIPS calling_convention;
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 0d3cb3b..0a3a3fa 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -7148,6 +7148,15 @@
HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
}
+void LocationsBuilderMIPS64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
+ codegen_->CreateStringBuilderAppendLocations(instruction, Location::RegisterLocation(V0));
+}
+
+void InstructionCodeGeneratorMIPS64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
+ __ LoadConst32(A0, instruction->GetFormat()->GetValue());
+ codegen_->InvokeRuntime(kQuickStringBuilderAppend, instruction, instruction->GetDexPc());
+}
+
void LocationsBuilderMIPS64::VisitUnresolvedInstanceFieldGet(
HUnresolvedInstanceFieldGet* instruction) {
FieldAccessCallingConventionMIPS64 calling_convention;
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index ca1723b..7a73569 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -5510,6 +5510,15 @@
HandleFieldGet(instruction, instruction->GetFieldInfo());
}
+void LocationsBuilderX86::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
+ codegen_->CreateStringBuilderAppendLocations(instruction, Location::RegisterLocation(EAX));
+}
+
+void InstructionCodeGeneratorX86::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
+ __ movl(EAX, Immediate(instruction->GetFormat()->GetValue()));
+ codegen_->InvokeRuntime(kQuickStringBuilderAppend, instruction, instruction->GetDexPc());
+}
+
void LocationsBuilderX86::VisitUnresolvedInstanceFieldGet(
HUnresolvedInstanceFieldGet* instruction) {
FieldAccessCallingConventionX86 calling_convention;
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 7c293b8..ee2ef77 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -4882,6 +4882,15 @@
HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
}
+void LocationsBuilderX86_64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
+ codegen_->CreateStringBuilderAppendLocations(instruction, Location::RegisterLocation(RAX));
+}
+
+void InstructionCodeGeneratorX86_64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
+ __ movl(CpuRegister(RDI), Immediate(instruction->GetFormat()->GetValue()));
+ codegen_->InvokeRuntime(kQuickStringBuilderAppend, instruction, instruction->GetDexPc());
+}
+
void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldGet(
HUnresolvedInstanceFieldGet* instruction) {
FieldAccessCallingConventionX86_64 calling_convention;
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index e829576..9cc5cad 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -25,6 +25,7 @@
#include "mirror/class-inl.h"
#include "scoped_thread_state_change-inl.h"
#include "sharpening.h"
+#include "string_builder_append.h"
namespace art {
@@ -2479,6 +2480,186 @@
return false;
}
+static bool TryReplaceStringBuilderAppend(HInvoke* invoke) {
+ DCHECK_EQ(invoke->GetIntrinsic(), Intrinsics::kStringBuilderToString);
+ if (invoke->CanThrowIntoCatchBlock()) {
+ return false;
+ }
+
+ HBasicBlock* block = invoke->GetBlock();
+ HInstruction* sb = invoke->InputAt(0);
+
+ // We support only a new StringBuilder, otherwise we cannot ensure that
+ // the StringBuilder data does not need to be populated for other users.
+ if (!sb->IsNewInstance()) {
+ return false;
+ }
+
+ // For now, we support only single-block recognition.
+ // (Ternary operators feeding the append could be implemented.)
+ for (const HUseListNode<HInstruction*>& use : sb->GetUses()) {
+ if (use.GetUser()->GetBlock() != block) {
+ return false;
+ }
+ }
+
+ // Collect args and check for unexpected uses.
+ // We expect one call to a constructor with no arguments, one constructor fence (unless
+ // eliminated), some number of append calls and one call to StringBuilder.toString().
+ bool seen_constructor = false;
+ bool seen_constructor_fence = false;
+ bool seen_to_string = false;
+ uint32_t format = 0u;
+ uint32_t num_args = 0u;
+ HInstruction* args[StringBuilderAppend::kMaxArgs]; // Added in reverse order.
+ for (const HUseListNode<HInstruction*>& use : sb->GetUses()) {
+ // The append pattern uses the StringBuilder only as the first argument.
+ if (use.GetIndex() != 0u) {
+ return false;
+ }
+ // We see the uses in reverse order because they are inserted at the front
+ // of the singly-linked list, so the StringBuilder.append() must come first.
+ HInstruction* user = use.GetUser();
+ if (!seen_to_string) {
+ if (user->IsInvokeVirtual() &&
+ user->AsInvokeVirtual()->GetIntrinsic() == Intrinsics::kStringBuilderToString) {
+ seen_to_string = true;
+ continue;
+ } else {
+ return false;
+ }
+ }
+ // Then we should see the arguments.
+ if (user->IsInvokeVirtual()) {
+ HInvokeVirtual* as_invoke_virtual = user->AsInvokeVirtual();
+ DCHECK(!seen_constructor);
+ DCHECK(!seen_constructor_fence);
+ StringBuilderAppend::Argument arg;
+ switch (as_invoke_virtual->GetIntrinsic()) {
+ case Intrinsics::kStringBuilderAppendObject:
+ // TODO: Unimplemented, needs to call String.valueOf().
+ return false;
+ case Intrinsics::kStringBuilderAppendString:
+ arg = StringBuilderAppend::Argument::kString;
+ break;
+ case Intrinsics::kStringBuilderAppendCharArray:
+ // TODO: Unimplemented, StringBuilder.append(char[]) can throw NPE and we would
+ // not have the correct stack trace for it.
+ return false;
+ case Intrinsics::kStringBuilderAppendBoolean:
+ arg = StringBuilderAppend::Argument::kBoolean;
+ break;
+ case Intrinsics::kStringBuilderAppendChar:
+ arg = StringBuilderAppend::Argument::kChar;
+ break;
+ case Intrinsics::kStringBuilderAppendInt:
+ arg = StringBuilderAppend::Argument::kInt;
+ break;
+ case Intrinsics::kStringBuilderAppendLong:
+ arg = StringBuilderAppend::Argument::kLong;
+ break;
+ case Intrinsics::kStringBuilderAppendCharSequence: {
+ ReferenceTypeInfo rti = user->AsInvokeVirtual()->InputAt(1)->GetReferenceTypeInfo();
+ if (!rti.IsValid()) {
+ return false;
+ }
+ ScopedObjectAccess soa(Thread::Current());
+ Handle<mirror::Class> input_type = rti.GetTypeHandle();
+ DCHECK(input_type != nullptr);
+ if (input_type.Get() == GetClassRoot<mirror::String>()) {
+ arg = StringBuilderAppend::Argument::kString;
+ } else {
+ // TODO: Check and implement for StringBuilder. We could find the StringBuilder's
+ // internal char[] inconsistent with the length, or the string compression
+ // of the result could be compromised with a concurrent modification, and
+ // we would need to throw appropriate exceptions.
+ return false;
+ }
+ break;
+ }
+ case Intrinsics::kStringBuilderAppendFloat:
+ case Intrinsics::kStringBuilderAppendDouble:
+ // TODO: Unimplemented, needs to call FloatingDecimal.getBinaryToASCIIConverter().
+ return false;
+ default: {
+ return false;
+ }
+ }
+ // Uses of the append return value should have been replaced with the first input.
+ DCHECK(!as_invoke_virtual->HasUses());
+ DCHECK(!as_invoke_virtual->HasEnvironmentUses());
+ if (num_args == StringBuilderAppend::kMaxArgs) {
+ return false;
+ }
+ format = (format << StringBuilderAppend::kBitsPerArg) | static_cast<uint32_t>(arg);
+ args[num_args] = as_invoke_virtual->InputAt(1u);
+ ++num_args;
+ } else if (user->IsInvokeStaticOrDirect() &&
+ user->AsInvokeStaticOrDirect()->GetResolvedMethod() != nullptr &&
+ user->AsInvokeStaticOrDirect()->GetResolvedMethod()->IsConstructor() &&
+ user->AsInvokeStaticOrDirect()->GetNumberOfArguments() == 1u) {
+ // After arguments, we should see the constructor.
+ // We accept only the constructor with no extra arguments.
+ DCHECK(!seen_constructor);
+ DCHECK(!seen_constructor_fence);
+ seen_constructor = true;
+ } else if (user->IsConstructorFence()) {
+ // The last use we see is the constructor fence.
+ DCHECK(seen_constructor);
+ DCHECK(!seen_constructor_fence);
+ seen_constructor_fence = true;
+ } else {
+ return false;
+ }
+ }
+
+ if (num_args == 0u) {
+ return false;
+ }
+
+ // Check environment uses.
+ for (const HUseListNode<HEnvironment*>& use : sb->GetEnvUses()) {
+ HInstruction* holder = use.GetUser()->GetHolder();
+ if (holder->GetBlock() != block) {
+ return false;
+ }
+ // Accept only calls on the StringBuilder (which shall all be removed).
+ // TODO: Carve-out for const-string? Or rely on environment pruning (to be implemented)?
+ if (holder->InputCount() == 0 || holder->InputAt(0) != sb) {
+ return false;
+ }
+ }
+
+ // Create replacement instruction.
+ HIntConstant* fmt = block->GetGraph()->GetIntConstant(static_cast<int32_t>(format));
+ ArenaAllocator* allocator = block->GetGraph()->GetAllocator();
+ HStringBuilderAppend* append =
+ new (allocator) HStringBuilderAppend(fmt, num_args, allocator, invoke->GetDexPc());
+ append->SetReferenceTypeInfo(invoke->GetReferenceTypeInfo());
+ for (size_t i = 0; i != num_args; ++i) {
+ append->SetArgumentAt(i, args[num_args - 1u - i]);
+ }
+ block->InsertInstructionBefore(append, invoke);
+ invoke->ReplaceWith(append);
+ // Copy environment, except for the StringBuilder uses.
+ for (size_t i = 0, size = invoke->GetEnvironment()->Size(); i != size; ++i) {
+ if (invoke->GetEnvironment()->GetInstructionAt(i) == sb) {
+ invoke->GetEnvironment()->RemoveAsUserOfInput(i);
+ invoke->GetEnvironment()->SetRawEnvAt(i, nullptr);
+ }
+ }
+ append->CopyEnvironmentFrom(invoke->GetEnvironment());
+ // Remove the old instruction.
+ block->RemoveInstruction(invoke);
+ // Remove the StringBuilder's uses and StringBuilder.
+ while (sb->HasNonEnvironmentUses()) {
+ block->RemoveInstruction(sb->GetUses().front().GetUser());
+ }
+ DCHECK(!sb->HasEnvironmentUses());
+ block->RemoveInstruction(sb);
+ return true;
+}
+
// Certain allocation intrinsics are not removed by dead code elimination
// because of potentially throwing an OOM exception or other side effects.
// This method removes such intrinsics when special circumstances allow.
@@ -2493,6 +2674,9 @@
invoke->GetBlock()->RemoveInstruction(invoke);
RecordSimplification();
}
+ } else if (invoke->GetIntrinsic() == Intrinsics::kStringBuilderToString &&
+ TryReplaceStringBuilderAppend(invoke)) {
+ RecordSimplification();
}
}
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 8a8b371..cb53ae3 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -1438,6 +1438,7 @@
M(Shr, BinaryOperation) \
M(StaticFieldGet, Instruction) \
M(StaticFieldSet, Instruction) \
+ M(StringBuilderAppend, Instruction) \
M(UnresolvedInstanceFieldGet, Instruction) \
M(UnresolvedInstanceFieldSet, Instruction) \
M(UnresolvedStaticFieldGet, Instruction) \
@@ -6889,6 +6890,55 @@
const FieldInfo field_info_;
};
+class HStringBuilderAppend final : public HVariableInputSizeInstruction {
+ public:
+ HStringBuilderAppend(HIntConstant* format,
+ uint32_t number_of_arguments,
+ ArenaAllocator* allocator,
+ uint32_t dex_pc)
+ : HVariableInputSizeInstruction(
+ kStringBuilderAppend,
+ DataType::Type::kReference,
+ // The runtime call may read memory from inputs. It never writes outside
+ // of the newly allocated result object (or newly allocated helper objects).
+ SideEffects::AllReads().Union(SideEffects::CanTriggerGC()),
+ dex_pc,
+ allocator,
+ number_of_arguments + /* format */ 1u,
+ kArenaAllocInvokeInputs) {
+ DCHECK_GE(number_of_arguments, 1u); // There must be something to append.
+ SetRawInputAt(FormatIndex(), format);
+ }
+
+ void SetArgumentAt(size_t index, HInstruction* argument) {
+ DCHECK_LE(index, GetNumberOfArguments());
+ SetRawInputAt(index, argument);
+ }
+
+ // Return the number of arguments, excluding the format.
+ size_t GetNumberOfArguments() const {
+ DCHECK_GE(InputCount(), 1u);
+ return InputCount() - 1u;
+ }
+
+ size_t FormatIndex() const {
+ return GetNumberOfArguments();
+ }
+
+ HIntConstant* GetFormat() {
+ return InputAt(FormatIndex())->AsIntConstant();
+ }
+
+ bool NeedsEnvironment() const override { return true; }
+
+ bool CanThrow() const override { return true; }
+
+ DECLARE_INSTRUCTION(StringBuilderAppend);
+
+ protected:
+ DEFAULT_COPY_CONSTRUCTOR(StringBuilderAppend);
+};
+
class HUnresolvedInstanceFieldGet final : public HExpression<1> {
public:
HUnresolvedInstanceFieldGet(HInstruction* obj,