ART: Optimize StringBuilder append pattern.

Recognize appending with StringBuilder and replace the
entire expression with a runtime call that perfoms the
append in a more efficient manner.

For now, require the entire pattern to be in a single block
and be very strict about the StringBuilder environment uses.
Also, do not accept StringBuilder/char[]/Object/float/double
arguments as they throw non-OOME exceptions and/or require a
call from the entrypoint back to a helper function in Java;
these shall be implemented later.

Boot image size for aosp_taimen-userdebug:
 - before:
   arm/boot*.oat: 19653872
   arm64/boot*.oat: 23292784
   oat/arm64/services.odex: 22408664
 - after:
   arm/boot*.oat: 19432184 (-216KiB)
   arm64/boot*.oat: 22992488 (-293KiB)
   oat/arm64/services.odex: 22376776 (-31KiB)
Note that const-string in compiled boot image methods cannot
throw, but for apps it can and therefore its environment can
prevent the optimization for apps. We could implement either
a simple carve-out for const-string or generic environment
pruning to allow this pattern to be applied more often.

Results for the new StringBuilderAppendBenchmark on taimen:
  timeAppendLongStrings: ~700ns -> ~200ns
  timeAppendStringAndInt: ~220ns -> ~140ns
  timeAppendStrings: ~200ns -> 130ns

Bug: 19575890
Test: 697-checker-string-append
Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing
Test: aosp_taimen-userdebug boots.
Test: run-gtests.sh
Test: testrunner.py --target --optimizing
Test: vogar --benchmark art/benchmark/stringbuilder-append/src/StringBuilderAppendBenchmark.java
Change-Id: I51789bf299f5219f68ada4c077b6a1d3fe083964
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index e829576..9cc5cad 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -25,6 +25,7 @@
 #include "mirror/class-inl.h"
 #include "scoped_thread_state_change-inl.h"
 #include "sharpening.h"
+#include "string_builder_append.h"
 
 namespace art {
 
@@ -2479,6 +2480,186 @@
   return false;
 }
 
+static bool TryReplaceStringBuilderAppend(HInvoke* invoke) {
+  DCHECK_EQ(invoke->GetIntrinsic(), Intrinsics::kStringBuilderToString);
+  if (invoke->CanThrowIntoCatchBlock()) {
+    return false;
+  }
+
+  HBasicBlock* block = invoke->GetBlock();
+  HInstruction* sb = invoke->InputAt(0);
+
+  // We support only a new StringBuilder, otherwise we cannot ensure that
+  // the StringBuilder data does not need to be populated for other users.
+  if (!sb->IsNewInstance()) {
+    return false;
+  }
+
+  // For now, we support only single-block recognition.
+  // (Ternary operators feeding the append could be implemented.)
+  for (const HUseListNode<HInstruction*>& use : sb->GetUses()) {
+    if (use.GetUser()->GetBlock() != block) {
+      return false;
+    }
+  }
+
+  // Collect args and check for unexpected uses.
+  // We expect one call to a constructor with no arguments, one constructor fence (unless
+  // eliminated), some number of append calls and one call to StringBuilder.toString().
+  bool seen_constructor = false;
+  bool seen_constructor_fence = false;
+  bool seen_to_string = false;
+  uint32_t format = 0u;
+  uint32_t num_args = 0u;
+  HInstruction* args[StringBuilderAppend::kMaxArgs];  // Added in reverse order.
+  for (const HUseListNode<HInstruction*>& use : sb->GetUses()) {
+    // The append pattern uses the StringBuilder only as the first argument.
+    if (use.GetIndex() != 0u) {
+      return false;
+    }
+    // We see the uses in reverse order because they are inserted at the front
+    // of the singly-linked list, so the StringBuilder.append() must come first.
+    HInstruction* user = use.GetUser();
+    if (!seen_to_string) {
+      if (user->IsInvokeVirtual() &&
+          user->AsInvokeVirtual()->GetIntrinsic() == Intrinsics::kStringBuilderToString) {
+        seen_to_string = true;
+        continue;
+      } else {
+        return false;
+      }
+    }
+    // Then we should see the arguments.
+    if (user->IsInvokeVirtual()) {
+      HInvokeVirtual* as_invoke_virtual = user->AsInvokeVirtual();
+      DCHECK(!seen_constructor);
+      DCHECK(!seen_constructor_fence);
+      StringBuilderAppend::Argument arg;
+      switch (as_invoke_virtual->GetIntrinsic()) {
+        case Intrinsics::kStringBuilderAppendObject:
+          // TODO: Unimplemented, needs to call String.valueOf().
+          return false;
+        case Intrinsics::kStringBuilderAppendString:
+          arg = StringBuilderAppend::Argument::kString;
+          break;
+        case Intrinsics::kStringBuilderAppendCharArray:
+          // TODO: Unimplemented, StringBuilder.append(char[]) can throw NPE and we would
+          // not have the correct stack trace for it.
+          return false;
+        case Intrinsics::kStringBuilderAppendBoolean:
+          arg = StringBuilderAppend::Argument::kBoolean;
+          break;
+        case Intrinsics::kStringBuilderAppendChar:
+          arg = StringBuilderAppend::Argument::kChar;
+          break;
+        case Intrinsics::kStringBuilderAppendInt:
+          arg = StringBuilderAppend::Argument::kInt;
+          break;
+        case Intrinsics::kStringBuilderAppendLong:
+          arg = StringBuilderAppend::Argument::kLong;
+          break;
+        case Intrinsics::kStringBuilderAppendCharSequence: {
+          ReferenceTypeInfo rti = user->AsInvokeVirtual()->InputAt(1)->GetReferenceTypeInfo();
+          if (!rti.IsValid()) {
+            return false;
+          }
+          ScopedObjectAccess soa(Thread::Current());
+          Handle<mirror::Class> input_type = rti.GetTypeHandle();
+          DCHECK(input_type != nullptr);
+          if (input_type.Get() == GetClassRoot<mirror::String>()) {
+            arg = StringBuilderAppend::Argument::kString;
+          } else {
+            // TODO: Check and implement for StringBuilder. We could find the StringBuilder's
+            // internal char[] inconsistent with the length, or the string compression
+            // of the result could be compromised with a concurrent modification, and
+            // we would need to throw appropriate exceptions.
+            return false;
+          }
+          break;
+        }
+        case Intrinsics::kStringBuilderAppendFloat:
+        case Intrinsics::kStringBuilderAppendDouble:
+          // TODO: Unimplemented, needs to call FloatingDecimal.getBinaryToASCIIConverter().
+          return false;
+        default: {
+          return false;
+        }
+      }
+      // Uses of the append return value should have been replaced with the first input.
+      DCHECK(!as_invoke_virtual->HasUses());
+      DCHECK(!as_invoke_virtual->HasEnvironmentUses());
+      if (num_args == StringBuilderAppend::kMaxArgs) {
+        return false;
+      }
+      format = (format << StringBuilderAppend::kBitsPerArg) | static_cast<uint32_t>(arg);
+      args[num_args] = as_invoke_virtual->InputAt(1u);
+      ++num_args;
+    } else if (user->IsInvokeStaticOrDirect() &&
+               user->AsInvokeStaticOrDirect()->GetResolvedMethod() != nullptr &&
+               user->AsInvokeStaticOrDirect()->GetResolvedMethod()->IsConstructor() &&
+               user->AsInvokeStaticOrDirect()->GetNumberOfArguments() == 1u) {
+      // After arguments, we should see the constructor.
+      // We accept only the constructor with no extra arguments.
+      DCHECK(!seen_constructor);
+      DCHECK(!seen_constructor_fence);
+      seen_constructor = true;
+    } else if (user->IsConstructorFence()) {
+      // The last use we see is the constructor fence.
+      DCHECK(seen_constructor);
+      DCHECK(!seen_constructor_fence);
+      seen_constructor_fence = true;
+    } else {
+      return false;
+    }
+  }
+
+  if (num_args == 0u) {
+    return false;
+  }
+
+  // Check environment uses.
+  for (const HUseListNode<HEnvironment*>& use : sb->GetEnvUses()) {
+    HInstruction* holder = use.GetUser()->GetHolder();
+    if (holder->GetBlock() != block) {
+      return false;
+    }
+    // Accept only calls on the StringBuilder (which shall all be removed).
+    // TODO: Carve-out for const-string? Or rely on environment pruning (to be implemented)?
+    if (holder->InputCount() == 0 || holder->InputAt(0) != sb) {
+      return false;
+    }
+  }
+
+  // Create replacement instruction.
+  HIntConstant* fmt = block->GetGraph()->GetIntConstant(static_cast<int32_t>(format));
+  ArenaAllocator* allocator = block->GetGraph()->GetAllocator();
+  HStringBuilderAppend* append =
+      new (allocator) HStringBuilderAppend(fmt, num_args, allocator, invoke->GetDexPc());
+  append->SetReferenceTypeInfo(invoke->GetReferenceTypeInfo());
+  for (size_t i = 0; i != num_args; ++i) {
+    append->SetArgumentAt(i, args[num_args - 1u - i]);
+  }
+  block->InsertInstructionBefore(append, invoke);
+  invoke->ReplaceWith(append);
+  // Copy environment, except for the StringBuilder uses.
+  for (size_t i = 0, size = invoke->GetEnvironment()->Size(); i != size; ++i) {
+    if (invoke->GetEnvironment()->GetInstructionAt(i) == sb) {
+      invoke->GetEnvironment()->RemoveAsUserOfInput(i);
+      invoke->GetEnvironment()->SetRawEnvAt(i, nullptr);
+    }
+  }
+  append->CopyEnvironmentFrom(invoke->GetEnvironment());
+  // Remove the old instruction.
+  block->RemoveInstruction(invoke);
+  // Remove the StringBuilder's uses and StringBuilder.
+  while (sb->HasNonEnvironmentUses()) {
+    block->RemoveInstruction(sb->GetUses().front().GetUser());
+  }
+  DCHECK(!sb->HasEnvironmentUses());
+  block->RemoveInstruction(sb);
+  return true;
+}
+
 // Certain allocation intrinsics are not removed by dead code elimination
 // because of potentially throwing an OOM exception or other side effects.
 // This method removes such intrinsics when special circumstances allow.
@@ -2493,6 +2674,9 @@
       invoke->GetBlock()->RemoveInstruction(invoke);
       RecordSimplification();
     }
+  } else if (invoke->GetIntrinsic() == Intrinsics::kStringBuilderToString &&
+             TryReplaceStringBuilderAppend(invoke)) {
+    RecordSimplification();
   }
 }