Invoke typed arraycopy for primitive arrays.
Apps will always call the Object version of arraycopy. When
we can infer the types of the arrays being passed, replace
the called method with the typed System.arraycopy overload.
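For illustration, a minimal sketch of the kind of app code this
targets (hypothetical example, not part of this change):

  // The call site compiles against the Object overload of
  // System.arraycopy. Once both arrays are inferred to be int[],
  // the simplifier can retarget the invoke to the typed overload
  // with signature ([II[III)V.
  static int[] copyPrefix(int[] src, int length) {
    int[] dst = new int[length];
    System.arraycopy(src, 0, dst, 0, length);
    return dst;
  }
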
10% improvement on ExoPlayerBench.
Test: 641-checker-arraycopy
Bug: 7103825
Change-Id: I872d7a6e163a4614510ef04ae582eb90ec48b5fa
diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc
index 5d58207..cb6e14b 100644
--- a/compiler/optimizing/bounds_check_elimination_test.cc
+++ b/compiler/optimizing/bounds_check_elimination_test.cc
@@ -43,7 +43,7 @@
void RunBCE() {
graph_->BuildDominatorTree();
- InstructionSimplifier(graph_).Run();
+ InstructionSimplifier(graph_, /* codegen */ nullptr).Run();
SideEffectsAnalysis side_effects(graph_);
side_effects.Run();
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 3e34090..e3926c5 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -549,7 +549,7 @@
is_referrer,
invoke_instruction->GetDexPc(),
/* needs_access_check */ false);
- HLoadClass::LoadKind kind = HSharpening::SharpenClass(
+ HLoadClass::LoadKind kind = HSharpening::ComputeLoadClassKind(
load_class, codegen_, compiler_driver_, caller_compilation_unit_);
DCHECK(kind != HLoadClass::LoadKind::kInvalid)
<< "We should always be able to reference a class for inline caches";
@@ -1491,7 +1491,7 @@
HDeadCodeElimination dce(callee_graph, inline_stats_, "dead_code_elimination$inliner");
HConstantFolding fold(callee_graph, "constant_folding$inliner");
HSharpening sharpening(callee_graph, codegen_, dex_compilation_unit, compiler_driver_, handles_);
- InstructionSimplifier simplify(callee_graph, inline_stats_);
+ InstructionSimplifier simplify(callee_graph, codegen_, inline_stats_);
IntrinsicsRecognizer intrinsics(callee_graph, inline_stats_);
HOptimization* optimizations[] = {
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
index c60f6e5..fe11433 100644
--- a/compiler/optimizing/instruction_builder.cc
+++ b/compiler/optimizing/instruction_builder.cc
@@ -1676,10 +1676,10 @@
dex_pc,
needs_access_check);
- HLoadClass::LoadKind load_kind = HSharpening::SharpenClass(load_class,
- code_generator_,
- compiler_driver_,
- *dex_compilation_unit_);
+ HLoadClass::LoadKind load_kind = HSharpening::ComputeLoadClassKind(load_class,
+ code_generator_,
+ compiler_driver_,
+ *dex_compilation_unit_);
if (load_kind == HLoadClass::LoadKind::kInvalid) {
// We actually cannot reference this class, we're forced to bail.
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index 35f59cb..17421fc 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -19,14 +19,18 @@
#include "escape.h"
#include "intrinsics.h"
#include "mirror/class-inl.h"
+#include "sharpening.h"
#include "scoped_thread_state_change-inl.h"
namespace art {
class InstructionSimplifierVisitor : public HGraphDelegateVisitor {
public:
- InstructionSimplifierVisitor(HGraph* graph, OptimizingCompilerStats* stats)
+ InstructionSimplifierVisitor(HGraph* graph,
+ CodeGenerator* codegen,
+ OptimizingCompilerStats* stats)
: HGraphDelegateVisitor(graph),
+ codegen_(codegen),
stats_(stats) {}
void Run();
@@ -112,6 +116,7 @@
void SimplifyAllocationIntrinsic(HInvoke* invoke);
void SimplifyMemBarrier(HInvoke* invoke, MemBarrierKind barrier_kind);
+ CodeGenerator* codegen_;
OptimizingCompilerStats* stats_;
bool simplification_occurred_ = false;
int simplifications_at_current_position_ = 0;
@@ -123,7 +128,7 @@
};
void InstructionSimplifier::Run() {
- InstructionSimplifierVisitor visitor(graph_, stats_);
+ InstructionSimplifierVisitor visitor(graph_, codegen_, stats_);
visitor.Run();
}
@@ -1805,6 +1810,8 @@
{
ScopedObjectAccess soa(Thread::Current());
+ Primitive::Type source_component_type = Primitive::kPrimVoid;
+ Primitive::Type destination_component_type = Primitive::kPrimVoid;
ReferenceTypeInfo destination_rti = destination->GetReferenceTypeInfo();
if (destination_rti.IsValid()) {
if (destination_rti.IsObjectArray()) {
@@ -1814,6 +1821,8 @@
optimizations.SetDestinationIsTypedObjectArray();
}
if (destination_rti.IsPrimitiveArrayClass()) {
+ destination_component_type =
+ destination_rti.GetTypeHandle()->GetComponentType()->GetPrimitiveType();
optimizations.SetDestinationIsPrimitiveArray();
} else if (destination_rti.IsNonPrimitiveArrayClass()) {
optimizations.SetDestinationIsNonPrimitiveArray();
@@ -1826,10 +1835,55 @@
}
if (source_rti.IsPrimitiveArrayClass()) {
optimizations.SetSourceIsPrimitiveArray();
+ source_component_type = source_rti.GetTypeHandle()->GetComponentType()->GetPrimitiveType();
} else if (source_rti.IsNonPrimitiveArrayClass()) {
optimizations.SetSourceIsNonPrimitiveArray();
}
}
+ // For primitive arrays, use their optimized ArtMethod implementations.
+ if ((source_component_type != Primitive::kPrimVoid) &&
+ (source_component_type == destination_component_type)) {
+ ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+ PointerSize image_size = class_linker->GetImagePointerSize();
+ HInvokeStaticOrDirect* invoke = instruction->AsInvokeStaticOrDirect();
+ mirror::Class* system = invoke->GetResolvedMethod()->GetDeclaringClass();
+ ArtMethod* method = nullptr;
+ switch (source_component_type) {
+ case Primitive::kPrimBoolean:
+ method = system->FindDeclaredDirectMethod("arraycopy", "([ZI[ZII)V", image_size);
+ break;
+ case Primitive::kPrimByte:
+ method = system->FindDeclaredDirectMethod("arraycopy", "([BI[BII)V", image_size);
+ break;
+ case Primitive::kPrimChar:
+ method = system->FindDeclaredDirectMethod("arraycopy", "([CI[CII)V", image_size);
+ break;
+ case Primitive::kPrimShort:
+ method = system->FindDeclaredDirectMethod("arraycopy", "([SI[SII)V", image_size);
+ break;
+ case Primitive::kPrimInt:
+ method = system->FindDeclaredDirectMethod("arraycopy", "([II[III)V", image_size);
+ break;
+ case Primitive::kPrimFloat:
+ method = system->FindDeclaredDirectMethod("arraycopy", "([FI[FII)V", image_size);
+ break;
+ case Primitive::kPrimLong:
+ method = system->FindDeclaredDirectMethod("arraycopy", "([JI[JII)V", image_size);
+ break;
+ case Primitive::kPrimDouble:
+ method = system->FindDeclaredDirectMethod("arraycopy", "([DI[DII)V", image_size);
+ break;
+ default:
+ LOG(FATAL) << "Unreachable";
+ }
+ DCHECK(method != nullptr);
+ invoke->SetResolvedMethod(method);
+ // Sharpen the new invoke. Note that we do not update the dex method index of
+ // the invoke, as we would need to look it up in the current dex file, and it
+ is unlikely to exist there. The most common case for such typed
+ // arraycopy methods is a direct pointer to the boot image.
+ HSharpening::SharpenInvokeStaticOrDirect(invoke, codegen_);
+ }
}
}
diff --git a/compiler/optimizing/instruction_simplifier.h b/compiler/optimizing/instruction_simplifier.h
index 7fe1067..f7329a4 100644
--- a/compiler/optimizing/instruction_simplifier.h
+++ b/compiler/optimizing/instruction_simplifier.h
@@ -23,6 +23,8 @@
namespace art {
+class CodeGenerator;
+
/**
* Implements optimizations specific to each instruction.
*
@@ -36,15 +38,19 @@
class InstructionSimplifier : public HOptimization {
public:
explicit InstructionSimplifier(HGraph* graph,
+ CodeGenerator* codegen,
OptimizingCompilerStats* stats = nullptr,
const char* name = kInstructionSimplifierPassName)
- : HOptimization(graph, name, stats) {}
+ : HOptimization(graph, name, stats),
+ codegen_(codegen) {}
static constexpr const char* kInstructionSimplifierPassName = "instruction_simplifier";
void Run() OVERRIDE;
private:
+ CodeGenerator* codegen_;
+
DISALLOW_COPY_AND_ASSIGN(InstructionSimplifier);
};
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index c39aed2..69ef9e1 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -3915,6 +3915,7 @@
bool IsIntrinsic() const { return intrinsic_ != Intrinsics::kNone; }
ArtMethod* GetResolvedMethod() const { return resolved_method_; }
+ void SetResolvedMethod(ArtMethod* method) { resolved_method_ = method; }
DECLARE_ABSTRACT_INSTRUCTION(Invoke);
@@ -3957,7 +3958,7 @@
}
uint32_t number_of_arguments_;
- ArtMethod* const resolved_method_;
+ ArtMethod* resolved_method_;
const uint32_t dex_method_index_;
Intrinsics intrinsic_;
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 3842ef9..4fe846f 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -507,7 +507,7 @@
} else if (opt_name == HInductionVarAnalysis::kInductionPassName) {
return new (arena) HInductionVarAnalysis(graph);
} else if (opt_name == InstructionSimplifier::kInstructionSimplifierPassName) {
- return new (arena) InstructionSimplifier(graph, stats, pass_name.c_str());
+ return new (arena) InstructionSimplifier(graph, codegen, stats, pass_name.c_str());
} else if (opt_name == IntrinsicsRecognizer::kIntrinsicsRecognizerPassName) {
return new (arena) IntrinsicsRecognizer(graph, stats);
} else if (opt_name == LICM::kLoopInvariantCodeMotionPassName) {
@@ -768,7 +768,7 @@
HDeadCodeElimination* dce3 = new (arena) HDeadCodeElimination(
graph, stats, "dead_code_elimination$final");
HConstantFolding* fold1 = new (arena) HConstantFolding(graph, "constant_folding");
- InstructionSimplifier* simplify1 = new (arena) InstructionSimplifier(graph, stats);
+ InstructionSimplifier* simplify1 = new (arena) InstructionSimplifier(graph, codegen, stats);
HSelectGenerator* select_generator = new (arena) HSelectGenerator(graph, stats);
HConstantFolding* fold2 = new (arena) HConstantFolding(
graph, "constant_folding$after_inlining");
@@ -783,11 +783,11 @@
HSharpening* sharpening = new (arena) HSharpening(
graph, codegen, dex_compilation_unit, driver, handles);
InstructionSimplifier* simplify2 = new (arena) InstructionSimplifier(
- graph, stats, "instruction_simplifier$after_inlining");
+ graph, codegen, stats, "instruction_simplifier$after_inlining");
InstructionSimplifier* simplify3 = new (arena) InstructionSimplifier(
- graph, stats, "instruction_simplifier$after_bce");
+ graph, codegen, stats, "instruction_simplifier$after_bce");
InstructionSimplifier* simplify4 = new (arena) InstructionSimplifier(
- graph, stats, "instruction_simplifier$before_codegen");
+ graph, codegen, stats, "instruction_simplifier$before_codegen");
IntrinsicsRecognizer* intrinsics = new (arena) IntrinsicsRecognizer(graph, stats);
CHAGuardOptimization* cha_guard = new (arena) CHAGuardOptimization(graph);
CodeSinking* code_sinking = new (arena) CodeSinking(graph, stats);
diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc
index be40092..8f1827b 100644
--- a/compiler/optimizing/sharpening.cc
+++ b/compiler/optimizing/sharpening.cc
@@ -41,7 +41,7 @@
for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
HInstruction* instruction = it.Current();
if (instruction->IsInvokeStaticOrDirect()) {
- ProcessInvokeStaticOrDirect(instruction->AsInvokeStaticOrDirect());
+ SharpenInvokeStaticOrDirect(instruction->AsInvokeStaticOrDirect(), codegen_);
} else if (instruction->IsLoadString()) {
ProcessLoadString(instruction->AsLoadString());
}
@@ -70,7 +70,9 @@
return IsInBootImage(method) && !options.GetCompilePic() && !options.GetIncludePatchInformation();
}
-void HSharpening::ProcessInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
+
+void HSharpening::SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke,
+ CodeGenerator* codegen) {
if (invoke->IsStringInit()) {
// Not using the dex cache arrays. But we could still try to use a better dispatch...
// TODO: Use direct_method and direct_code for the appropriate StringFactory method.
@@ -97,12 +99,12 @@
// We don't optimize for debuggable as it would prevent us from obsoleting the method in some
// situations.
- if (callee == codegen_->GetGraph()->GetArtMethod() && !codegen_->GetGraph()->IsDebuggable()) {
+ if (callee == codegen->GetGraph()->GetArtMethod() && !codegen->GetGraph()->IsDebuggable()) {
// Recursive call.
method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kRecursive;
code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallSelf;
} else if (Runtime::Current()->UseJitCompilation() ||
- AOTCanEmbedMethod(callee, codegen_->GetCompilerOptions())) {
+ AOTCanEmbedMethod(callee, codegen->GetCompilerOptions())) {
// JIT or on-device AOT compilation referencing a boot image method.
// Use the method address directly.
method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress;
@@ -111,13 +113,17 @@
} else {
// Use PC-relative access to the dex cache arrays.
method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative;
- DexCacheArraysLayout layout(GetInstructionSetPointerSize(codegen_->GetInstructionSet()),
- &graph_->GetDexFile());
+ // Note: we use the invoke's graph instead of the codegen graph, which can
+ // differ when inlining (the codegen graph is the outermost graph). The
+ // invoke's dex method index is relative to the dex file the invoke's graph
+ // was built from.
+ DexCacheArraysLayout layout(GetInstructionSetPointerSize(codegen->GetInstructionSet()),
+ &invoke->GetBlock()->GetGraph()->GetDexFile());
method_load_data = layout.MethodOffset(invoke->GetDexMethodIndex());
code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod;
}
- if (graph_->IsDebuggable()) {
+ if (codegen->GetGraph()->IsDebuggable()) {
// For debuggable apps always use the code pointer from ArtMethod
// so that we don't circumvent instrumentation stubs if installed.
code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod;
@@ -127,14 +133,14 @@
method_load_kind, code_ptr_location, method_load_data
};
HInvokeStaticOrDirect::DispatchInfo dispatch_info =
- codegen_->GetSupportedInvokeStaticOrDirectDispatch(desired_dispatch_info, invoke);
+ codegen->GetSupportedInvokeStaticOrDirectDispatch(desired_dispatch_info, invoke);
invoke->SetDispatchInfo(dispatch_info);
}
-HLoadClass::LoadKind HSharpening::SharpenClass(HLoadClass* load_class,
- CodeGenerator* codegen,
- CompilerDriver* compiler_driver,
- const DexCompilationUnit& dex_compilation_unit) {
+HLoadClass::LoadKind HSharpening::ComputeLoadClassKind(HLoadClass* load_class,
+ CodeGenerator* codegen,
+ CompilerDriver* compiler_driver,
+ const DexCompilationUnit& dex_compilation_unit) {
Handle<mirror::Class> klass = load_class->GetClass();
DCHECK(load_class->GetLoadKind() == HLoadClass::LoadKind::kDexCacheViaMethod ||
load_class->GetLoadKind() == HLoadClass::LoadKind::kReferrersClass)
diff --git a/compiler/optimizing/sharpening.h b/compiler/optimizing/sharpening.h
index 4240b2f..10707c7 100644
--- a/compiler/optimizing/sharpening.h
+++ b/compiler/optimizing/sharpening.h
@@ -48,14 +48,16 @@
static constexpr const char* kSharpeningPassName = "sharpening";
// Used by the builder and the inliner.
- static HLoadClass::LoadKind SharpenClass(HLoadClass* load_class,
- CodeGenerator* codegen,
- CompilerDriver* compiler_driver,
- const DexCompilationUnit& dex_compilation_unit)
+ static HLoadClass::LoadKind ComputeLoadClassKind(HLoadClass* load_class,
+ CodeGenerator* codegen,
+ CompilerDriver* compiler_driver,
+ const DexCompilationUnit& dex_compilation_unit)
REQUIRES_SHARED(Locks::mutator_lock_);
+ // Used by Sharpening and InstructionSimplifier.
+ static void SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke, CodeGenerator* codegen);
+
private:
- void ProcessInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke);
void ProcessLoadString(HLoadString* load_string);
CodeGenerator* codegen_;