Diffstat (limited to 'compiler/optimizing')
-rw-r--r--  compiler/optimizing/block_builder.cc | 2
-rw-r--r--  compiler/optimizing/block_builder.h | 3
-rw-r--r--  compiler/optimizing/block_namer.cc | 2
-rw-r--r--  compiler/optimizing/block_namer.h | 4
-rw-r--r--  compiler/optimizing/bounds_check_elimination.cc | 20
-rw-r--r--  compiler/optimizing/bounds_check_elimination.h | 3
-rw-r--r--  compiler/optimizing/bounds_check_elimination_test.cc | 3
-rw-r--r--  compiler/optimizing/builder.cc | 4
-rw-r--r--  compiler/optimizing/builder.h | 3
-rw-r--r--  compiler/optimizing/cha_guard_optimization.cc | 3
-rw-r--r--  compiler/optimizing/cha_guard_optimization.h | 3
-rw-r--r--  compiler/optimizing/code_generator.cc | 175
-rw-r--r--  compiler/optimizing/code_generator.h | 44
-rw-r--r--  compiler/optimizing/code_generator_arm64.cc | 260
-rw-r--r--  compiler/optimizing/code_generator_arm64.h | 52
-rw-r--r--  compiler/optimizing/code_generator_arm_vixl.cc | 289
-rw-r--r--  compiler/optimizing/code_generator_arm_vixl.h | 74
-rw-r--r--  compiler/optimizing/code_generator_riscv64.h | 23
-rw-r--r--  compiler/optimizing/code_generator_utils.cc | 2
-rw-r--r--  compiler/optimizing/code_generator_utils.h | 4
-rw-r--r--  compiler/optimizing/code_generator_vector_arm64_neon.cc | 12
-rw-r--r--  compiler/optimizing/code_generator_vector_arm64_sve.cc | 15
-rw-r--r--  compiler/optimizing/code_generator_vector_arm_vixl.cc | 6
-rw-r--r--  compiler/optimizing/code_generator_vector_x86.cc | 12
-rw-r--r--  compiler/optimizing/code_generator_vector_x86_64.cc | 12
-rw-r--r--  compiler/optimizing/code_generator_x86.cc | 252
-rw-r--r--  compiler/optimizing/code_generator_x86.h | 75
-rw-r--r--  compiler/optimizing/code_generator_x86_64.cc | 226
-rw-r--r--  compiler/optimizing/code_generator_x86_64.h | 58
-rw-r--r--  compiler/optimizing/code_sinking.cc | 282
-rw-r--r--  compiler/optimizing/code_sinking.h | 11
-rw-r--r--  compiler/optimizing/codegen_test.cc | 4
-rw-r--r--  compiler/optimizing/codegen_test_utils.h | 23
-rw-r--r--  compiler/optimizing/common_arm.h | 3
-rw-r--r--  compiler/optimizing/common_arm64.h | 11
-rw-r--r--  compiler/optimizing/common_dominator.h | 3
-rw-r--r--  compiler/optimizing/constant_folding.cc | 318
-rw-r--r--  compiler/optimizing/constant_folding.h | 13
-rw-r--r--  compiler/optimizing/constant_folding_test.cc | 10
-rw-r--r--  compiler/optimizing/constructor_fence_redundancy_elimination.cc | 14
-rw-r--r--  compiler/optimizing/constructor_fence_redundancy_elimination.h | 3
-rw-r--r--  compiler/optimizing/critical_native_abi_fixup_arm.cc | 11
-rw-r--r--  compiler/optimizing/critical_native_abi_fixup_arm.h | 3
-rw-r--r--  compiler/optimizing/data_type-inl.h | 2
-rw-r--r--  compiler/optimizing/data_type.cc | 2
-rw-r--r--  compiler/optimizing/data_type.h | 3
-rw-r--r--  compiler/optimizing/data_type_test.cc | 2
-rw-r--r--  compiler/optimizing/dead_code_elimination.cc | 527
-rw-r--r--  compiler/optimizing/dead_code_elimination.h | 81
-rw-r--r--  compiler/optimizing/dead_code_elimination_test.cc | 5
-rw-r--r--  compiler/optimizing/dominator_test.cc | 5
-rw-r--r--  compiler/optimizing/escape.cc | 2
-rw-r--r--  compiler/optimizing/escape.h | 4
-rw-r--r--  compiler/optimizing/execution_subgraph.cc | 2
-rw-r--r--  compiler/optimizing/execution_subgraph.h | 3
-rw-r--r--  compiler/optimizing/execution_subgraph_test.cc | 2
-rw-r--r--  compiler/optimizing/execution_subgraph_test.h | 4
-rw-r--r--  compiler/optimizing/find_loops_test.cc | 5
-rw-r--r--  compiler/optimizing/graph_checker.cc | 269
-rw-r--r--  compiler/optimizing/graph_checker.h | 36
-rw-r--r--  compiler/optimizing/graph_checker_test.cc | 5
-rw-r--r--  compiler/optimizing/graph_test.cc | 3
-rw-r--r--  compiler/optimizing/graph_visualizer.cc | 30
-rw-r--r--  compiler/optimizing/graph_visualizer.h | 3
-rw-r--r--  compiler/optimizing/gvn.cc | 2
-rw-r--r--  compiler/optimizing/gvn.h | 3
-rw-r--r--  compiler/optimizing/gvn_test.cc | 3
-rw-r--r--  compiler/optimizing/induction_var_analysis.cc | 98
-rw-r--r--  compiler/optimizing/induction_var_analysis.h | 16
-rw-r--r--  compiler/optimizing/induction_var_analysis_test.cc | 3
-rw-r--r--  compiler/optimizing/induction_var_range.cc | 56
-rw-r--r--  compiler/optimizing/induction_var_range.h | 12
-rw-r--r--  compiler/optimizing/induction_var_range_test.cc | 7
-rw-r--r--  compiler/optimizing/inliner.cc | 239
-rw-r--r--  compiler/optimizing/inliner.h | 41
-rw-r--r--  compiler/optimizing/instruction_builder.cc | 54
-rw-r--r--  compiler/optimizing/instruction_builder.h | 3
-rw-r--r--  compiler/optimizing/instruction_simplifier.cc | 121
-rw-r--r--  compiler/optimizing/instruction_simplifier.h | 3
-rw-r--r--  compiler/optimizing/instruction_simplifier_arm.cc | 4
-rw-r--r--  compiler/optimizing/instruction_simplifier_arm.h | 3
-rw-r--r--  compiler/optimizing/instruction_simplifier_arm64.cc | 4
-rw-r--r--  compiler/optimizing/instruction_simplifier_arm64.h | 3
-rw-r--r--  compiler/optimizing/instruction_simplifier_shared.cc | 4
-rw-r--r--  compiler/optimizing/instruction_simplifier_shared.h | 3
-rw-r--r--  compiler/optimizing/instruction_simplifier_test.cc | 63
-rw-r--r--  compiler/optimizing/instruction_simplifier_x86.cc | 4
-rw-r--r--  compiler/optimizing/instruction_simplifier_x86.h | 3
-rw-r--r--  compiler/optimizing/instruction_simplifier_x86_64.cc | 4
-rw-r--r--  compiler/optimizing/instruction_simplifier_x86_64.h | 3
-rw-r--r--  compiler/optimizing/instruction_simplifier_x86_shared.cc | 3
-rw-r--r--  compiler/optimizing/instruction_simplifier_x86_shared.h | 5
-rw-r--r--  compiler/optimizing/intrinsic_objects.cc | 2
-rw-r--r--  compiler/optimizing/intrinsic_objects.h | 11
-rw-r--r--  compiler/optimizing/intrinsics.cc | 17
-rw-r--r--  compiler/optimizing/intrinsics.h | 3
-rw-r--r--  compiler/optimizing/intrinsics_arm64.cc | 252
-rw-r--r--  compiler/optimizing/intrinsics_arm64.h | 3
-rw-r--r--  compiler/optimizing/intrinsics_arm_vixl.cc | 142
-rw-r--r--  compiler/optimizing/intrinsics_arm_vixl.h | 3
-rw-r--r--  compiler/optimizing/intrinsics_utils.h | 2
-rw-r--r--  compiler/optimizing/intrinsics_x86.cc | 152
-rw-r--r--  compiler/optimizing/intrinsics_x86.h | 3
-rw-r--r--  compiler/optimizing/intrinsics_x86_64.cc | 141
-rw-r--r--  compiler/optimizing/intrinsics_x86_64.h | 3
-rw-r--r--  compiler/optimizing/licm.cc | 2
-rw-r--r--  compiler/optimizing/licm.h | 3
-rw-r--r--  compiler/optimizing/licm_test.cc | 3
-rw-r--r--  compiler/optimizing/linear_order.cc | 2
-rw-r--r--  compiler/optimizing/linear_order.h | 3
-rw-r--r--  compiler/optimizing/linearize_test.cc | 5
-rw-r--r--  compiler/optimizing/live_interval_test.cc | 3
-rw-r--r--  compiler/optimizing/live_ranges_test.cc | 5
-rw-r--r--  compiler/optimizing/liveness_test.cc | 5
-rw-r--r--  compiler/optimizing/load_store_analysis.cc | 10
-rw-r--r--  compiler/optimizing/load_store_analysis.h | 74
-rw-r--r--  compiler/optimizing/load_store_analysis_test.cc | 89
-rw-r--r--  compiler/optimizing/load_store_elimination.cc | 165
-rw-r--r--  compiler/optimizing/load_store_elimination.h | 3
-rw-r--r--  compiler/optimizing/load_store_elimination_test.cc | 385
-rw-r--r--  compiler/optimizing/locations.cc | 15
-rw-r--r--  compiler/optimizing/locations.h | 11
-rw-r--r--  compiler/optimizing/loop_analysis.cc | 2
-rw-r--r--  compiler/optimizing/loop_analysis.h | 3
-rw-r--r--  compiler/optimizing/loop_optimization.cc | 69
-rw-r--r--  compiler/optimizing/loop_optimization.h | 23
-rw-r--r--  compiler/optimizing/loop_optimization_test.cc | 8
-rw-r--r--  compiler/optimizing/nodes.cc | 306
-rw-r--r--  compiler/optimizing/nodes.h | 262
-rw-r--r--  compiler/optimizing/nodes_shared.cc | 2
-rw-r--r--  compiler/optimizing/nodes_shared.h | 2
-rw-r--r--  compiler/optimizing/nodes_test.cc | 3
-rw-r--r--  compiler/optimizing/nodes_vector.h | 2
-rw-r--r--  compiler/optimizing/nodes_vector_test.cc | 3
-rw-r--r--  compiler/optimizing/nodes_x86.h | 2
-rw-r--r--  compiler/optimizing/optimization.cc | 19
-rw-r--r--  compiler/optimizing/optimization.h | 7
-rw-r--r--  compiler/optimizing/optimizing_cfi_test.cc | 20
-rw-r--r--  compiler/optimizing/optimizing_compiler.cc | 112
-rw-r--r--  compiler/optimizing/optimizing_compiler.h | 7
-rw-r--r--  compiler/optimizing/optimizing_compiler_stats.h | 17
-rw-r--r--  compiler/optimizing/optimizing_unit_test.h | 52
-rw-r--r--  compiler/optimizing/parallel_move_resolver.cc | 2
-rw-r--r--  compiler/optimizing/parallel_move_resolver.h | 3
-rw-r--r--  compiler/optimizing/parallel_move_test.cc | 3
-rw-r--r--  compiler/optimizing/pc_relative_fixups_x86.cc | 4
-rw-r--r--  compiler/optimizing/pc_relative_fixups_x86.h | 3
-rw-r--r--  compiler/optimizing/prepare_for_register_allocation.cc | 21
-rw-r--r--  compiler/optimizing/prepare_for_register_allocation.h | 3
-rw-r--r--  compiler/optimizing/pretty_printer.h | 3
-rw-r--r--  compiler/optimizing/pretty_printer_test.cc | 5
-rw-r--r--  compiler/optimizing/reference_type_propagation.cc | 94
-rw-r--r--  compiler/optimizing/reference_type_propagation.h | 8
-rw-r--r--  compiler/optimizing/reference_type_propagation_test.cc | 10
-rw-r--r--  compiler/optimizing/register_allocation_resolver.cc | 2
-rw-r--r--  compiler/optimizing/register_allocation_resolver.h | 3
-rw-r--r--  compiler/optimizing/register_allocator.cc | 2
-rw-r--r--  compiler/optimizing/register_allocator.h | 2
-rw-r--r--  compiler/optimizing/register_allocator_graph_color.cc | 2
-rw-r--r--  compiler/optimizing/register_allocator_graph_color.h | 2
-rw-r--r--  compiler/optimizing/register_allocator_linear_scan.cc | 2
-rw-r--r--  compiler/optimizing/register_allocator_linear_scan.h | 2
-rw-r--r--  compiler/optimizing/register_allocator_test.cc | 7
-rw-r--r--  compiler/optimizing/scheduler.cc | 5
-rw-r--r--  compiler/optimizing/scheduler.h | 3
-rw-r--r--  compiler/optimizing/scheduler_arm.cc | 6
-rw-r--r--  compiler/optimizing/scheduler_arm.h | 12
-rw-r--r--  compiler/optimizing/scheduler_arm64.cc | 2
-rw-r--r--  compiler/optimizing/scheduler_arm64.h | 5
-rw-r--r--  compiler/optimizing/scheduler_test.cc | 5
-rw-r--r--  compiler/optimizing/select_generator.cc | 377
-rw-r--r--  compiler/optimizing/select_generator.h | 42
-rw-r--r--  compiler/optimizing/select_generator_test.cc | 3
-rw-r--r--  compiler/optimizing/sharpening.cc | 6
-rw-r--r--  compiler/optimizing/sharpening.h | 3
-rw-r--r--  compiler/optimizing/side_effects_analysis.cc | 2
-rw-r--r--  compiler/optimizing/side_effects_analysis.h | 3
-rw-r--r--  compiler/optimizing/side_effects_test.cc | 3
-rw-r--r--  compiler/optimizing/ssa_builder.cc | 3
-rw-r--r--  compiler/optimizing/ssa_builder.h | 3
-rw-r--r--  compiler/optimizing/ssa_liveness_analysis.cc | 2
-rw-r--r--  compiler/optimizing/ssa_liveness_analysis.h | 3
-rw-r--r--  compiler/optimizing/ssa_liveness_analysis_test.cc | 3
-rw-r--r--  compiler/optimizing/ssa_phi_elimination.cc | 2
-rw-r--r--  compiler/optimizing/ssa_phi_elimination.h | 3
-rw-r--r--  compiler/optimizing/ssa_test.cc | 5
-rw-r--r--  compiler/optimizing/stack_map_stream.cc | 28
-rw-r--r--  compiler/optimizing/stack_map_stream.h | 22
-rw-r--r--  compiler/optimizing/stack_map_test.cc | 59
-rw-r--r--  compiler/optimizing/superblock_cloner.cc | 4
-rw-r--r--  compiler/optimizing/superblock_cloner.h | 3
-rw-r--r--  compiler/optimizing/superblock_cloner_test.cc | 3
-rw-r--r--  compiler/optimizing/suspend_check_test.cc | 5
-rw-r--r--  compiler/optimizing/write_barrier_elimination.cc | 161
-rw-r--r--  compiler/optimizing/write_barrier_elimination.h | 56
-rw-r--r--  compiler/optimizing/x86_memory_gen.cc | 4
-rw-r--r--  compiler/optimizing/x86_memory_gen.h | 3
197 files changed, 5145 insertions, 2437 deletions
diff --git a/compiler/optimizing/block_builder.cc b/compiler/optimizing/block_builder.cc
index e1f061ae70..703584c537 100644
--- a/compiler/optimizing/block_builder.cc
+++ b/compiler/optimizing/block_builder.cc
@@ -22,7 +22,7 @@
#include "dex/dex_file_exception_helpers.h"
#include "quicken_info.h"
-namespace art {
+namespace art HIDDEN {
HBasicBlockBuilder::HBasicBlockBuilder(HGraph* graph,
const DexFile* const dex_file,
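
The `namespace art {` to `namespace art HIDDEN {` change that recurs throughout this patch hangs a symbol-visibility attribute off the namespace, so compiler-internal classes are no longer exported from the shared library by default. A rough, self-contained sketch of that pattern follows; the macro names and the exact definition in ART's base/macros.h are assumptions here, not taken from this diff.

// Hypothetical sketch of the namespace-visibility pattern used above.
// HIDDEN is assumed to expand to a GCC/Clang visibility attribute;
// EXPORT would be the public counterpart.
#if defined(__GNUC__)
#define HIDDEN __attribute__((visibility("hidden")))
#define EXPORT __attribute__((visibility("default")))
#else
#define HIDDEN
#define EXPORT
#endif

// Every symbol declared inside the namespace now defaults to hidden
// visibility, so it is not exported unless explicitly marked EXPORT.
namespace art HIDDEN {
class HBasicBlockBuilder { /* ... */ };
}  // namespace art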
diff --git a/compiler/optimizing/block_builder.h b/compiler/optimizing/block_builder.h
index 42a3f327e7..8668ef8221 100644
--- a/compiler/optimizing/block_builder.h
+++ b/compiler/optimizing/block_builder.h
@@ -17,13 +17,14 @@
#ifndef ART_COMPILER_OPTIMIZING_BLOCK_BUILDER_H_
#define ART_COMPILER_OPTIMIZING_BLOCK_BUILDER_H_
+#include "base/macros.h"
#include "base/scoped_arena_allocator.h"
#include "base/scoped_arena_containers.h"
#include "dex/code_item_accessors.h"
#include "dex/dex_file.h"
#include "nodes.h"
-namespace art {
+namespace art HIDDEN {
class HBasicBlockBuilder : public ValueObject {
public:
diff --git a/compiler/optimizing/block_namer.cc b/compiler/optimizing/block_namer.cc
index d30448cd23..029e26b2be 100644
--- a/compiler/optimizing/block_namer.cc
+++ b/compiler/optimizing/block_namer.cc
@@ -18,7 +18,7 @@
#include "nodes.h"
-namespace art {
+namespace art HIDDEN {
std::ostream& BlockNamer::PrintName(std::ostream& os, HBasicBlock* blk) const {
os << "B";
diff --git a/compiler/optimizing/block_namer.h b/compiler/optimizing/block_namer.h
index ed396b9bf8..39c5973297 100644
--- a/compiler/optimizing/block_namer.h
+++ b/compiler/optimizing/block_namer.h
@@ -19,7 +19,9 @@
#include <ostream>
-namespace art {
+#include "base/macros.h"
+
+namespace art HIDDEN {
class HBasicBlock;
struct BlockNamer {
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index dad3c818fa..919abfdc49 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -24,7 +24,7 @@
#include "nodes.h"
#include "side_effects_analysis.h"
-namespace art {
+namespace art HIDDEN {
class MonotonicValueRange;
@@ -490,7 +490,7 @@ class MonotonicValueRange : public ValueRange {
DISALLOW_COPY_AND_ASSIGN(MonotonicValueRange);
};
-class BCEVisitor : public HGraphVisitor {
+class BCEVisitor final : public HGraphVisitor {
public:
// The least number of bounds checks that should be eliminated by triggering
// the deoptimization technique.
@@ -564,6 +564,19 @@ class BCEVisitor : public HGraphVisitor {
early_exit_loop_.clear();
taken_test_loop_.clear();
finite_loop_.clear();
+
+ // We may have eliminated all bounds checks so we should update the flag.
+ // TODO(solanes): Do this without a linear pass of the graph?
+ GetGraph()->SetHasBoundsChecks(false);
+ for (HBasicBlock* block : GetGraph()->GetReversePostOrder()) {
+ for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+ HInstruction* instruction = it.Current();
+ if (instruction->IsBoundsCheck()) {
+ GetGraph()->SetHasBoundsChecks(true);
+ return;
+ }
+ }
+ }
}
private:
@@ -1818,6 +1831,7 @@ class BCEVisitor : public HGraphVisitor {
HInstruction* condition,
bool is_null_check = false) {
HInstruction* suspend = loop->GetSuspendCheck();
+ DCHECK(suspend != nullptr);
block->InsertInstructionBefore(condition, block->GetLastInstruction());
DeoptimizationKind kind =
is_null_check ? DeoptimizationKind::kLoopNullBCE : DeoptimizationKind::kLoopBoundsBCE;
@@ -1997,7 +2011,7 @@ class BCEVisitor : public HGraphVisitor {
phi->SetRawInputAt(0, instruction);
phi->SetRawInputAt(1, zero);
if (type == DataType::Type::kReference) {
- phi->SetReferenceTypeInfo(instruction->GetReferenceTypeInfo());
+ phi->SetReferenceTypeInfoIfValid(instruction->GetReferenceTypeInfo());
}
new_preheader->AddPhi(phi);
return phi;
diff --git a/compiler/optimizing/bounds_check_elimination.h b/compiler/optimizing/bounds_check_elimination.h
index ef08877daa..f210fa9127 100644
--- a/compiler/optimizing/bounds_check_elimination.h
+++ b/compiler/optimizing/bounds_check_elimination.h
@@ -17,9 +17,10 @@
#ifndef ART_COMPILER_OPTIMIZING_BOUNDS_CHECK_ELIMINATION_H_
#define ART_COMPILER_OPTIMIZING_BOUNDS_CHECK_ELIMINATION_H_
+#include "base/macros.h"
#include "optimization.h"
-namespace art {
+namespace art HIDDEN {
class SideEffectsAnalysis;
class HInductionVarAnalysis;
diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc
index 5927d681b2..929a9e7fe7 100644
--- a/compiler/optimizing/bounds_check_elimination_test.cc
+++ b/compiler/optimizing/bounds_check_elimination_test.cc
@@ -17,6 +17,7 @@
#include "bounds_check_elimination.h"
#include "base/arena_allocator.h"
+#include "base/macros.h"
#include "builder.h"
#include "gvn.h"
#include "induction_var_analysis.h"
@@ -27,7 +28,7 @@
#include "gtest/gtest.h"
-namespace art {
+namespace art HIDDEN {
/**
* Fixture class for the BoundsCheckElimination tests.
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index e7826bbba3..48d1a9da2f 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -33,7 +33,7 @@
#include "ssa_builder.h"
#include "thread.h"
-namespace art {
+namespace art HIDDEN {
HGraphBuilder::HGraphBuilder(HGraph* graph,
const CodeItemDebugInfoAccessor& accessor,
@@ -103,7 +103,6 @@ GraphAnalysisResult HGraphBuilder::BuildGraph() {
graph_->SetNumberOfVRegs(code_item_accessor_.RegistersSize());
graph_->SetNumberOfInVRegs(code_item_accessor_.InsSize());
graph_->SetMaximumNumberOfOutVRegs(code_item_accessor_.OutsSize());
- graph_->SetHasTryCatch(code_item_accessor_.TriesSize() != 0);
// Use ScopedArenaAllocator for all local allocations.
ScopedArenaAllocator local_allocator(graph_->GetArenaStack());
@@ -168,7 +167,6 @@ void HGraphBuilder::BuildIntrinsicGraph(ArtMethod* method) {
graph_->SetNumberOfVRegs(return_vregs + num_arg_vregs);
graph_->SetNumberOfInVRegs(num_arg_vregs);
graph_->SetMaximumNumberOfOutVRegs(num_arg_vregs);
- graph_->SetHasTryCatch(false);
// Use ScopedArenaAllocator for all local allocations.
ScopedArenaAllocator local_allocator(graph_->GetArenaStack());
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index 580769e0f9..ef225d9a6a 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -19,12 +19,13 @@
#include "base/arena_object.h"
#include "base/array_ref.h"
+#include "base/macros.h"
#include "dex/code_item_accessors.h"
#include "dex/dex_file-inl.h"
#include "dex/dex_file.h"
#include "nodes.h"
-namespace art {
+namespace art HIDDEN {
class ArtMethod;
class CodeGenerator;
diff --git a/compiler/optimizing/cha_guard_optimization.cc b/compiler/optimizing/cha_guard_optimization.cc
index c6232ef661..20a763cf6d 100644
--- a/compiler/optimizing/cha_guard_optimization.cc
+++ b/compiler/optimizing/cha_guard_optimization.cc
@@ -16,7 +16,7 @@
#include "cha_guard_optimization.h"
-namespace art {
+namespace art HIDDEN {
// Note we can only do CHA guard elimination/motion in a single pass, since
// if a guard is not removed, another guard might be removed due to
@@ -200,6 +200,7 @@ bool CHAGuardVisitor::HoistGuard(HShouldDeoptimizeFlag* flag,
block->RemoveInstruction(deopt);
HInstruction* suspend = loop_info->GetSuspendCheck();
+ DCHECK(suspend != nullptr);
// Need a new deoptimize instruction that copies the environment
// of the suspend instruction for the loop.
HDeoptimize* deoptimize = new (GetGraph()->GetAllocator()) HDeoptimize(
diff --git a/compiler/optimizing/cha_guard_optimization.h b/compiler/optimizing/cha_guard_optimization.h
index 440d51a969..5c1fdd90de 100644
--- a/compiler/optimizing/cha_guard_optimization.h
+++ b/compiler/optimizing/cha_guard_optimization.h
@@ -17,9 +17,10 @@
#ifndef ART_COMPILER_OPTIMIZING_CHA_GUARD_OPTIMIZATION_H_
#define ART_COMPILER_OPTIMIZING_CHA_GUARD_OPTIMIZATION_H_
+#include "base/macros.h"
#include "optimization.h"
-namespace art {
+namespace art HIDDEN {
/**
* Optimize CHA guards by removing/moving them.
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 27eabafb8f..c9f42b52f5 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -15,6 +15,7 @@
*/
#include "code_generator.h"
+#include "base/globals.h"
#ifdef ART_ENABLE_CODEGEN_arm
#include "code_generator_arm_vixl.h"
@@ -24,6 +25,10 @@
#include "code_generator_arm64.h"
#endif
+#ifdef ART_ENABLE_CODEGEN_riscv64
+#include "code_generator_riscv64.h"
+#endif
+
#ifdef ART_ENABLE_CODEGEN_x86
#include "code_generator_x86.h"
#endif
@@ -39,7 +44,6 @@
#include "base/leb128.h"
#include "class_linker.h"
#include "class_root-inl.h"
-#include "compiled_method.h"
#include "dex/bytecode_utils.h"
#include "dex/code_item_accessors-inl.h"
#include "graph_visualizer.h"
@@ -61,7 +65,7 @@
#include "thread-current-inl.h"
#include "utils/assembler.h"
-namespace art {
+namespace art HIDDEN {
// Return whether a location is consistent with a type.
static bool CheckType(DataType::Type type, Location location) {
@@ -389,7 +393,8 @@ void CodeGenerator::Compile(CodeAllocator* allocator) {
core_spill_mask_,
fpu_spill_mask_,
GetGraph()->GetNumberOfVRegs(),
- GetGraph()->IsCompilingBaseline());
+ GetGraph()->IsCompilingBaseline(),
+ GetGraph()->IsDebuggable());
size_t frame_start = GetAssembler()->CodeSize();
GenerateFrameEntry();
@@ -412,7 +417,13 @@ void CodeGenerator::Compile(CodeAllocator* allocator) {
for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
HInstruction* current = it.Current();
if (current->HasEnvironment()) {
- // Create stackmap for HNativeDebugInfo or any instruction which calls native code.
+ // Catch StackMaps are dealt with later on in `RecordCatchBlockInfo`.
+ if (block->IsCatchBlock() && block->GetFirstInstruction() == current) {
+ DCHECK(current->IsNop());
+ continue;
+ }
+
+ // Create stackmap for HNop or any instruction which calls native code.
// Note that we need correct mapping for the native PC of the call instruction,
// so the runtime's stackmap is not sufficient since it is at PC after the call.
MaybeRecordNativeDebugInfo(current, block->GetDexPc());
@@ -1030,6 +1041,9 @@ std::unique_ptr<CodeGenerator> CodeGenerator::Create(HGraph* graph,
}
#endif
default:
+ UNUSED(allocator);
+ UNUSED(graph);
+ UNUSED(stats);
return nullptr;
}
}
@@ -1041,7 +1055,8 @@ CodeGenerator::CodeGenerator(HGraph* graph,
uint32_t core_callee_save_mask,
uint32_t fpu_callee_save_mask,
const CompilerOptions& compiler_options,
- OptimizingCompilerStats* stats)
+ OptimizingCompilerStats* stats,
+ const art::ArrayRef<const bool>& unimplemented_intrinsics)
: frame_size_(0),
core_spill_mask_(0),
fpu_spill_mask_(0),
@@ -1066,7 +1081,8 @@ CodeGenerator::CodeGenerator(HGraph* graph,
is_leaf_(true),
needs_suspend_check_entry_(false),
requires_current_method_(false),
- code_generation_data_() {
+ code_generation_data_(),
+ unimplemented_intrinsics_(unimplemented_intrinsics) {
if (GetGraph()->IsCompilingOsr()) {
// Make OSR methods have all registers spilled, this simplifies the logic of
// jumping to the compiled code directly.
@@ -1123,7 +1139,7 @@ static void CheckLoopEntriesCanBeUsedForOsr(const HGraph& graph,
for (HBasicBlock* block : graph.GetReversePostOrder()) {
if (block->IsLoopHeader()) {
HSuspendCheck* suspend_check = block->GetLoopInformation()->GetSuspendCheck();
- if (!suspend_check->GetEnvironment()->IsFromInlinedInvoke()) {
+ if (suspend_check != nullptr && !suspend_check->GetEnvironment()->IsFromInlinedInvoke()) {
loop_headers.push_back(suspend_check);
}
}
@@ -1333,53 +1349,43 @@ void CodeGenerator::RecordCatchBlockInfo() {
continue;
}
- uint32_t dex_pc = block->GetDexPc();
- uint32_t num_vregs = graph_->GetNumberOfVRegs();
- uint32_t native_pc = GetAddressOf(block);
+ // Get the outer dex_pc. We save the full environment list for DCHECK purposes in kIsDebugBuild.
+ std::vector<uint32_t> dex_pc_list_for_verification;
+ if (kIsDebugBuild) {
+ dex_pc_list_for_verification.push_back(block->GetDexPc());
+ }
+ DCHECK(block->GetFirstInstruction()->IsNop());
+ DCHECK(block->GetFirstInstruction()->AsNop()->NeedsEnvironment());
+ HEnvironment* const environment = block->GetFirstInstruction()->GetEnvironment();
+ DCHECK(environment != nullptr);
+ HEnvironment* outer_environment = environment;
+ while (outer_environment->GetParent() != nullptr) {
+ outer_environment = outer_environment->GetParent();
+ if (kIsDebugBuild) {
+ dex_pc_list_for_verification.push_back(outer_environment->GetDexPc());
+ }
+ }
+
+ if (kIsDebugBuild) {
+ // dex_pc_list_for_verification is set from innermost to outermost. Let's reverse it
+ // since we are expected to pass from outermost to innermost.
+ std::reverse(dex_pc_list_for_verification.begin(), dex_pc_list_for_verification.end());
+ DCHECK_EQ(dex_pc_list_for_verification.front(), outer_environment->GetDexPc());
+ }
- stack_map_stream->BeginStackMapEntry(dex_pc,
+ uint32_t native_pc = GetAddressOf(block);
+ stack_map_stream->BeginStackMapEntry(outer_environment->GetDexPc(),
native_pc,
/* register_mask= */ 0,
/* sp_mask= */ nullptr,
- StackMap::Kind::Catch);
-
- HInstruction* current_phi = block->GetFirstPhi();
- for (size_t vreg = 0; vreg < num_vregs; ++vreg) {
- while (current_phi != nullptr && current_phi->AsPhi()->GetRegNumber() < vreg) {
- HInstruction* next_phi = current_phi->GetNext();
- DCHECK(next_phi == nullptr ||
- current_phi->AsPhi()->GetRegNumber() <= next_phi->AsPhi()->GetRegNumber())
- << "Phis need to be sorted by vreg number to keep this a linear-time loop.";
- current_phi = next_phi;
- }
+ StackMap::Kind::Catch,
+ /* needs_vreg_info= */ true,
+ dex_pc_list_for_verification);
- if (current_phi == nullptr || current_phi->AsPhi()->GetRegNumber() != vreg) {
- stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kNone, 0);
- } else {
- Location location = current_phi->GetLocations()->Out();
- switch (location.GetKind()) {
- case Location::kStackSlot: {
- stack_map_stream->AddDexRegisterEntry(
- DexRegisterLocation::Kind::kInStack, location.GetStackIndex());
- break;
- }
- case Location::kDoubleStackSlot: {
- stack_map_stream->AddDexRegisterEntry(
- DexRegisterLocation::Kind::kInStack, location.GetStackIndex());
- stack_map_stream->AddDexRegisterEntry(
- DexRegisterLocation::Kind::kInStack, location.GetHighStackIndex(kVRegSize));
- ++vreg;
- DCHECK_LT(vreg, num_vregs);
- break;
- }
- default: {
- // All catch phis must be allocated to a stack slot.
- LOG(FATAL) << "Unexpected kind " << location.GetKind();
- UNREACHABLE();
- }
- }
- }
- }
+ EmitEnvironment(environment,
+ /* slow_path= */ nullptr,
+ /* needs_vreg_info= */ true,
+ /* is_for_catch_handler= */ true);
stack_map_stream->EndStackMapEntry();
}
@@ -1390,7 +1396,9 @@ void CodeGenerator::AddSlowPath(SlowPathCode* slow_path) {
code_generation_data_->AddSlowPath(slow_path);
}
-void CodeGenerator::EmitVRegInfo(HEnvironment* environment, SlowPathCode* slow_path) {
+void CodeGenerator::EmitVRegInfo(HEnvironment* environment,
+ SlowPathCode* slow_path,
+ bool is_for_catch_handler) {
StackMapStream* stack_map_stream = GetStackMapStream();
// Walk over the environment, and record the location of dex registers.
for (size_t i = 0, environment_size = environment->Size(); i < environment_size; ++i) {
@@ -1445,6 +1453,7 @@ void CodeGenerator::EmitVRegInfo(HEnvironment* environment, SlowPathCode* slow_p
}
case Location::kRegister : {
+ DCHECK(!is_for_catch_handler);
int id = location.reg();
if (slow_path != nullptr && slow_path->IsCoreRegisterSaved(id)) {
uint32_t offset = slow_path->GetStackOffsetOfCoreRegister(id);
@@ -1466,6 +1475,7 @@ void CodeGenerator::EmitVRegInfo(HEnvironment* environment, SlowPathCode* slow_p
}
case Location::kFpuRegister : {
+ DCHECK(!is_for_catch_handler);
int id = location.reg();
if (slow_path != nullptr && slow_path->IsFpuRegisterSaved(id)) {
uint32_t offset = slow_path->GetStackOffsetOfFpuRegister(id);
@@ -1487,6 +1497,7 @@ void CodeGenerator::EmitVRegInfo(HEnvironment* environment, SlowPathCode* slow_p
}
case Location::kFpuRegisterPair : {
+ DCHECK(!is_for_catch_handler);
int low = location.low();
int high = location.high();
if (slow_path != nullptr && slow_path->IsFpuRegisterSaved(low)) {
@@ -1508,6 +1519,7 @@ void CodeGenerator::EmitVRegInfo(HEnvironment* environment, SlowPathCode* slow_p
}
case Location::kRegisterPair : {
+ DCHECK(!is_for_catch_handler);
int low = location.low();
int high = location.high();
if (slow_path != nullptr && slow_path->IsCoreRegisterSaved(low)) {
@@ -1538,9 +1550,54 @@ void CodeGenerator::EmitVRegInfo(HEnvironment* environment, SlowPathCode* slow_p
}
}
+void CodeGenerator::EmitVRegInfoOnlyCatchPhis(HEnvironment* environment) {
+ StackMapStream* stack_map_stream = GetStackMapStream();
+ DCHECK(environment->GetHolder()->GetBlock()->IsCatchBlock());
+ DCHECK_EQ(environment->GetHolder()->GetBlock()->GetFirstInstruction(), environment->GetHolder());
+ HInstruction* current_phi = environment->GetHolder()->GetBlock()->GetFirstPhi();
+ for (size_t vreg = 0; vreg < environment->Size(); ++vreg) {
+ while (current_phi != nullptr && current_phi->AsPhi()->GetRegNumber() < vreg) {
+ HInstruction* next_phi = current_phi->GetNext();
+ DCHECK(next_phi == nullptr ||
+ current_phi->AsPhi()->GetRegNumber() <= next_phi->AsPhi()->GetRegNumber())
+ << "Phis need to be sorted by vreg number to keep this a linear-time loop.";
+ current_phi = next_phi;
+ }
+
+ if (current_phi == nullptr || current_phi->AsPhi()->GetRegNumber() != vreg) {
+ stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kNone, 0);
+ } else {
+ Location location = current_phi->GetLocations()->Out();
+ switch (location.GetKind()) {
+ case Location::kStackSlot: {
+ stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack,
+ location.GetStackIndex());
+ break;
+ }
+ case Location::kDoubleStackSlot: {
+ stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack,
+ location.GetStackIndex());
+ stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack,
+ location.GetHighStackIndex(kVRegSize));
+ ++vreg;
+ DCHECK_LT(vreg, environment->Size());
+ break;
+ }
+ default: {
+ LOG(FATAL) << "All catch phis must be allocated to a stack slot. Unexpected kind "
+ << location.GetKind();
+ UNREACHABLE();
+ }
+ }
+ }
+ }
+}
+
void CodeGenerator::EmitEnvironment(HEnvironment* environment,
SlowPathCode* slow_path,
- bool needs_vreg_info) {
+ bool needs_vreg_info,
+ bool is_for_catch_handler,
+ bool innermost_environment) {
if (environment == nullptr) return;
StackMapStream* stack_map_stream = GetStackMapStream();
@@ -1548,7 +1605,11 @@ void CodeGenerator::EmitEnvironment(HEnvironment* environment,
if (emit_inline_info) {
// We emit the parent environment first.
- EmitEnvironment(environment->GetParent(), slow_path, needs_vreg_info);
+ EmitEnvironment(environment->GetParent(),
+ slow_path,
+ needs_vreg_info,
+ is_for_catch_handler,
+ /* innermost_environment= */ false);
stack_map_stream->BeginInlineInfoEntry(environment->GetMethod(),
environment->GetDexPc(),
needs_vreg_info ? environment->Size() : 0,
@@ -1556,9 +1617,13 @@ void CodeGenerator::EmitEnvironment(HEnvironment* environment,
this);
}
+ // If a dex register map is not required we just won't emit it.
if (needs_vreg_info) {
- // If a dex register map is not required we just won't emit it.
- EmitVRegInfo(environment, slow_path);
+ if (innermost_environment && is_for_catch_handler) {
+ EmitVRegInfoOnlyCatchPhis(environment);
+ } else {
+ EmitVRegInfo(environment, slow_path, is_for_catch_handler);
+ }
}
if (emit_inline_info) {
@@ -1671,7 +1736,7 @@ void CodeGenerator::ValidateInvokeRuntime(QuickEntrypointEnum entrypoint,
// When (non-Baker) read barriers are enabled, some instructions
// use a slow path to emit a read barrier, which does not trigger
// GC.
- (kEmitCompilerReadBarrier &&
+ (gUseReadBarrier &&
!kUseBakerReadBarrier &&
(instruction->IsInstanceFieldGet() ||
instruction->IsPredicatedInstanceFieldGet() ||
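
The reworked RecordCatchBlockInfo/EmitEnvironment path above walks the environment chain from the innermost inlined frame to the outermost one, records each dex pc, and then reverses the list, because the verification side expects outermost-first order. A minimal standalone sketch of that traversal pattern, using a hypothetical Env struct rather than ART's HEnvironment API:

#include <algorithm>
#include <cstdint>
#include <vector>

// Hypothetical stand-in for an inlined-call environment chain.
struct Env {
  uint32_t dex_pc;
  Env* parent;  // nullptr for the outermost frame.
};

// Collect dex pcs innermost-first while walking to the outermost frame,
// then reverse so callers see them outermost-first.
std::vector<uint32_t> CollectDexPcsOutermostFirst(Env* innermost) {
  std::vector<uint32_t> dex_pcs;
  for (Env* env = innermost; env != nullptr; env = env->parent) {
    dex_pcs.push_back(env->dex_pc);
  }
  std::reverse(dex_pcs.begin(), dex_pcs.end());
  return dex_pcs;
}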
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index d81a7b5382..9872efaa4a 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -26,6 +26,7 @@
#include "base/bit_utils.h"
#include "base/enums.h"
#include "base/globals.h"
+#include "base/macros.h"
#include "base/memory_region.h"
#include "class_root.h"
#include "dex/string_reference.h"
@@ -33,13 +34,15 @@
#include "graph_visualizer.h"
#include "locations.h"
#include "nodes.h"
+#include "oat_quick_method_header.h"
#include "optimizing_compiler_stats.h"
#include "read_barrier_option.h"
#include "stack.h"
+#include "subtype_check.h"
#include "utils/assembler.h"
#include "utils/label.h"
-namespace art {
+namespace art HIDDEN {
// Binary encoding of 2^32 for type double.
static int64_t constexpr k2Pow32EncodingForDouble = INT64_C(0x41F0000000000000);
@@ -56,8 +59,18 @@ static int32_t constexpr kPrimIntMax = 0x7fffffff;
// Maximum value for a primitive long.
static int64_t constexpr kPrimLongMax = INT64_C(0x7fffffffffffffff);
-static constexpr ReadBarrierOption kCompilerReadBarrierOption =
- kEmitCompilerReadBarrier ? kWithReadBarrier : kWithoutReadBarrier;
+static const ReadBarrierOption gCompilerReadBarrierOption =
+ gUseReadBarrier ? kWithReadBarrier : kWithoutReadBarrier;
+
+constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
+constexpr size_t status_byte_offset =
+ mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte);
+constexpr uint32_t shifted_visibly_initialized_value =
+ enum_cast<uint32_t>(ClassStatus::kVisiblyInitialized) << (status_lsb_position % kBitsPerByte);
+constexpr uint32_t shifted_initializing_value =
+ enum_cast<uint32_t>(ClassStatus::kInitializing) << (status_lsb_position % kBitsPerByte);
+constexpr uint32_t shifted_initialized_value =
+ enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte);
class Assembler;
class CodeGenerator;
@@ -291,6 +304,12 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
// Returns whether we should split long moves in parallel moves.
virtual bool ShouldSplitLongMoves() const { return false; }
+ // Returns true if `invoke` is an implemented intrinsic in this codegen's arch.
+ bool IsImplementedIntrinsic(HInvoke* invoke) const {
+ return invoke->IsIntrinsic() &&
+ !unimplemented_intrinsics_[static_cast<size_t>(invoke->GetIntrinsic())];
+ }
+
size_t GetNumberOfCoreCalleeSaveRegisters() const {
return POPCOUNT(core_callee_save_mask_);
}
@@ -460,7 +479,7 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
// If the target class is in the boot image, it's non-moveable and it doesn't matter
// if we compare it with a from-space or to-space reference, the result is the same.
// It's OK to traverse a class hierarchy jumping between from-space and to-space.
- return kEmitCompilerReadBarrier && !instance_of->GetTargetClass()->IsInBootImage();
+ return gUseReadBarrier && !instance_of->GetTargetClass()->IsInBootImage();
}
static ReadBarrierOption ReadBarrierOptionForInstanceOf(HInstanceOf* instance_of) {
@@ -475,7 +494,7 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
case TypeCheckKind::kArrayObjectCheck:
case TypeCheckKind::kInterfaceCheck: {
bool needs_read_barrier =
- kEmitCompilerReadBarrier && !check_cast->GetTargetClass()->IsInBootImage();
+ gUseReadBarrier && !check_cast->GetTargetClass()->IsInBootImage();
// We do not emit read barriers for HCheckCast, so we can get false negatives
// and the slow path shall re-check and simply return if the cast is actually OK.
return !needs_read_barrier;
@@ -678,7 +697,7 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
return LocationSummary::kCallOnMainOnly;
case HLoadString::LoadKind::kJitTableAddress:
DCHECK(!load->NeedsEnvironment());
- return kEmitCompilerReadBarrier
+ return gUseReadBarrier
? LocationSummary::kCallOnSlowPath
: LocationSummary::kNoCall;
break;
@@ -736,7 +755,8 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
uint32_t core_callee_save_mask,
uint32_t fpu_callee_save_mask,
const CompilerOptions& compiler_options,
- OptimizingCompilerStats* stats);
+ OptimizingCompilerStats* stats,
+ const art::ArrayRef<const bool>& unimplemented_intrinsics);
virtual HGraphVisitor* GetLocationBuilder() = 0;
virtual HGraphVisitor* GetInstructionVisitor() = 0;
@@ -836,8 +856,11 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
void BlockIfInRegister(Location location, bool is_out = false) const;
void EmitEnvironment(HEnvironment* environment,
SlowPathCode* slow_path,
- bool needs_vreg_info = true);
- void EmitVRegInfo(HEnvironment* environment, SlowPathCode* slow_path);
+ bool needs_vreg_info = true,
+ bool is_for_catch_handler = false,
+ bool innermost_environment = true);
+ void EmitVRegInfo(HEnvironment* environment, SlowPathCode* slow_path, bool is_for_catch_handler);
+ void EmitVRegInfoOnlyCatchPhis(HEnvironment* environment);
static void PrepareCriticalNativeArgumentMoves(
HInvokeStaticOrDirect* invoke,
@@ -877,6 +900,9 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
// CodeGenerator::Compile() and remains alive until the CodeGenerator is destroyed.
std::unique_ptr<CodeGenerationData> code_generation_data_;
+ // Which intrinsics we don't have handcrafted code for.
+ art::ArrayRef<const bool> unimplemented_intrinsics_;
+
friend class OptimizingCFITest;
ART_FRIEND_TEST(CodegenTest, ARM64FrameSizeSIMD);
ART_FRIEND_TEST(CodegenTest, ARM64FrameSizeNoSIMD);
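
The status_byte_offset and shifted_*_value constants moved into code_generator.h are plain bit arithmetic: the class status lives in the bits above SubtypeCheckBits, so the byte that holds it and the in-byte shift both fall out of that LSB position. Below is a standalone sketch of the same derivation with assumed example values; the real inputs come from SubtypeCheckBits::BitStructSizeOf() and mirror::Class::StatusOffset(), and the concrete numbers here are for illustration only.

#include <cstddef>
#include <cstdint>
#include <cstdio>

// Assumed values, chosen only to make the arithmetic concrete.
constexpr size_t kStatusLsbPosition = 28;   // number of bits below the status field
constexpr size_t kStatusFieldOffset = 112;  // byte offset of the 32-bit status word
constexpr size_t kBitsPerByte = 8;
constexpr uint32_t kExampleStatus = 13;     // arbitrary status enumerator value

// The status byte is the byte of the status word that contains bit kStatusLsbPosition.
constexpr size_t kStatusByteOffset =
    kStatusFieldOffset + (kStatusLsbPosition / kBitsPerByte);

// A status enumerator shifted into its position within that byte.
constexpr uint32_t ShiftedStatus(uint32_t status) {
  return status << (kStatusLsbPosition % kBitsPerByte);
}

int main() {
  // With an LSB position of 28, the status sits in the top nibble of the last
  // byte of the word, so a single byte load (e.g. Ldrb) plus an immediate
  // compare against the shifted value is enough.
  std::printf("byte offset = %zu, shifted example status = 0x%x\n",
              kStatusByteOffset, ShiftedStatus(kExampleStatus));
  return 0;
}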
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 2a0b481b2d..41db9a2542 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -27,7 +27,6 @@
#include "class_root-inl.h"
#include "class_table.h"
#include "code_generator_utils.h"
-#include "compiled_method.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "entrypoints/quick/quick_entrypoints_enum.h"
#include "gc/accounting/card_table.h"
@@ -44,6 +43,7 @@
#include "mirror/var_handle.h"
#include "offsets.h"
#include "optimizing/common_arm64.h"
+#include "optimizing/nodes.h"
#include "thread.h"
#include "utils/arm64/assembler_arm64.h"
#include "utils/assembler.h"
@@ -58,7 +58,7 @@ using vixl::EmissionCheckScope;
#error "ARM64 Codegen VIXL macro-assembler macro already defined."
#endif
-namespace art {
+namespace art HIDDEN {
template<class MirrorType>
class GcRoot;
@@ -77,7 +77,6 @@ using helpers::InputFPRegisterAt;
using helpers::InputOperandAt;
using helpers::InputRegisterAt;
using helpers::Int64FromLocation;
-using helpers::IsConstantZeroBitPattern;
using helpers::LocationFrom;
using helpers::OperandFromMemOperand;
using helpers::OutputCPURegister;
@@ -583,7 +582,7 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 {
obj_(obj),
offset_(offset),
index_(index) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
// If `obj` is equal to `out` or `ref`, it means the initial object
// has been overwritten by (or after) the heap object reference load
// to be instrumented, e.g.:
@@ -762,7 +761,7 @@ class ReadBarrierForRootSlowPathARM64 : public SlowPathCodeARM64 {
public:
ReadBarrierForRootSlowPathARM64(HInstruction* instruction, Location out, Location root)
: SlowPathCodeARM64(instruction), out_(out), root_(root) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
}
void EmitNativeCode(CodeGenerator* codegen) override {
@@ -825,6 +824,9 @@ class MethodEntryExitHooksSlowPathARM64 : public SlowPathCodeARM64 {
CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, locations);
+ if (instruction_->IsMethodExitHook()) {
+ __ Mov(vixl::aarch64::x4, arm64_codegen->GetFrameSize());
+ }
arm64_codegen->InvokeRuntime(entry_point, instruction_, instruction_->GetDexPc(), this);
RestoreLiveRegisters(codegen, locations);
__ B(GetExitLabel());
@@ -933,6 +935,33 @@ Location CriticalNativeCallingConventionVisitorARM64::GetMethodLocation() const
return Location::RegisterLocation(x15.GetCode());
}
+namespace detail {
+// Mark which intrinsics we don't have handcrafted code for.
+template <Intrinsics T>
+struct IsUnimplemented {
+ bool is_unimplemented = false;
+};
+
+#define TRUE_OVERRIDE(Name) \
+ template <> \
+ struct IsUnimplemented<Intrinsics::k##Name> { \
+ bool is_unimplemented = true; \
+ };
+UNIMPLEMENTED_INTRINSIC_LIST_ARM64(TRUE_OVERRIDE)
+#undef TRUE_OVERRIDE
+
+#include "intrinsics_list.h"
+static constexpr bool kIsIntrinsicUnimplemented[] = {
+ false, // kNone
+#define IS_UNIMPLEMENTED(Intrinsic, ...) \
+ IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented,
+ INTRINSICS_LIST(IS_UNIMPLEMENTED)
+#undef IS_UNIMPLEMENTED
+};
+#undef INTRINSICS_LIST
+
+} // namespace detail
+
CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph,
const CompilerOptions& compiler_options,
OptimizingCompilerStats* stats)
@@ -943,7 +972,8 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph,
callee_saved_core_registers.GetList(),
callee_saved_fp_registers.GetList(),
compiler_options,
- stats),
+ stats,
+ ArrayRef<const bool>(detail::kIsIntrinsicUnimplemented)),
block_labels_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
location_builder_neon_(graph, this),
@@ -1169,9 +1199,21 @@ void InstructionCodeGeneratorARM64::GenerateMethodEntryExitHook(HInstruction* in
new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathARM64(instruction);
codegen_->AddSlowPath(slow_path);
+ if (instruction->IsMethodExitHook()) {
+ // Check if we are required to check if the caller needs a deoptimization. Strictly speaking it
+ // would be sufficient to check if CheckCallerForDeopt bit is set. Though it is faster to check
+ // if it is just non-zero. kCHA bit isn't used in debuggable runtimes as cha optimization is
+ // disabled in debuggable runtime. The other bit is used when this method itself requires a
+ // deoptimization due to redefinition. So it is safe to just check for non-zero value here.
+ __ Ldr(value, MemOperand(sp, codegen_->GetStackOffsetOfShouldDeoptimizeFlag()));
+ __ Cbnz(value, slow_path->GetEntryLabel());
+ }
+
uint64_t address = reinterpret_cast64<uint64_t>(Runtime::Current()->GetInstrumentation());
- int offset = instrumentation::Instrumentation::NeedsEntryExitHooksOffset().Int32Value();
- __ Mov(temp, address + offset);
+ MemberOffset offset = instruction->IsMethodExitHook() ?
+ instrumentation::Instrumentation::HaveMethodExitListenersOffset() :
+ instrumentation::Instrumentation::HaveMethodEntryListenersOffset();
+ __ Mov(temp, address + offset.Int32Value());
__ Ldrb(value, MemOperand(temp, 0));
__ Cbnz(value, slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
@@ -1233,6 +1275,54 @@ void CodeGeneratorARM64::MaybeIncrementHotness(bool is_frame_entry) {
void CodeGeneratorARM64::GenerateFrameEntry() {
MacroAssembler* masm = GetVIXLAssembler();
+
+ // Check if we need to generate the clinit check. We will jump to the
+ // resolution stub if the class is not initialized and the executing thread is
+ // not the thread initializing it.
+ // We do this before constructing the frame to get the correct stack trace if
+ // an exception is thrown.
+ if (GetCompilerOptions().ShouldCompileWithClinitCheck(GetGraph()->GetArtMethod())) {
+ UseScratchRegisterScope temps(masm);
+ vixl::aarch64::Label resolution;
+ vixl::aarch64::Label memory_barrier;
+
+ Register temp1 = temps.AcquireW();
+ Register temp2 = temps.AcquireW();
+
+ // Check if we're visibly initialized.
+
+ // We don't emit a read barrier here to save on code size. We rely on the
+ // resolution trampoline to do a suspend check before re-entering this code.
+ __ Ldr(temp1, MemOperand(kArtMethodRegister, ArtMethod::DeclaringClassOffset().Int32Value()));
+ __ Ldrb(temp2, HeapOperand(temp1, status_byte_offset));
+ __ Cmp(temp2, shifted_visibly_initialized_value);
+ __ B(hs, &frame_entry_label_);
+
+ // Check if we're initialized and jump to code that does a memory barrier if
+ // so.
+ __ Cmp(temp2, shifted_initialized_value);
+ __ B(hs, &memory_barrier);
+
+ // Check if we're initializing and the thread initializing is the one
+ // executing the code.
+ __ Cmp(temp2, shifted_initializing_value);
+ __ B(lo, &resolution);
+
+ __ Ldr(temp1, HeapOperand(temp1, mirror::Class::ClinitThreadIdOffset().Int32Value()));
+ __ Ldr(temp2, MemOperand(tr, Thread::TidOffset<kArm64PointerSize>().Int32Value()));
+ __ Cmp(temp1, temp2);
+ __ B(eq, &frame_entry_label_);
+ __ Bind(&resolution);
+
+ // Jump to the resolution stub.
+ ThreadOffset64 entrypoint_offset =
+ GetThreadOffset<kArm64PointerSize>(kQuickQuickResolutionTrampoline);
+ __ Ldr(temp1.X(), MemOperand(tr, entrypoint_offset.Int32Value()));
+ __ Br(temp1.X());
+
+ __ Bind(&memory_barrier);
+ GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+ }
__ Bind(&frame_entry_label_);
bool do_overflow_check =
@@ -1364,12 +1454,12 @@ void CodeGeneratorARM64::AddLocationAsTemp(Location location, LocationSummary* l
}
}
-void CodeGeneratorARM64::MarkGCCard(Register object, Register value, bool value_can_be_null) {
+void CodeGeneratorARM64::MarkGCCard(Register object, Register value, bool emit_null_check) {
UseScratchRegisterScope temps(GetVIXLAssembler());
Register card = temps.AcquireX();
Register temp = temps.AcquireW(); // Index within the CardTable - 32bit.
vixl::aarch64::Label done;
- if (value_can_be_null) {
+ if (emit_null_check) {
__ Cbz(value, &done);
}
// Load the address of the card table into `card`.
@@ -1391,7 +1481,7 @@ void CodeGeneratorARM64::MarkGCCard(Register object, Register value, bool value_
// of the card to mark; and 2. to load the `kCardDirty` value) saves a load
// (no need to explicitly load `kCardDirty` as an immediate value).
__ Strb(card, MemOperand(card, temp.X()));
- if (value_can_be_null) {
+ if (emit_null_check) {
__ Bind(&done);
}
}
@@ -1904,11 +1994,6 @@ void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCod
Register class_reg) {
UseScratchRegisterScope temps(GetVIXLAssembler());
Register temp = temps.AcquireW();
- constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
- const size_t status_byte_offset =
- mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte);
- constexpr uint32_t shifted_visibly_initialized_value =
- enum_cast<uint32_t>(ClassStatus::kVisiblyInitialized) << (status_lsb_position % kBitsPerByte);
// CMP (immediate) is limited to imm12 or imm12<<12, so we would need to materialize
// the constant 0xf0000000 for comparison with the full 32-bit field. To reduce the code
@@ -1974,6 +2059,13 @@ bool CodeGeneratorARM64::CanUseImplicitSuspendCheck() const {
void InstructionCodeGeneratorARM64::GenerateSuspendCheck(HSuspendCheck* instruction,
HBasicBlock* successor) {
+ if (instruction->IsNoOp()) {
+ if (successor != nullptr) {
+ __ B(codegen_->GetLabelOf(successor));
+ }
+ return;
+ }
+
if (codegen_->CanUseImplicitSuspendCheck()) {
__ Ldr(kImplicitSuspendCheckRegister, MemOperand(kImplicitSuspendCheckRegister));
codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
@@ -2051,7 +2143,7 @@ void LocationsBuilderARM64::HandleFieldGet(HInstruction* instruction,
bool is_predicated = instruction->IsPredicatedInstanceFieldGet();
bool object_field_get_with_read_barrier =
- kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
+ gUseReadBarrier && (instruction->GetType() == DataType::Type::kReference);
LocationSummary* locations =
new (GetGraph()->GetAllocator()) LocationSummary(instruction,
object_field_get_with_read_barrier
@@ -2107,7 +2199,7 @@ void InstructionCodeGeneratorARM64::HandleFieldGet(HInstruction* instruction,
MemOperand field =
HeapOperand(InputRegisterAt(instruction, receiver_input), field_info.GetFieldOffset());
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier &&
+ if (gUseReadBarrier && kUseBakerReadBarrier &&
load_type == DataType::Type::kReference) {
// Object FieldGet with Baker's read barrier case.
// /* HeapReference<Object> */ out = *(base + offset)
@@ -2154,9 +2246,10 @@ void LocationsBuilderARM64::HandleFieldSet(HInstruction* instruction) {
LocationSummary* locations =
new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
locations->SetInAt(0, Location::RequiresRegister());
- if (IsConstantZeroBitPattern(instruction->InputAt(1))) {
- locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
- } else if (DataType::IsFloatingPointType(instruction->InputAt(1)->GetType())) {
+ HInstruction* value = instruction->InputAt(1);
+ if (IsZeroBitPattern(value)) {
+ locations->SetInAt(1, Location::ConstantLocation(value));
+ } else if (DataType::IsFloatingPointType(value->GetType())) {
locations->SetInAt(1, Location::RequiresFpuRegister());
} else {
locations->SetInAt(1, Location::RequiresRegister());
@@ -2165,7 +2258,8 @@ void LocationsBuilderARM64::HandleFieldSet(HInstruction* instruction) {
void InstructionCodeGeneratorARM64::HandleFieldSet(HInstruction* instruction,
const FieldInfo& field_info,
- bool value_can_be_null) {
+ bool value_can_be_null,
+ WriteBarrierKind write_barrier_kind) {
DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
bool is_predicated =
instruction->IsInstanceFieldSet() && instruction->AsInstanceFieldSet()->GetIsPredicatedSet();
@@ -2205,8 +2299,12 @@ void InstructionCodeGeneratorARM64::HandleFieldSet(HInstruction* instruction,
}
}
- if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
- codegen_->MarkGCCard(obj, Register(value), value_can_be_null);
+ if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1)) &&
+ write_barrier_kind != WriteBarrierKind::kDontEmit) {
+ codegen_->MarkGCCard(
+ obj,
+ Register(value),
+ value_can_be_null && write_barrier_kind == WriteBarrierKind::kEmitWithNullCheck);
}
if (is_predicated) {
@@ -2382,7 +2480,7 @@ void LocationsBuilderARM64::VisitDataProcWithShifterOp(
LocationSummary* locations =
new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
if (instruction->GetInstrKind() == HInstruction::kNeg) {
- locations->SetInAt(0, Location::ConstantLocation(instruction->InputAt(0)->AsConstant()));
+ locations->SetInAt(0, Location::ConstantLocation(instruction->InputAt(0)));
} else {
locations->SetInAt(0, Location::RequiresRegister());
}
@@ -2475,7 +2573,7 @@ void LocationsBuilderARM64::VisitIntermediateAddressIndex(HIntermediateAddressIn
// data offset constant generation out of the loop and reduce the critical path length in the
// loop.
locations->SetInAt(1, shift->GetValue() == 0
- ? Location::ConstantLocation(instruction->GetOffset()->AsIntConstant())
+ ? Location::ConstantLocation(instruction->GetOffset())
: Location::RequiresRegister());
locations->SetInAt(2, Location::ConstantLocation(shift));
locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
@@ -2549,7 +2647,7 @@ void InstructionCodeGeneratorARM64::VisitMultiplyAccumulate(HMultiplyAccumulate*
void LocationsBuilderARM64::VisitArrayGet(HArrayGet* instruction) {
bool object_array_get_with_read_barrier =
- kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
+ gUseReadBarrier && (instruction->GetType() == DataType::Type::kReference);
LocationSummary* locations =
new (GetGraph()->GetAllocator()) LocationSummary(instruction,
object_array_get_with_read_barrier
@@ -2605,10 +2703,10 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) {
// does not support the HIntermediateAddress instruction.
DCHECK(!((type == DataType::Type::kReference) &&
instruction->GetArray()->IsIntermediateAddress() &&
- kEmitCompilerReadBarrier &&
+ gUseReadBarrier &&
!kUseBakerReadBarrier));
- if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (type == DataType::Type::kReference && gUseReadBarrier && kUseBakerReadBarrier) {
// Object ArrayGet with Baker's read barrier case.
// Note that a potential implicit null check is handled in the
// CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call.
@@ -2750,9 +2848,10 @@ void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) {
instruction,
needs_type_check ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
- if (IsConstantZeroBitPattern(instruction->InputAt(2))) {
- locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
+ locations->SetInAt(1, Location::RegisterOrConstant(instruction->GetIndex()));
+ HInstruction* value = instruction->GetValue();
+ if (IsZeroBitPattern(value)) {
+ locations->SetInAt(2, Location::ConstantLocation(value));
} else if (DataType::IsFloatingPointType(value_type)) {
locations->SetInAt(2, Location::RequiresFpuRegister());
} else {
@@ -2871,7 +2970,11 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) {
}
}
- codegen_->MarkGCCard(array, value.W(), /* value_can_be_null= */ false);
+ if (instruction->GetWriteBarrierKind() != WriteBarrierKind::kDontEmit) {
+ DCHECK_EQ(instruction->GetWriteBarrierKind(), WriteBarrierKind::kEmitNoNullCheck)
+ << " Already null checked so we shouldn't do it again.";
+ codegen_->MarkGCCard(array, value.W(), /* emit_null_check= */ false);
+ }
if (can_value_be_null) {
DCHECK(do_store.IsLinked());
@@ -2929,10 +3032,10 @@ void LocationsBuilderARM64::VisitBoundsCheck(HBoundsCheck* instruction) {
HInstruction* length = instruction->InputAt(1);
bool both_const = index->IsConstant() && length->IsConstant();
locations->SetInAt(0, both_const
- ? Location::ConstantLocation(index->AsConstant())
+ ? Location::ConstantLocation(index)
: ARM64EncodableConstantOrRegister(index, instruction));
locations->SetInAt(1, both_const
- ? Location::ConstantLocation(length->AsConstant())
+ ? Location::ConstantLocation(length)
: ARM64EncodableConstantOrRegister(length, instruction));
}
@@ -3030,6 +3133,7 @@ void LocationsBuilderARM64::VisitCompare(HCompare* compare) {
LocationSummary* locations =
new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall);
DataType::Type in_type = compare->InputAt(0)->GetType();
+ HInstruction* rhs = compare->InputAt(1);
switch (in_type) {
case DataType::Type::kBool:
case DataType::Type::kUint8:
@@ -3039,7 +3143,7 @@ void LocationsBuilderARM64::VisitCompare(HCompare* compare) {
case DataType::Type::kInt32:
case DataType::Type::kInt64: {
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, ARM64EncodableConstantOrRegister(compare->InputAt(1), compare));
+ locations->SetInAt(1, ARM64EncodableConstantOrRegister(rhs, compare));
locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
break;
}
@@ -3047,8 +3151,8 @@ void LocationsBuilderARM64::VisitCompare(HCompare* compare) {
case DataType::Type::kFloat64: {
locations->SetInAt(0, Location::RequiresFpuRegister());
locations->SetInAt(1,
- IsFloatingPointZeroConstant(compare->InputAt(1))
- ? Location::ConstantLocation(compare->InputAt(1)->AsConstant())
+ IsFloatingPointZeroConstant(rhs)
+ ? Location::ConstantLocation(rhs)
: Location::RequiresFpuRegister());
locations->SetOut(Location::RequiresRegister());
break;
@@ -3096,16 +3200,17 @@ void InstructionCodeGeneratorARM64::VisitCompare(HCompare* compare) {
void LocationsBuilderARM64::HandleCondition(HCondition* instruction) {
LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
+ HInstruction* rhs = instruction->InputAt(1);
if (DataType::IsFloatingPointType(instruction->InputAt(0)->GetType())) {
locations->SetInAt(0, Location::RequiresFpuRegister());
locations->SetInAt(1,
- IsFloatingPointZeroConstant(instruction->InputAt(1))
- ? Location::ConstantLocation(instruction->InputAt(1)->AsConstant())
+ IsFloatingPointZeroConstant(rhs)
+ ? Location::ConstantLocation(rhs)
: Location::RequiresFpuRegister());
} else {
// Integer cases.
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->InputAt(1), instruction));
+ locations->SetInAt(1, ARM64EncodableConstantOrRegister(rhs, instruction));
}
if (!instruction->IsEmittedAtUseSite()) {
@@ -3845,12 +3950,12 @@ void InstructionCodeGeneratorARM64::VisitSelect(HSelect* select) {
}
}
-void LocationsBuilderARM64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
- new (GetGraph()->GetAllocator()) LocationSummary(info);
+void LocationsBuilderARM64::VisitNop(HNop* nop) {
+ new (GetGraph()->GetAllocator()) LocationSummary(nop);
}
-void InstructionCodeGeneratorARM64::VisitNativeDebugInfo(HNativeDebugInfo*) {
- // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile.
+void InstructionCodeGeneratorARM64::VisitNop(HNop*) {
+ // The environment recording already happened in CodeGenerator::Compile.
}
void CodeGeneratorARM64::IncreaseFrame(size_t adjustment) {
@@ -3893,12 +3998,15 @@ void LocationsBuilderARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction
}
void InstructionCodeGeneratorARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
- HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
+ HandleFieldSet(instruction,
+ instruction->GetFieldInfo(),
+ instruction->GetValueCanBeNull(),
+ instruction->GetWriteBarrierKind());
}
// Temp is used for read barrier.
static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
- if (kEmitCompilerReadBarrier &&
+ if (gUseReadBarrier &&
(kUseBakerReadBarrier ||
type_check_kind == TypeCheckKind::kAbstractClassCheck ||
type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
@@ -3948,9 +4056,9 @@ void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) {
}
locations->SetInAt(0, Location::RequiresRegister());
if (type_check_kind == TypeCheckKind::kBitstringCheck) {
- locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
- locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
- locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
+ locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)));
+ locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)));
} else {
locations->SetInAt(1, Location::RequiresRegister());
}
@@ -4194,9 +4302,9 @@ void LocationsBuilderARM64::VisitCheckCast(HCheckCast* instruction) {
new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
locations->SetInAt(0, Location::RequiresRegister());
if (type_check_kind == TypeCheckKind::kBitstringCheck) {
- locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
- locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
- locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
+ locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)));
+ locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)));
} else {
locations->SetInAt(1, Location::RequiresRegister());
}
@@ -5313,7 +5421,7 @@ void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) {
load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
load_kind == HLoadClass::LoadKind::kBssEntryPackage);
- const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
+ const bool requires_read_barrier = gUseReadBarrier && !cls->IsInBootImage();
LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
? LocationSummary::kCallOnSlowPath
: LocationSummary::kNoCall;
@@ -5327,7 +5435,7 @@ void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) {
}
locations->SetOut(Location::RequiresRegister());
if (cls->GetLoadKind() == HLoadClass::LoadKind::kBssEntry) {
- if (!kUseReadBarrier || kUseBakerReadBarrier) {
+ if (!gUseReadBarrier || kUseBakerReadBarrier) {
// Rely on the type resolution or initialization and marking to save everything we need.
locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
} else {
@@ -5354,7 +5462,7 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA
const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
? kWithoutReadBarrier
- : kCompilerReadBarrierOption;
+ : gCompilerReadBarrierOption;
bool generate_null_check = false;
switch (load_kind) {
case HLoadClass::LoadKind::kReferrersClass: {
@@ -5523,7 +5631,7 @@ void LocationsBuilderARM64::VisitLoadString(HLoadString* load) {
} else {
locations->SetOut(Location::RequiresRegister());
if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) {
- if (!kUseReadBarrier || kUseBakerReadBarrier) {
+ if (!gUseReadBarrier || kUseBakerReadBarrier) {
// Rely on the pResolveString and marking to save everything we need.
locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
} else {
@@ -5577,7 +5685,7 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD
temp,
/* offset placeholder */ 0u,
ldr_label,
- kCompilerReadBarrierOption);
+ gCompilerReadBarrierOption);
SlowPathCodeARM64* slow_path =
new (codegen_->GetScopedAllocator()) LoadStringSlowPathARM64(load);
codegen_->AddSlowPath(slow_path);
@@ -5601,7 +5709,7 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD
out.X(),
/* offset= */ 0,
/* fixup_label= */ nullptr,
- kCompilerReadBarrierOption);
+ gCompilerReadBarrierOption);
return;
}
default:
@@ -6156,7 +6264,10 @@ void LocationsBuilderARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
}
void InstructionCodeGeneratorARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
- HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
+ HandleFieldSet(instruction,
+ instruction->GetFieldInfo(),
+ instruction->GetValueCanBeNull(),
+ instruction->GetWriteBarrierKind());
}
void LocationsBuilderARM64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
@@ -6462,7 +6573,7 @@ void InstructionCodeGeneratorARM64::GenerateReferenceLoadOneRegister(
DataType::Type type = DataType::Type::kReference;
Register out_reg = RegisterFrom(out, type);
if (read_barrier_option == kWithReadBarrier) {
- CHECK(kEmitCompilerReadBarrier);
+ CHECK(gUseReadBarrier);
if (kUseBakerReadBarrier) {
// Load with fast path based Baker's read barrier.
// /* HeapReference<Object> */ out = *(out + offset)
@@ -6503,7 +6614,7 @@ void InstructionCodeGeneratorARM64::GenerateReferenceLoadTwoRegisters(
Register out_reg = RegisterFrom(out, type);
Register obj_reg = RegisterFrom(obj, type);
if (read_barrier_option == kWithReadBarrier) {
- CHECK(kEmitCompilerReadBarrier);
+ CHECK(gUseReadBarrier);
if (kUseBakerReadBarrier) {
// Load with fast path based Baker's read barrier.
// /* HeapReference<Object> */ out = *(obj + offset)
@@ -6538,7 +6649,7 @@ void CodeGeneratorARM64::GenerateGcRootFieldLoad(
DCHECK(fixup_label == nullptr || offset == 0u);
Register root_reg = RegisterFrom(root, DataType::Type::kReference);
if (read_barrier_option == kWithReadBarrier) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
if (kUseBakerReadBarrier) {
// Fast path implementation of art::ReadBarrier::BarrierForRoot when
 // Baker's read barriers are used.
@@ -6604,7 +6715,7 @@ void CodeGeneratorARM64::GenerateGcRootFieldLoad(
void CodeGeneratorARM64::GenerateIntrinsicCasMoveWithBakerReadBarrier(
vixl::aarch64::Register marked_old_value,
vixl::aarch64::Register old_value) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
DCHECK(kUseBakerReadBarrier);
// Similar to the Baker RB path in GenerateGcRootFieldLoad(), with a MOV instead of LDR.
@@ -6626,7 +6737,7 @@ void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* ins
const vixl::aarch64::MemOperand& src,
bool needs_null_check,
bool use_load_acquire) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
DCHECK(kUseBakerReadBarrier);
// Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
@@ -6722,7 +6833,7 @@ void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HArrayGet* instru
uint32_t data_offset,
Location index,
bool needs_null_check) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
DCHECK(kUseBakerReadBarrier);
static_assert(
@@ -6800,7 +6911,7 @@ void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HArrayGet* instru
void CodeGeneratorARM64::MaybeGenerateMarkingRegisterCheck(int code, Location temp_loc) {
// The following condition is a compile-time one, so it does not have a run-time cost.
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier && kIsDebugBuild) {
+ if (kIsDebugBuild && gUseReadBarrier && kUseBakerReadBarrier) {
// The following condition is a run-time one; it is executed after the
// previous compile-time test, to avoid penalizing non-debug builds.
if (GetCompilerOptions().EmitRunTimeChecksInDebugMode()) {
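Note: testing kIsDebugBuild first matters now that gUseReadBarrier is a load-time global rather than a compile-time constant: the constexpr operand is evaluated first, so the compiler can fold the whole block away in release builds without consulting the global. A minimal standalone illustration (all names here are placeholders, not the runtime's flags):

    // Illustrative only: placeholder flags, not the runtime's.
    constexpr bool kIsDebugBuildExample = false;  // a release build
    bool gRuntimeFlagExample = true;              // set once at load time

    void MaybeEmitCheckExample() {
      // With the constexpr operand first, the branch folds away at compile
      // time and the global need never be read in release builds.
      if (kIsDebugBuildExample && gRuntimeFlagExample) {
        // Debug-only marking-register check would be emitted here.
      }
    }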
@@ -6829,7 +6940,7 @@ void CodeGeneratorARM64::GenerateReadBarrierSlow(HInstruction* instruction,
Location obj,
uint32_t offset,
Location index) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
// Insert a slow path based read barrier *after* the reference load.
//
@@ -6854,7 +6965,7 @@ void CodeGeneratorARM64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
Location obj,
uint32_t offset,
Location index) {
- if (kEmitCompilerReadBarrier) {
+ if (gUseReadBarrier) {
// Baker's read barriers shall be handled by the fast path
// (CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier).
DCHECK(!kUseBakerReadBarrier);
@@ -6869,7 +6980,7 @@ void CodeGeneratorARM64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
void CodeGeneratorARM64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
Location out,
Location root) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
// Insert a slow path based read barrier *after* the GC root load.
//
@@ -7003,6 +7114,7 @@ static void EmitGrayCheckAndFastPath(arm64::Arm64Assembler& assembler,
vixl::aarch64::MemOperand& lock_word,
vixl::aarch64::Label* slow_path,
vixl::aarch64::Label* throw_npe = nullptr) {
+ vixl::aarch64::Label throw_npe_cont;
// Load the lock word containing the rb_state.
__ Ldr(ip0.W(), lock_word);
// Given the numeric representation, it's enough to check the low bit of the rb_state.
@@ -7014,7 +7126,7 @@ static void EmitGrayCheckAndFastPath(arm64::Arm64Assembler& assembler,
"Field and array LDR offsets must be the same to reuse the same code.");
// To throw NPE, we return to the fast path; the artificial dependence below does not matter.
if (throw_npe != nullptr) {
- __ Bind(throw_npe);
+ __ Bind(&throw_npe_cont);
}
// Adjust the return address back to the LDR (1 instruction; 2 for heap poisoning).
static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
@@ -7026,6 +7138,12 @@ static void EmitGrayCheckAndFastPath(arm64::Arm64Assembler& assembler,
// a memory barrier (which would be more expensive).
__ Add(base_reg, base_reg, Operand(ip0, LSR, 32));
__ Br(lr); // And return back to the function.
+ if (throw_npe != nullptr) {
+ // Clear IP0 before returning to the fast path.
+ __ Bind(throw_npe);
+ __ Mov(ip0.X(), xzr);
+ __ B(&throw_npe_cont);
+ }
// Note: The fake dependency is unnecessary for the slow path.
}
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index f4d652c29c..6190364d1d 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -18,6 +18,7 @@
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM64_H_
#include "base/bit_field.h"
+#include "base/macros.h"
#include "class_root.h"
#include "code_generator.h"
#include "common_arm64.h"
@@ -36,7 +37,7 @@
#include "aarch64/macro-assembler-aarch64.h"
#pragma GCC diagnostic pop
-namespace art {
+namespace art HIDDEN {
namespace linker {
class Arm64RelativePatcherTest;
@@ -92,7 +93,10 @@ const vixl::aarch64::CPURegList runtime_reserved_core_registers =
vixl::aarch64::CPURegList(
tr,
// Reserve X20 as Marking Register when emitting Baker read barriers.
- ((kEmitCompilerReadBarrier && kUseBakerReadBarrier) ? mr : vixl::aarch64::NoCPUReg),
+ // TODO: We don't need to reserve the marking register for the userfaultfd GC, but
+ // that would require some work in the assembler code, as the right GC is
+ // chosen at load time and not at compile time.
+ (kReserveMarkingRegister ? mr : vixl::aarch64::NoCPUReg),
kImplicitSuspendCheckRegister,
vixl::aarch64::lr);
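kReserveMarkingRegister replaces the (kEmitCompilerReadBarrier && kUseBakerReadBarrier) test here. A minimal sketch of how such a compile-time flag could be derived, assuming it simply tracks the Baker read-barrier configuration (the constant names below are placeholders, not ART's actual definitions):

    // Illustrative placeholders for the build-time configuration.
    constexpr bool kBuildWithReadBarrier = true;
    constexpr bool kBuildWithBakerReadBarrier = true;

    // Reserve MR whenever Baker read barriers may be emitted. As the TODO
    // above notes, a userfaultfd GC would not need it, but the register
    // assignment must be fixed at compile time, before the GC is chosen at
    // load time.
    constexpr bool kReserveMarkingRegisterSketch =
        kBuildWithReadBarrier && kBuildWithBakerReadBarrier;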
@@ -111,9 +115,7 @@ inline Location FixedTempLocation() {
const vixl::aarch64::CPURegList callee_saved_core_registers(
vixl::aarch64::CPURegister::kRegister,
vixl::aarch64::kXRegSize,
- ((kEmitCompilerReadBarrier && kUseBakerReadBarrier)
- ? vixl::aarch64::x21.GetCode()
- : vixl::aarch64::x20.GetCode()),
+ (kReserveMarkingRegister ? vixl::aarch64::x21.GetCode() : vixl::aarch64::x20.GetCode()),
vixl::aarch64::x30.GetCode());
const vixl::aarch64::CPURegList callee_saved_fp_registers(vixl::aarch64::CPURegister::kVRegister,
vixl::aarch64::kDRegSize,
@@ -121,6 +123,41 @@ const vixl::aarch64::CPURegList callee_saved_fp_registers(vixl::aarch64::CPURegi
vixl::aarch64::d15.GetCode());
Location ARM64ReturnLocation(DataType::Type return_type);
+#define UNIMPLEMENTED_INTRINSIC_LIST_ARM64(V) \
+ V(StringStringIndexOf) \
+ V(StringStringIndexOfAfter) \
+ V(StringBufferAppend) \
+ V(StringBufferLength) \
+ V(StringBufferToString) \
+ V(StringBuilderAppendObject) \
+ V(StringBuilderAppendString) \
+ V(StringBuilderAppendCharSequence) \
+ V(StringBuilderAppendCharArray) \
+ V(StringBuilderAppendBoolean) \
+ V(StringBuilderAppendChar) \
+ V(StringBuilderAppendInt) \
+ V(StringBuilderAppendLong) \
+ V(StringBuilderAppendFloat) \
+ V(StringBuilderAppendDouble) \
+ V(StringBuilderLength) \
+ V(StringBuilderToString) \
+ V(SystemArrayCopyByte) \
+ V(SystemArrayCopyInt) \
+ /* 1.8 */ \
+ V(UnsafeGetAndAddInt) \
+ V(UnsafeGetAndAddLong) \
+ V(UnsafeGetAndSetInt) \
+ V(UnsafeGetAndSetLong) \
+ V(UnsafeGetAndSetObject) \
+ V(MethodHandleInvokeExact) \
+ V(MethodHandleInvoke) \
+ /* OpenJDK 11 */ \
+ V(JdkUnsafeGetAndAddInt) \
+ V(JdkUnsafeGetAndAddLong) \
+ V(JdkUnsafeGetAndSetInt) \
+ V(JdkUnsafeGetAndSetLong) \
+ V(JdkUnsafeGetAndSetObject)
+
class SlowPathCodeARM64 : public SlowPathCode {
public:
explicit SlowPathCodeARM64(HInstruction* instruction)
@@ -327,7 +364,8 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator {
void HandleFieldSet(HInstruction* instruction,
const FieldInfo& field_info,
- bool value_can_be_null);
+ bool value_can_be_null,
+ WriteBarrierKind write_barrier_kind);
void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
void HandleCondition(HCondition* instruction);
@@ -615,7 +653,7 @@ class CodeGeneratorARM64 : public CodeGenerator {
// Emit a write barrier.
void MarkGCCard(vixl::aarch64::Register object,
vixl::aarch64::Register value,
- bool value_can_be_null);
+ bool emit_null_check);
void GenerateMemoryBarrier(MemBarrierKind kind);
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 09fa598203..d69e77045b 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -26,7 +26,6 @@
#include "class_table.h"
#include "code_generator_utils.h"
#include "common_arm.h"
-#include "compiled_method.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "gc/accounting/card_table.h"
#include "gc/space/image_space.h"
@@ -46,7 +45,7 @@
#include "utils/assembler.h"
#include "utils/stack_checks.h"
-namespace art {
+namespace art HIDDEN {
namespace arm {
namespace vixl32 = vixl::aarch32;
@@ -744,7 +743,7 @@ class ReadBarrierForHeapReferenceSlowPathARMVIXL : public SlowPathCodeARMVIXL {
obj_(obj),
offset_(offset),
index_(index) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
// If `obj` is equal to `out` or `ref`, it means the initial object
// has been overwritten by (or after) the heap object reference load
// to be instrumented, e.g.:
@@ -922,7 +921,7 @@ class ReadBarrierForRootSlowPathARMVIXL : public SlowPathCodeARMVIXL {
public:
ReadBarrierForRootSlowPathARMVIXL(HInstruction* instruction, Location out, Location root)
: SlowPathCodeARMVIXL(instruction), out_(out), root_(root) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
}
void EmitNativeCode(CodeGenerator* codegen) override {
@@ -974,6 +973,10 @@ class MethodEntryExitHooksSlowPathARMVIXL : public SlowPathCodeARMVIXL {
(instruction_->IsMethodEntryHook()) ? kQuickMethodEntryHook : kQuickMethodExitHook;
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, locations);
+ if (instruction_->IsMethodExitHook()) {
+ // Load the frame size to pass to the exit hooks.
+ __ Mov(vixl::aarch32::Register(R2), arm_codegen->GetFrameSize());
+ }
arm_codegen->InvokeRuntime(entry_point, instruction_, instruction_->GetDexPc(), this);
RestoreLiveRegisters(codegen, locations);
__ B(GetExitLabel());
@@ -1845,7 +1848,7 @@ static Location Arm8BitEncodableConstantOrRegister(HInstruction* constant) {
DCHECK(!DataType::IsFloatingPointType(constant->GetType()));
if (constant->IsConstant() && CanEncodeConstantAs8BitImmediate(constant->AsConstant())) {
- return Location::ConstantLocation(constant->AsConstant());
+ return Location::ConstantLocation(constant);
}
return Location::RequiresRegister();
@@ -1904,6 +1907,33 @@ vixl32::Label* CodeGeneratorARMVIXL::GetFinalLabel(HInstruction* instruction,
return final_label;
}
+namespace detail {
+// Mark which intrinsics we don't have handcrafted code for.
+template <Intrinsics T>
+struct IsUnimplemented {
+ bool is_unimplemented = false;
+};
+
+#define TRUE_OVERRIDE(Name) \
+ template <> \
+ struct IsUnimplemented<Intrinsics::k##Name> { \
+ bool is_unimplemented = true; \
+ };
+UNIMPLEMENTED_INTRINSIC_LIST_ARM(TRUE_OVERRIDE)
+#undef TRUE_OVERRIDE
+
+#include "intrinsics_list.h"
+static constexpr bool kIsIntrinsicUnimplemented[] = {
+ false, // kNone
+#define IS_UNIMPLEMENTED(Intrinsic, ...) \
+ IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented,
+ INTRINSICS_LIST(IS_UNIMPLEMENTED)
+#undef IS_UNIMPLEMENTED
+};
+#undef INTRINSICS_LIST
+
+} // namespace detail
+
CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph,
const CompilerOptions& compiler_options,
OptimizingCompilerStats* stats)
@@ -1914,7 +1944,8 @@ CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph,
kCoreCalleeSaves.GetList(),
ComputeSRegisterListMask(kFpuCalleeSaves),
compiler_options,
- stats),
+ stats,
+ ArrayRef<const bool>(detail::kIsIntrinsicUnimplemented)),
block_labels_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
location_builder_(graph, this),
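The detail:: block above turns a macro list of unimplemented intrinsics into a constexpr bool table through template specialization. A self-contained miniature of the same pattern, using a made-up enum and list so it compiles on its own:

    #include <cstddef>

    // Miniature of the IsUnimplemented pattern with an illustrative enum/list.
    enum class Op { kNone, kAdd, kMul, kDiv };

    #define MY_OP_LIST(V) V(Add) V(Mul) V(Div)

    template <Op T> struct IsUnimplemented { static constexpr bool value = false; };

    // Mark the ops we have no handcrafted code for.
    #define TRUE_OVERRIDE(Name) \
      template <> struct IsUnimplemented<Op::k##Name> { static constexpr bool value = true; };
    TRUE_OVERRIDE(Div)
    #undef TRUE_OVERRIDE

    static constexpr bool kIsOpUnimplemented[] = {
        false,  // kNone
    #define IS_UNIMPLEMENTED(Name) IsUnimplemented<Op::k##Name>::value,
        MY_OP_LIST(IS_UNIMPLEMENTED)
    #undef IS_UNIMPLEMENTED
    };

    static_assert(!kIsOpUnimplemented[static_cast<size_t>(Op::kAdd)], "Add has handcrafted code");
    static_assert(kIsOpUnimplemented[static_cast<size_t>(Op::kDiv)], "Div falls back to the generic path");

The resulting table is handed to the CodeGenerator base class as an ArrayRef, exactly as the constructor above does with kIsIntrinsicUnimplemented.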
@@ -2101,7 +2132,10 @@ void CodeGeneratorARMVIXL::SetupBlockedRegisters() const {
blocked_core_registers_[LR] = true;
blocked_core_registers_[PC] = true;
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // TODO: We don't need to reserve the marking register for the userfaultfd GC, but
+ // that would require some work in the assembler code, as the right GC is
+ // chosen at load time and not at compile time.
+ if (kReserveMarkingRegister) {
// Reserve marking register.
blocked_core_registers_[MR] = true;
}
@@ -2164,9 +2198,24 @@ void InstructionCodeGeneratorARMVIXL::GenerateMethodEntryExitHook(HInstruction*
new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathARMVIXL(instruction);
codegen_->AddSlowPath(slow_path);
- int offset = instrumentation::Instrumentation::NeedsEntryExitHooksOffset().Int32Value();
+ if (instruction->IsMethodExitHook()) {
+ // Check whether the caller needs a deoptimization. Strictly speaking it would be sufficient
+ // to check whether the CheckCallerForDeopt bit is set, but it is faster to check whether the
+ // flag is simply non-zero. The kCHA bit is not used in debuggable runtimes, since CHA
+ // optimization is disabled there. The other bit is set when this method itself requires a
+ // deoptimization due to redefinition. So it is safe to just check for a non-zero value here.
+ GetAssembler()->LoadFromOffset(kLoadWord,
+ temp,
+ sp,
+ codegen_->GetStackOffsetOfShouldDeoptimizeFlag());
+ __ CompareAndBranchIfNonZero(temp, slow_path->GetEntryLabel());
+ }
+
+ MemberOffset offset = instruction->IsMethodExitHook() ?
+ instrumentation::Instrumentation::HaveMethodExitListenersOffset() :
+ instrumentation::Instrumentation::HaveMethodEntryListenersOffset();
uint32_t address = reinterpret_cast32<uint32_t>(Runtime::Current()->GetInstrumentation());
- __ Mov(temp, address + offset);
+ __ Mov(temp, address + offset.Int32Value());
__ Ldrb(temp, MemOperand(temp, 0));
__ CompareAndBranchIfNonZero(temp, slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
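The emitted fast path for the hooks therefore reduces to at most two cheap loads and compares before taking the slow path. A hedged C++ model of that control flow (function and parameter names are stand-ins, not runtime APIs):

    #include <cstdint>

    // Illustrative model of the emitted fast path, not the ART runtime API.
    bool NeedsEntryExitSlowPath(bool is_method_exit_hook,
                                uint32_t should_deoptimize_flag,  // read from the frame slot
                                uint8_t have_listeners_byte) {    // read from Instrumentation
      if (is_method_exit_hook && should_deoptimize_flag != 0u) {
        // Any non-zero value (CHA bit or redefinition bit) routes to the slow path.
        return true;
      }
      // Entry hooks test HaveMethodEntryListeners, exit hooks HaveMethodExitListeners;
      // either way a single byte load decides.
      return have_listeners_byte != 0u;
    }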
@@ -2234,6 +2283,61 @@ void CodeGeneratorARMVIXL::GenerateFrameEntry() {
bool skip_overflow_check =
IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm);
DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
+
+ // Check if we need to generate the clinit check. We will jump to the
+ // resolution stub if the class is not initialized and the executing thread is
+ // not the thread initializing it.
+ // We do this before constructing the frame to get the correct stack trace if
+ // an exception is thrown.
+ if (GetCompilerOptions().ShouldCompileWithClinitCheck(GetGraph()->GetArtMethod())) {
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ vixl32::Label resolution;
+ vixl32::Label memory_barrier;
+
+ // Check if we're visibly initialized.
+
+ vixl32::Register temp1 = temps.Acquire();
+ // Use r4 as other temporary register.
+ DCHECK(!blocked_core_registers_[R4]);
+ DCHECK(!kCoreCalleeSaves.Includes(r4));
+ vixl32::Register temp2 = r4;
+ for (vixl32::Register reg : kParameterCoreRegistersVIXL) {
+ DCHECK(!reg.Is(r4));
+ }
+
+ // We don't emit a read barrier here to save on code size. We rely on the
+ // resolution trampoline to do a suspend check before re-entering this code.
+ __ Ldr(temp1, MemOperand(kMethodRegister, ArtMethod::DeclaringClassOffset().Int32Value()));
+ __ Ldrb(temp2, MemOperand(temp1, status_byte_offset));
+ __ Cmp(temp2, shifted_visibly_initialized_value);
+ __ B(cs, &frame_entry_label_);
+
+ // Check if we're initialized and jump to code that does a memory barrier if
+ // so.
+ __ Cmp(temp2, shifted_initialized_value);
+ __ B(cs, &memory_barrier);
+
+ // Check if we're initializing and the thread initializing is the one
+ // executing the code.
+ __ Cmp(temp2, shifted_initializing_value);
+ __ B(lo, &resolution);
+
+ __ Ldr(temp1, MemOperand(temp1, mirror::Class::ClinitThreadIdOffset().Int32Value()));
+ __ Ldr(temp2, MemOperand(tr, Thread::TidOffset<kArmPointerSize>().Int32Value()));
+ __ Cmp(temp1, temp2);
+ __ B(eq, &frame_entry_label_);
+ __ Bind(&resolution);
+
+ // Jump to the resolution stub.
+ ThreadOffset32 entrypoint_offset =
+ GetThreadOffset<kArmPointerSize>(kQuickQuickResolutionTrampoline);
+ __ Ldr(temp1, MemOperand(tr, entrypoint_offset.Int32Value()));
+ __ Bx(temp1);
+
+ __ Bind(&memory_barrier);
+ GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+ }
+
__ Bind(&frame_entry_label_);
if (HasEmptyFrame()) {
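The status ladder that this frame-entry check implements can be summarized in plain C++. A hedged model (the status encoding is simplified; the generated code compares shifted status bytes, not enum values):

    #include <cstdint>

    // Illustrative model of the emitted clinit fast path; the status values are
    // simplified placeholders for the real shifted ClassStatus encoding.
    constexpr uint8_t kInitializing       = 1;
    constexpr uint8_t kInitialized        = 2;
    constexpr uint8_t kVisiblyInitialized = 3;

    enum class Action { kRunMethod, kRunAfterMemoryBarrier, kGoToResolutionStub };

    Action CheckClinit(uint8_t status, uint32_t clinit_tid, uint32_t self_tid) {
      if (status >= kVisiblyInitialized) {
        return Action::kRunMethod;                // fully initialized, no barrier needed
      }
      if (status >= kInitialized) {
        return Action::kRunAfterMemoryBarrier;    // initialized, enter after a memory barrier
      }
      if (status < kInitializing) {
        return Action::kGoToResolutionStub;       // let the runtime resolve/initialize
      }
      // Initializing: only the initializing thread may enter the method.
      return (clinit_tid == self_tid) ? Action::kRunMethod : Action::kGoToResolutionStub;
    }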
@@ -3069,12 +3173,12 @@ void InstructionCodeGeneratorARMVIXL::VisitSelect(HSelect* select) {
}
}
-void LocationsBuilderARMVIXL::VisitNativeDebugInfo(HNativeDebugInfo* info) {
- new (GetGraph()->GetAllocator()) LocationSummary(info);
+void LocationsBuilderARMVIXL::VisitNop(HNop* nop) {
+ new (GetGraph()->GetAllocator()) LocationSummary(nop);
}
-void InstructionCodeGeneratorARMVIXL::VisitNativeDebugInfo(HNativeDebugInfo*) {
- // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile.
+void InstructionCodeGeneratorARMVIXL::VisitNop(HNop*) {
+ // The environment recording already happened in CodeGenerator::Compile.
}
void CodeGeneratorARMVIXL::IncreaseFrame(size_t adjustment) {
@@ -4514,10 +4618,11 @@ void LocationsBuilderARMVIXL::VisitDiv(HDiv* div) {
switch (div->GetResultType()) {
case DataType::Type::kInt32: {
- if (div->InputAt(1)->IsConstant()) {
+ HInstruction* divisor = div->InputAt(1);
+ if (divisor->IsConstant()) {
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::ConstantLocation(div->InputAt(1)->AsConstant()));
- int32_t value = Int32ConstantFrom(div->InputAt(1));
+ locations->SetInAt(1, Location::ConstantLocation(divisor));
+ int32_t value = Int32ConstantFrom(divisor);
Location::OutputOverlap out_overlaps = Location::kNoOutputOverlap;
if (value == 1 || value == 0 || value == -1) {
// No temp register required.
@@ -4631,10 +4736,11 @@ void LocationsBuilderARMVIXL::VisitRem(HRem* rem) {
switch (type) {
case DataType::Type::kInt32: {
- if (rem->InputAt(1)->IsConstant()) {
+ HInstruction* divisor = rem->InputAt(1);
+ if (divisor->IsConstant()) {
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::ConstantLocation(rem->InputAt(1)->AsConstant()));
- int32_t value = Int32ConstantFrom(rem->InputAt(1));
+ locations->SetInAt(1, Location::ConstantLocation(divisor));
+ int32_t value = Int32ConstantFrom(divisor);
Location::OutputOverlap out_overlaps = Location::kNoOutputOverlap;
if (value == 1 || value == 0 || value == -1) {
// No temp register required.
@@ -5187,17 +5293,18 @@ void InstructionCodeGeneratorARMVIXL::HandleLongRotate(HRor* ror) {
void LocationsBuilderARMVIXL::VisitRor(HRor* ror) {
LocationSummary* locations =
new (GetGraph()->GetAllocator()) LocationSummary(ror, LocationSummary::kNoCall);
+ HInstruction* shift = ror->InputAt(1);
switch (ror->GetResultType()) {
case DataType::Type::kInt32: {
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RegisterOrConstant(ror->InputAt(1)));
+ locations->SetInAt(1, Location::RegisterOrConstant(shift));
locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
break;
}
case DataType::Type::kInt64: {
locations->SetInAt(0, Location::RequiresRegister());
- if (ror->InputAt(1)->IsConstant()) {
- locations->SetInAt(1, Location::ConstantLocation(ror->InputAt(1)->AsConstant()));
+ if (shift->IsConstant()) {
+ locations->SetInAt(1, Location::ConstantLocation(shift));
} else {
locations->SetInAt(1, Location::RequiresRegister());
locations->AddTemp(Location::RequiresRegister());
@@ -5234,11 +5341,12 @@ void LocationsBuilderARMVIXL::HandleShift(HBinaryOperation* op) {
LocationSummary* locations =
new (GetGraph()->GetAllocator()) LocationSummary(op, LocationSummary::kNoCall);
+ HInstruction* shift = op->InputAt(1);
switch (op->GetResultType()) {
case DataType::Type::kInt32: {
locations->SetInAt(0, Location::RequiresRegister());
- if (op->InputAt(1)->IsConstant()) {
- locations->SetInAt(1, Location::ConstantLocation(op->InputAt(1)->AsConstant()));
+ if (shift->IsConstant()) {
+ locations->SetInAt(1, Location::ConstantLocation(shift));
locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
} else {
locations->SetInAt(1, Location::RequiresRegister());
@@ -5250,8 +5358,8 @@ void LocationsBuilderARMVIXL::HandleShift(HBinaryOperation* op) {
}
case DataType::Type::kInt64: {
locations->SetInAt(0, Location::RequiresRegister());
- if (op->InputAt(1)->IsConstant()) {
- locations->SetInAt(1, Location::ConstantLocation(op->InputAt(1)->AsConstant()));
+ if (shift->IsConstant()) {
+ locations->SetInAt(1, Location::ConstantLocation(shift));
// For simplicity, use kOutputOverlap even though we only require that low registers
// don't clash with high registers which the register allocator currently guarantees.
locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
@@ -5727,8 +5835,9 @@ void InstructionCodeGeneratorARMVIXL::GenerateWideAtomicStore(vixl32::Register a
__ CompareAndBranchIfNonZero(temp1, &fail);
}
-void LocationsBuilderARMVIXL::HandleFieldSet(
- HInstruction* instruction, const FieldInfo& field_info) {
+void LocationsBuilderARMVIXL::HandleFieldSet(HInstruction* instruction,
+ const FieldInfo& field_info,
+ WriteBarrierKind write_barrier_kind) {
DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
LocationSummary* locations =
@@ -5751,8 +5860,12 @@ void LocationsBuilderARMVIXL::HandleFieldSet(
// Temporary registers for the write barrier.
// TODO: consider renaming StoreNeedsWriteBarrier to StoreNeedsGCMark.
if (needs_write_barrier) {
- locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too.
- locations->AddTemp(Location::RequiresRegister());
+ if (write_barrier_kind != WriteBarrierKind::kDontEmit) {
+ locations->AddTemp(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
+ } else if (kPoisonHeapReferences) {
+ locations->AddTemp(Location::RequiresRegister());
+ }
} else if (generate_volatile) {
 // The ARM encoding has some additional constraints for ldrexd/strexd:
// - registers need to be consecutive
@@ -5773,7 +5886,8 @@ void LocationsBuilderARMVIXL::HandleFieldSet(
void InstructionCodeGeneratorARMVIXL::HandleFieldSet(HInstruction* instruction,
const FieldInfo& field_info,
- bool value_can_be_null) {
+ bool value_can_be_null,
+ WriteBarrierKind write_barrier_kind) {
DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
LocationSummary* locations = instruction->GetLocations();
@@ -5889,10 +6003,16 @@ void InstructionCodeGeneratorARMVIXL::HandleFieldSet(HInstruction* instruction,
UNREACHABLE();
}
- if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
+ if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1)) &&
+ write_barrier_kind != WriteBarrierKind::kDontEmit) {
vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
vixl32::Register card = RegisterFrom(locations->GetTemp(1));
- codegen_->MarkGCCard(temp, card, base, RegisterFrom(value), value_can_be_null);
+ codegen_->MarkGCCard(
+ temp,
+ card,
+ base,
+ RegisterFrom(value),
+ value_can_be_null && write_barrier_kind == WriteBarrierKind::kEmitWithNullCheck);
}
if (is_volatile) {
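Temp allocation in HandleFieldSet now depends on the write-barrier kind: two temps when a card mark will actually be emitted, one when the barrier is elided but heap references are poisoned, none otherwise. A small sketch of that decision, reusing the WriteBarrierKind values seen above (the helper itself is made up):

    enum class WriteBarrierKind { kEmitWithNullCheck, kEmitNoNullCheck, kDontEmit };

    // Illustrative helper mirroring the temp-allocation decision above.
    int NumFieldSetTemps(bool needs_write_barrier,
                         WriteBarrierKind kind,
                         bool poison_heap_references) {
      if (!needs_write_barrier) {
        return 0;
      }
      if (kind != WriteBarrierKind::kDontEmit) {
        return 2;  // temps for the card mark (and possibly reference poisoning)
      }
      // Barrier elided, but poisoning the stored reference still needs a scratch.
      return poison_heap_references ? 1 : 0;
    }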
@@ -5911,7 +6031,7 @@ void LocationsBuilderARMVIXL::HandleFieldGet(HInstruction* instruction,
instruction->IsPredicatedInstanceFieldGet());
bool object_field_get_with_read_barrier =
- kEmitCompilerReadBarrier && (field_info.GetFieldType() == DataType::Type::kReference);
+ gUseReadBarrier && (field_info.GetFieldType() == DataType::Type::kReference);
bool is_predicated = instruction->IsPredicatedInstanceFieldGet();
LocationSummary* locations =
new (GetGraph()->GetAllocator()) LocationSummary(instruction,
@@ -5975,7 +6095,7 @@ Location LocationsBuilderARMVIXL::ArithmeticZeroOrFpuRegister(HInstruction* inpu
DCHECK(DataType::IsFloatingPointType(input->GetType())) << input->GetType();
if ((input->IsFloatConstant() && (input->AsFloatConstant()->IsArithmeticZero())) ||
(input->IsDoubleConstant() && (input->AsDoubleConstant()->IsArithmeticZero()))) {
- return Location::ConstantLocation(input->AsConstant());
+ return Location::ConstantLocation(input);
} else {
return Location::RequiresFpuRegister();
}
@@ -5986,7 +6106,7 @@ Location LocationsBuilderARMVIXL::ArmEncodableConstantOrRegister(HInstruction* c
DCHECK(!DataType::IsFloatingPointType(constant->GetType()));
if (constant->IsConstant() &&
CanEncodeConstantAsImmediate(constant->AsConstant(), opcode)) {
- return Location::ConstantLocation(constant->AsConstant());
+ return Location::ConstantLocation(constant);
}
return Location::RequiresRegister();
}
@@ -6082,7 +6202,7 @@ void InstructionCodeGeneratorARMVIXL::HandleFieldGet(HInstruction* instruction,
case DataType::Type::kReference: {
// /* HeapReference<Object> */ out = *(base + offset)
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
Location maybe_temp = (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location();
// Note that a potential implicit null check is handled in this
// CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier call.
@@ -6165,11 +6285,14 @@ void InstructionCodeGeneratorARMVIXL::HandleFieldGet(HInstruction* instruction,
}
void LocationsBuilderARMVIXL::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
- HandleFieldSet(instruction, instruction->GetFieldInfo());
+ HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetWriteBarrierKind());
}
void InstructionCodeGeneratorARMVIXL::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
- HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
+ HandleFieldSet(instruction,
+ instruction->GetFieldInfo(),
+ instruction->GetValueCanBeNull(),
+ instruction->GetWriteBarrierKind());
}
void LocationsBuilderARMVIXL::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
@@ -6202,11 +6325,14 @@ void InstructionCodeGeneratorARMVIXL::VisitStaticFieldGet(HStaticFieldGet* instr
}
void LocationsBuilderARMVIXL::VisitStaticFieldSet(HStaticFieldSet* instruction) {
- HandleFieldSet(instruction, instruction->GetFieldInfo());
+ HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetWriteBarrierKind());
}
void InstructionCodeGeneratorARMVIXL::VisitStaticFieldSet(HStaticFieldSet* instruction) {
- HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
+ HandleFieldSet(instruction,
+ instruction->GetFieldInfo(),
+ instruction->GetValueCanBeNull(),
+ instruction->GetWriteBarrierKind());
}
void LocationsBuilderARMVIXL::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
@@ -6386,7 +6512,7 @@ void CodeGeneratorARMVIXL::StoreToShiftedRegOffset(DataType::Type type,
void LocationsBuilderARMVIXL::VisitArrayGet(HArrayGet* instruction) {
bool object_array_get_with_read_barrier =
- kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
+ gUseReadBarrier && (instruction->GetType() == DataType::Type::kReference);
LocationSummary* locations =
new (GetGraph()->GetAllocator()) LocationSummary(instruction,
object_array_get_with_read_barrier
@@ -6534,14 +6660,14 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) {
// The read barrier instrumentation of object ArrayGet
// instructions does not support the HIntermediateAddress
// instruction.
- DCHECK(!(has_intermediate_address && kEmitCompilerReadBarrier));
+ DCHECK(!(has_intermediate_address && gUseReadBarrier));
static_assert(
sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
"art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
// /* HeapReference<Object> */ out =
// *(obj + data_offset + index * sizeof(HeapReference<Object>))
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
// Note that a potential implicit null check is handled in this
// CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier call.
DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0)));
@@ -6688,8 +6814,10 @@ void LocationsBuilderARMVIXL::VisitArraySet(HArraySet* instruction) {
locations->SetInAt(2, Location::RequiresRegister());
}
if (needs_write_barrier) {
- // Temporary registers for the write barrier.
- locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too.
+ // Temporary registers for the write barrier or reference poisoning.
+ // TODO(solanes): We could reduce the temp usage but it requires some non-trivial refactoring of
+ // InstructionCodeGeneratorARMVIXL::VisitArraySet.
+ locations->AddTemp(Location::RequiresRegister());
locations->AddTemp(Location::RequiresRegister());
}
}
@@ -6841,7 +6969,11 @@ void InstructionCodeGeneratorARMVIXL::VisitArraySet(HArraySet* instruction) {
}
}
- codegen_->MarkGCCard(temp1, temp2, array, value, /* value_can_be_null= */ false);
+ if (instruction->GetWriteBarrierKind() != WriteBarrierKind::kDontEmit) {
+ DCHECK_EQ(instruction->GetWriteBarrierKind(), WriteBarrierKind::kEmitNoNullCheck)
+ << " Already null checked so we shouldn't do it again.";
+ codegen_->MarkGCCard(temp1, temp2, array, value, /* emit_null_check= */ false);
+ }
if (can_value_be_null) {
DCHECK(do_store.IsReferenced());
@@ -7025,10 +7157,10 @@ void LocationsBuilderARMVIXL::VisitBoundsCheck(HBoundsCheck* instruction) {
// locations.
bool both_const = index->IsConstant() && length->IsConstant();
locations->SetInAt(0, both_const
- ? Location::ConstantLocation(index->AsConstant())
+ ? Location::ConstantLocation(index)
: ArmEncodableConstantOrRegister(index, CMP));
locations->SetInAt(1, both_const
- ? Location::ConstantLocation(length->AsConstant())
+ ? Location::ConstantLocation(length)
: ArmEncodableConstantOrRegister(length, CMP));
}
@@ -7072,9 +7204,9 @@ void CodeGeneratorARMVIXL::MarkGCCard(vixl32::Register temp,
vixl32::Register card,
vixl32::Register object,
vixl32::Register value,
- bool value_can_be_null) {
+ bool emit_null_check) {
vixl32::Label is_null;
- if (value_can_be_null) {
+ if (emit_null_check) {
__ CompareAndBranchIfZero(value, &is_null, /* is_far_target=*/ false);
}
// Load the address of the card table into `card`.
@@ -7097,7 +7229,7 @@ void CodeGeneratorARMVIXL::MarkGCCard(vixl32::Register temp,
// of the card to mark; and 2. to load the `kCardDirty` value) saves a load
// (no need to explicitly load `kCardDirty` as an immediate value).
__ Strb(card, MemOperand(card, temp));
- if (value_can_be_null) {
+ if (emit_null_check) {
__ Bind(&is_null);
}
}
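MarkGCCard stores the low byte of the card-table pointer itself at base + (object >> shift); as the comment above notes, that byte is arranged to equal kCardDirty, which saves loading the immediate separately. A standalone sketch of the equivalent plain card-marking arithmetic (the shift and dirty value are assumptions for illustration):

    #include <cstdint>

    // Standalone model of card marking; constants are illustrative placeholders.
    constexpr uintptr_t kCardShift = 10;  // assumed: one card byte per 1 KiB of heap
    constexpr uint8_t kCardDirty = 0x70;  // assumed dirty marker value

    void MarkCard(uint8_t* card_table_base, const void* written_object) {
      uintptr_t addr = reinterpret_cast<uintptr_t>(written_object);
      // One byte of card table per 2^kCardShift bytes of heap.
      card_table_base[addr >> kCardShift] = kCardDirty;
    }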
@@ -7459,7 +7591,7 @@ void LocationsBuilderARMVIXL::VisitLoadClass(HLoadClass* cls) {
load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
load_kind == HLoadClass::LoadKind::kBssEntryPackage);
- const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
+ const bool requires_read_barrier = gUseReadBarrier && !cls->IsInBootImage();
LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
? LocationSummary::kCallOnSlowPath
: LocationSummary::kNoCall;
@@ -7473,7 +7605,7 @@ void LocationsBuilderARMVIXL::VisitLoadClass(HLoadClass* cls) {
}
locations->SetOut(Location::RequiresRegister());
if (load_kind == HLoadClass::LoadKind::kBssEntry) {
- if (!kUseReadBarrier || kUseBakerReadBarrier) {
+ if (!gUseReadBarrier || kUseBakerReadBarrier) {
// Rely on the type resolution or initialization and marking to save everything we need.
locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
} else {
@@ -7501,7 +7633,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_
const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
? kWithoutReadBarrier
- : kCompilerReadBarrierOption;
+ : gCompilerReadBarrierOption;
bool generate_null_check = false;
switch (load_kind) {
case HLoadClass::LoadKind::kReferrersClass: {
@@ -7622,12 +7754,7 @@ void InstructionCodeGeneratorARMVIXL::GenerateClassInitializationCheck(
LoadClassSlowPathARMVIXL* slow_path, vixl32::Register class_reg) {
UseScratchRegisterScope temps(GetVIXLAssembler());
vixl32::Register temp = temps.Acquire();
- constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
- constexpr uint32_t shifted_visibly_initialized_value =
- enum_cast<uint32_t>(ClassStatus::kVisiblyInitialized) << status_lsb_position;
-
- const size_t status_offset = mirror::Class::StatusOffset().SizeValue();
- GetAssembler()->LoadFromOffset(kLoadWord, temp, class_reg, status_offset);
+ __ Ldrb(temp, MemOperand(class_reg, status_byte_offset));
__ Cmp(temp, shifted_visibly_initialized_value);
__ B(lo, slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
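GenerateClassInitializationCheck now loads only the byte that holds the class status bits, so the constants it compares against are pre-shifted into that byte. A hedged sketch of how such constants can be derived from a word offset and a bit position (the concrete numbers are assumptions, not the real mirror::Class layout):

    #include <cstdint>

    // Illustrative derivation; the real values come from the mirror::Class
    // status field layout and SubtypeCheckBits, not these placeholder numbers.
    constexpr uint32_t kStatusWordOffset  = 0x70;  // assumed byte offset of the 32-bit status word
    constexpr uint32_t kStatusLsbPosition = 28;    // assumed bit position of the status bits
    constexpr uint32_t kVisiblyInitialized = 11;   // assumed ClassStatus enumerator value

    // Offset of the byte containing the status bits (little-endian layout), and
    // the status value shifted into that byte's bit positions.
    constexpr uint32_t kStatusByteOffset =
        kStatusWordOffset + (kStatusLsbPosition / 8u);
    constexpr uint32_t kShiftedVisiblyInitializedValue =
        kVisiblyInitialized << (kStatusLsbPosition % 8u);

    static_assert(kShiftedVisiblyInitializedValue <= 0xffu, "must fit in the loaded byte");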
@@ -7721,7 +7848,7 @@ void LocationsBuilderARMVIXL::VisitLoadString(HLoadString* load) {
} else {
locations->SetOut(Location::RequiresRegister());
if (load_kind == HLoadString::LoadKind::kBssEntry) {
- if (!kUseReadBarrier || kUseBakerReadBarrier) {
+ if (!gUseReadBarrier || kUseBakerReadBarrier) {
// Rely on the pResolveString and marking to save everything we need, including temps.
locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
} else {
@@ -7760,7 +7887,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THRE
codegen_->EmitMovwMovtPlaceholder(labels, out);
// All aligned loads are implicitly atomic consume operations on ARM.
codegen_->GenerateGcRootFieldLoad(
- load, out_loc, out, /*offset=*/ 0, kCompilerReadBarrierOption);
+ load, out_loc, out, /*offset=*/ 0, gCompilerReadBarrierOption);
LoadStringSlowPathARMVIXL* slow_path =
new (codegen_->GetScopedAllocator()) LoadStringSlowPathARMVIXL(load);
codegen_->AddSlowPath(slow_path);
@@ -7781,7 +7908,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THRE
load->GetString()));
// /* GcRoot<mirror::String> */ out = *out
codegen_->GenerateGcRootFieldLoad(
- load, out_loc, out, /*offset=*/ 0, kCompilerReadBarrierOption);
+ load, out_loc, out, /*offset=*/ 0, gCompilerReadBarrierOption);
return;
}
default:
@@ -7838,7 +7965,7 @@ void InstructionCodeGeneratorARMVIXL::VisitThrow(HThrow* instruction) {
// Temp is used for read barrier.
static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
- if (kEmitCompilerReadBarrier &&
+ if (gUseReadBarrier &&
(kUseBakerReadBarrier ||
type_check_kind == TypeCheckKind::kAbstractClassCheck ||
type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
@@ -7888,9 +8015,9 @@ void LocationsBuilderARMVIXL::VisitInstanceOf(HInstanceOf* instruction) {
}
locations->SetInAt(0, Location::RequiresRegister());
if (type_check_kind == TypeCheckKind::kBitstringCheck) {
- locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
- locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
- locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
+ locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)));
+ locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)));
} else {
locations->SetInAt(1, Location::RequiresRegister());
}
@@ -8185,9 +8312,9 @@ void LocationsBuilderARMVIXL::VisitCheckCast(HCheckCast* instruction) {
new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
locations->SetInAt(0, Location::RequiresRegister());
if (type_check_kind == TypeCheckKind::kBitstringCheck) {
- locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
- locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
- locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
+ locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)));
+ locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)));
} else {
locations->SetInAt(1, Location::RequiresRegister());
}
@@ -8773,7 +8900,7 @@ void InstructionCodeGeneratorARMVIXL::GenerateReferenceLoadOneRegister(
ReadBarrierOption read_barrier_option) {
vixl32::Register out_reg = RegisterFrom(out);
if (read_barrier_option == kWithReadBarrier) {
- CHECK(kEmitCompilerReadBarrier);
+ CHECK(gUseReadBarrier);
DCHECK(maybe_temp.IsRegister()) << maybe_temp;
if (kUseBakerReadBarrier) {
// Load with fast path based Baker's read barrier.
@@ -8808,7 +8935,7 @@ void InstructionCodeGeneratorARMVIXL::GenerateReferenceLoadTwoRegisters(
vixl32::Register out_reg = RegisterFrom(out);
vixl32::Register obj_reg = RegisterFrom(obj);
if (read_barrier_option == kWithReadBarrier) {
- CHECK(kEmitCompilerReadBarrier);
+ CHECK(gUseReadBarrier);
if (kUseBakerReadBarrier) {
DCHECK(maybe_temp.IsRegister()) << maybe_temp;
// Load with fast path based Baker's read barrier.
@@ -8837,7 +8964,7 @@ void CodeGeneratorARMVIXL::GenerateGcRootFieldLoad(
ReadBarrierOption read_barrier_option) {
vixl32::Register root_reg = RegisterFrom(root);
if (read_barrier_option == kWithReadBarrier) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
if (kUseBakerReadBarrier) {
// Fast path implementation of art::ReadBarrier::BarrierForRoot when
 // Baker's read barriers are used.
@@ -8901,7 +9028,7 @@ void CodeGeneratorARMVIXL::GenerateGcRootFieldLoad(
void CodeGeneratorARMVIXL::GenerateIntrinsicCasMoveWithBakerReadBarrier(
vixl::aarch32::Register marked_old_value,
vixl::aarch32::Register old_value) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
DCHECK(kUseBakerReadBarrier);
// Similar to the Baker RB path in GenerateGcRootFieldLoad(), with a MOV instead of LDR.
@@ -8935,7 +9062,7 @@ void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* i
vixl32::Register obj,
const vixl32::MemOperand& src,
bool needs_null_check) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
DCHECK(kUseBakerReadBarrier);
// Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
@@ -9028,7 +9155,7 @@ void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(Location ref,
Location index,
Location temp,
bool needs_null_check) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
DCHECK(kUseBakerReadBarrier);
static_assert(
@@ -9094,7 +9221,7 @@ void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(Location ref,
void CodeGeneratorARMVIXL::MaybeGenerateMarkingRegisterCheck(int code, Location temp_loc) {
// The following condition is a compile-time one, so it does not have a run-time cost.
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier && kIsDebugBuild) {
+ if (kIsDebugBuild && gUseReadBarrier && kUseBakerReadBarrier) {
// The following condition is a run-time one; it is executed after the
// previous compile-time test, to avoid penalizing non-debug builds.
if (GetCompilerOptions().EmitRunTimeChecksInDebugMode()) {
@@ -9124,7 +9251,7 @@ void CodeGeneratorARMVIXL::GenerateReadBarrierSlow(HInstruction* instruction,
Location obj,
uint32_t offset,
Location index) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
// Insert a slow path based read barrier *after* the reference load.
//
@@ -9150,7 +9277,7 @@ void CodeGeneratorARMVIXL::MaybeGenerateReadBarrierSlow(HInstruction* instructio
Location obj,
uint32_t offset,
Location index) {
- if (kEmitCompilerReadBarrier) {
+ if (gUseReadBarrier) {
// Baker's read barriers shall be handled by the fast path
// (CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier).
DCHECK(!kUseBakerReadBarrier);
@@ -9165,7 +9292,7 @@ void CodeGeneratorARMVIXL::MaybeGenerateReadBarrierSlow(HInstruction* instructio
void CodeGeneratorARMVIXL::GenerateReadBarrierForRootSlow(HInstruction* instruction,
Location out,
Location root) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
// Insert a slow path based read barrier *after* the GC root load.
//
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index 790ad0f8f7..f5abe6951a 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -18,6 +18,7 @@
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM_VIXL_H_
#include "base/enums.h"
+#include "base/macros.h"
#include "class_root.h"
#include "code_generator.h"
#include "common_arm.h"
@@ -36,7 +37,7 @@
#include "aarch32/macro-assembler-aarch32.h"
#pragma GCC diagnostic pop
-namespace art {
+namespace art HIDDEN {
namespace linker {
class Thumb2RelativePatcherTest;
@@ -84,7 +85,7 @@ static const vixl::aarch32::RegisterList kCoreCalleeSaves = vixl::aarch32::Regis
vixl::aarch32::r6,
vixl::aarch32::r7),
// Do not consider r8 as a callee-save register with Baker read barriers.
- ((kEmitCompilerReadBarrier && kUseBakerReadBarrier)
+ (kReserveMarkingRegister
? vixl::aarch32::RegisterList()
: vixl::aarch32::RegisterList(vixl::aarch32::r8)),
vixl::aarch32::RegisterList(vixl::aarch32::r10,
@@ -118,6 +119,65 @@ class CodeGeneratorARMVIXL;
using VIXLInt32Literal = vixl::aarch32::Literal<int32_t>;
using VIXLUInt32Literal = vixl::aarch32::Literal<uint32_t>;
+#define UNIMPLEMENTED_INTRINSIC_LIST_ARM(V) \
+ V(MathRoundDouble) /* Could be done by changing rounding mode, maybe? */ \
+ V(UnsafeCASLong) /* High register pressure */ \
+ V(SystemArrayCopyChar) \
+ V(LongDivideUnsigned) \
+ V(CRC32Update) \
+ V(CRC32UpdateBytes) \
+ V(CRC32UpdateByteBuffer) \
+ V(FP16ToFloat) \
+ V(FP16ToHalf) \
+ V(FP16Floor) \
+ V(FP16Ceil) \
+ V(FP16Rint) \
+ V(FP16Greater) \
+ V(FP16GreaterEquals) \
+ V(FP16Less) \
+ V(FP16LessEquals) \
+ V(FP16Compare) \
+ V(FP16Min) \
+ V(FP16Max) \
+ V(MathMultiplyHigh) \
+ V(StringStringIndexOf) \
+ V(StringStringIndexOfAfter) \
+ V(StringBufferAppend) \
+ V(StringBufferLength) \
+ V(StringBufferToString) \
+ V(StringBuilderAppendObject) \
+ V(StringBuilderAppendString) \
+ V(StringBuilderAppendCharSequence) \
+ V(StringBuilderAppendCharArray) \
+ V(StringBuilderAppendBoolean) \
+ V(StringBuilderAppendChar) \
+ V(StringBuilderAppendInt) \
+ V(StringBuilderAppendLong) \
+ V(StringBuilderAppendFloat) \
+ V(StringBuilderAppendDouble) \
+ V(StringBuilderLength) \
+ V(StringBuilderToString) \
+ V(SystemArrayCopyByte) \
+ V(SystemArrayCopyInt) \
+ /* 1.8 */ \
+ V(MathFmaDouble) \
+ V(MathFmaFloat) \
+ V(UnsafeGetAndAddInt) \
+ V(UnsafeGetAndAddLong) \
+ V(UnsafeGetAndSetInt) \
+ V(UnsafeGetAndSetLong) \
+ V(UnsafeGetAndSetObject) \
+ V(MethodHandleInvokeExact) \
+ V(MethodHandleInvoke) \
+ /* OpenJDK 11 */ \
+ V(JdkUnsafeCASLong) /* High register pressure */ \
+ V(JdkUnsafeGetAndAddInt) \
+ V(JdkUnsafeGetAndAddLong) \
+ V(JdkUnsafeGetAndSetInt) \
+ V(JdkUnsafeGetAndSetLong) \
+ V(JdkUnsafeGetAndSetObject) \
+ V(JdkUnsafeCompareAndSetLong)
+
class JumpTableARMVIXL : public DeletableArenaObject<kArenaAllocSwitchTable> {
public:
explicit JumpTableARMVIXL(HPackedSwitch* switch_instr)
@@ -309,7 +369,9 @@ class LocationsBuilderARMVIXL : public HGraphVisitor {
void HandleIntegerRotate(LocationSummary* locations);
void HandleLongRotate(LocationSummary* locations);
void HandleShift(HBinaryOperation* operation);
- void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
+ void HandleFieldSet(HInstruction* instruction,
+ const FieldInfo& field_info,
+ WriteBarrierKind write_barrier_kind);
void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
Location ArithmeticZeroOrFpuRegister(HInstruction* input);
@@ -378,7 +440,8 @@ class InstructionCodeGeneratorARMVIXL : public InstructionCodeGenerator {
void HandleFieldSet(HInstruction* instruction,
const FieldInfo& field_info,
- bool value_can_be_null);
+ bool value_can_be_null,
+ WriteBarrierKind write_barrier_kind);
void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
void GenerateMinMaxInt(LocationSummary* locations, bool is_min);
@@ -542,7 +605,7 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
vixl::aarch32::Register card,
vixl::aarch32::Register object,
vixl::aarch32::Register value,
- bool value_can_be_null);
+ bool emit_null_check);
void GenerateMemoryBarrier(MemBarrierKind kind);
@@ -602,7 +665,6 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
struct PcRelativePatchInfo {
PcRelativePatchInfo(const DexFile* dex_file, uint32_t off_or_idx)
: target_dex_file(dex_file), offset_or_index(off_or_idx) { }
- PcRelativePatchInfo(PcRelativePatchInfo&& other) = default;
// Target dex file or null for .data.bmig.rel.ro patches.
const DexFile* target_dex_file;
diff --git a/compiler/optimizing/code_generator_riscv64.h b/compiler/optimizing/code_generator_riscv64.h
new file mode 100644
index 0000000000..405b39aa0a
--- /dev/null
+++ b/compiler/optimizing/code_generator_riscv64.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_RISCV64_H_
+#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_RISCV64_H_
+
+#include "code_generator.h"
+#include "driver/compiler_options.h"
+
+#endif // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_RISCV64_H_
diff --git a/compiler/optimizing/code_generator_utils.cc b/compiler/optimizing/code_generator_utils.cc
index abec26464a..99805928e4 100644
--- a/compiler/optimizing/code_generator_utils.cc
+++ b/compiler/optimizing/code_generator_utils.cc
@@ -20,7 +20,7 @@
#include "nodes.h"
-namespace art {
+namespace art HIDDEN {
void CalculateMagicAndShiftForDivRem(int64_t divisor, bool is_long,
int64_t* magic, int* shift) {
diff --git a/compiler/optimizing/code_generator_utils.h b/compiler/optimizing/code_generator_utils.h
index 64665adc15..9d9ab2b118 100644
--- a/compiler/optimizing/code_generator_utils.h
+++ b/compiler/optimizing/code_generator_utils.h
@@ -21,7 +21,9 @@
#include <cstdlib>
#include <limits>
-namespace art {
+#include "base/macros.h"
+
+namespace art HIDDEN {
class HInstruction;
diff --git a/compiler/optimizing/code_generator_vector_arm64_neon.cc b/compiler/optimizing/code_generator_vector_arm64_neon.cc
index 0fe9898635..6b6e25cf0c 100644
--- a/compiler/optimizing/code_generator_vector_arm64_neon.cc
+++ b/compiler/optimizing/code_generator_vector_arm64_neon.cc
@@ -23,7 +23,7 @@
using namespace vixl::aarch64; // NOLINT(build/namespaces)
-namespace art {
+namespace art HIDDEN {
namespace arm64 {
using helpers::DRegisterFrom;
@@ -65,7 +65,7 @@ inline Location NEONEncodableConstantOrRegister(HInstruction* constant,
HInstruction* instr) {
if (constant->IsConstant()
&& NEONCanEncodeConstantAsImmediate(constant->AsConstant(), instr)) {
- return Location::ConstantLocation(constant->AsConstant());
+ return Location::ConstantLocation(constant);
}
return Location::RequiresRegister();
@@ -94,7 +94,7 @@ void LocationsBuilderARM64Neon::VisitVecReplicateScalar(HVecReplicateScalar* ins
case DataType::Type::kFloat64:
if (input->IsConstant() &&
NEONCanEncodeConstantAsImmediate(input->AsConstant(), instruction)) {
- locations->SetInAt(0, Location::ConstantLocation(input->AsConstant()));
+ locations->SetInAt(0, Location::ConstantLocation(input));
locations->SetOut(Location::RequiresFpuRegister());
} else {
locations->SetInAt(0, Location::RequiresFpuRegister());
@@ -881,7 +881,7 @@ static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperati
case DataType::Type::kInt32:
case DataType::Type::kInt64:
locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
break;
default:
@@ -1008,13 +1008,13 @@ void LocationsBuilderARM64Neon::VisitVecSetScalars(HVecSetScalars* instruction)
case DataType::Type::kInt16:
case DataType::Type::kInt32:
case DataType::Type::kInt64:
- locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
+ locations->SetInAt(0, is_zero ? Location::ConstantLocation(input)
: Location::RequiresRegister());
locations->SetOut(Location::RequiresFpuRegister());
break;
case DataType::Type::kFloat32:
case DataType::Type::kFloat64:
- locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
+ locations->SetInAt(0, is_zero ? Location::ConstantLocation(input)
: Location::RequiresFpuRegister());
locations->SetOut(Location::RequiresFpuRegister());
break;
diff --git a/compiler/optimizing/code_generator_vector_arm64_sve.cc b/compiler/optimizing/code_generator_vector_arm64_sve.cc
index 824b6c9476..fe15791d3f 100644
--- a/compiler/optimizing/code_generator_vector_arm64_sve.cc
+++ b/compiler/optimizing/code_generator_vector_arm64_sve.cc
@@ -23,17 +23,14 @@
using namespace vixl::aarch64; // NOLINT(build/namespaces)
-namespace art {
+namespace art HIDDEN {
namespace arm64 {
using helpers::DRegisterFrom;
-using helpers::HeapOperand;
using helpers::InputRegisterAt;
using helpers::Int64FromLocation;
using helpers::LocationFrom;
using helpers::OutputRegister;
-using helpers::QRegisterFrom;
-using helpers::StackOperandFrom;
using helpers::SveStackOperandFrom;
using helpers::VRegisterFrom;
using helpers::ZRegisterFrom;
@@ -67,7 +64,7 @@ static bool SVECanEncodeConstantAsImmediate(HConstant* constant, HInstruction* i
inline Location SVEEncodableConstantOrRegister(HInstruction* constant, HInstruction* instr) {
if (constant->IsConstant()
&& SVECanEncodeConstantAsImmediate(constant->AsConstant(), instr)) {
- return Location::ConstantLocation(constant->AsConstant());
+ return Location::ConstantLocation(constant);
}
return Location::RequiresRegister();
@@ -96,7 +93,7 @@ void LocationsBuilderARM64Sve::VisitVecReplicateScalar(HVecReplicateScalar* inst
case DataType::Type::kFloat64:
if (input->IsConstant() &&
SVECanEncodeConstantAsImmediate(input->AsConstant(), instruction)) {
- locations->SetInAt(0, Location::ConstantLocation(input->AsConstant()));
+ locations->SetInAt(0, Location::ConstantLocation(input));
locations->SetOut(Location::RequiresFpuRegister());
} else {
locations->SetInAt(0, Location::RequiresFpuRegister());
@@ -754,7 +751,7 @@ static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperati
case DataType::Type::kInt32:
case DataType::Type::kInt64:
locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
break;
default:
@@ -878,13 +875,13 @@ void LocationsBuilderARM64Sve::VisitVecSetScalars(HVecSetScalars* instruction) {
case DataType::Type::kInt16:
case DataType::Type::kInt32:
case DataType::Type::kInt64:
- locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
+ locations->SetInAt(0, is_zero ? Location::ConstantLocation(input)
: Location::RequiresRegister());
locations->SetOut(Location::RequiresFpuRegister());
break;
case DataType::Type::kFloat32:
case DataType::Type::kFloat64:
- locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
+ locations->SetInAt(0, is_zero ? Location::ConstantLocation(input)
: Location::RequiresFpuRegister());
locations->SetOut(Location::RequiresFpuRegister());
break;
diff --git a/compiler/optimizing/code_generator_vector_arm_vixl.cc b/compiler/optimizing/code_generator_vector_arm_vixl.cc
index c46f9b7986..e8ecf28386 100644
--- a/compiler/optimizing/code_generator_vector_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_vector_arm_vixl.cc
@@ -20,7 +20,7 @@
namespace vixl32 = vixl::aarch32;
using namespace vixl32; // NOLINT(build/namespaces)
-namespace art {
+namespace art HIDDEN {
namespace arm {
using helpers::DRegisterFrom;
@@ -640,7 +640,7 @@ static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperati
case DataType::Type::kInt16:
case DataType::Type::kInt32:
locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
break;
default:
@@ -749,7 +749,7 @@ void LocationsBuilderARMVIXL::VisitVecSetScalars(HVecSetScalars* instruction) {
switch (instruction->GetPackedType()) {
case DataType::Type::kInt32:
- locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
+ locations->SetInAt(0, is_zero ? Location::ConstantLocation(input)
: Location::RequiresRegister());
locations->SetOut(Location::RequiresFpuRegister());
break;
diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc
index 9c837dd986..343a6e1af4 100644
--- a/compiler/optimizing/code_generator_vector_x86.cc
+++ b/compiler/optimizing/code_generator_vector_x86.cc
@@ -19,7 +19,7 @@
#include "mirror/array-inl.h"
#include "mirror/string.h"
-namespace art {
+namespace art HIDDEN {
namespace x86 {
// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
@@ -42,13 +42,13 @@ void LocationsBuilderX86::VisitVecReplicateScalar(HVecReplicateScalar* instructi
case DataType::Type::kUint16:
case DataType::Type::kInt16:
case DataType::Type::kInt32:
- locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
+ locations->SetInAt(0, is_zero ? Location::ConstantLocation(input)
: Location::RequiresRegister());
locations->SetOut(Location::RequiresFpuRegister());
break;
case DataType::Type::kFloat32:
case DataType::Type::kFloat64:
- locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
+ locations->SetInAt(0, is_zero ? Location::ConstantLocation(input)
: Location::RequiresFpuRegister());
locations->SetOut(is_zero ? Location::RequiresFpuRegister()
: Location::SameAsFirstInput());
@@ -981,7 +981,7 @@ static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperati
case DataType::Type::kInt32:
case DataType::Type::kInt64:
locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
locations->SetOut(Location::SameAsFirstInput());
break;
default:
@@ -1094,13 +1094,13 @@ void LocationsBuilderX86::VisitVecSetScalars(HVecSetScalars* instruction) {
case DataType::Type::kUint16:
case DataType::Type::kInt16:
case DataType::Type::kInt32:
- locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
+ locations->SetInAt(0, is_zero ? Location::ConstantLocation(input)
: Location::RequiresRegister());
locations->SetOut(Location::RequiresFpuRegister());
break;
case DataType::Type::kFloat32:
case DataType::Type::kFloat64:
- locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
+ locations->SetInAt(0, is_zero ? Location::ConstantLocation(input)
: Location::RequiresFpuRegister());
locations->SetOut(Location::RequiresFpuRegister());
break;
diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc
index 330bf76a4a..fb6e4e753f 100644
--- a/compiler/optimizing/code_generator_vector_x86_64.cc
+++ b/compiler/optimizing/code_generator_vector_x86_64.cc
@@ -19,7 +19,7 @@
#include "mirror/array-inl.h"
#include "mirror/string.h"
-namespace art {
+namespace art HIDDEN {
namespace x86_64 {
// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
@@ -37,13 +37,13 @@ void LocationsBuilderX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instru
case DataType::Type::kInt16:
case DataType::Type::kInt32:
case DataType::Type::kInt64:
- locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
+ locations->SetInAt(0, is_zero ? Location::ConstantLocation(input)
: Location::RequiresRegister());
locations->SetOut(Location::RequiresFpuRegister());
break;
case DataType::Type::kFloat32:
case DataType::Type::kFloat64:
- locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
+ locations->SetInAt(0, is_zero ? Location::ConstantLocation(input)
: Location::RequiresFpuRegister());
locations->SetOut(is_zero ? Location::RequiresFpuRegister()
: Location::SameAsFirstInput());
@@ -964,7 +964,7 @@ static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperati
case DataType::Type::kInt32:
case DataType::Type::kInt64:
locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
locations->SetOut(Location::SameAsFirstInput());
break;
default:
@@ -1072,13 +1072,13 @@ void LocationsBuilderX86_64::VisitVecSetScalars(HVecSetScalars* instruction) {
case DataType::Type::kInt16:
case DataType::Type::kInt32:
case DataType::Type::kInt64:
- locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
+ locations->SetInAt(0, is_zero ? Location::ConstantLocation(input)
: Location::RequiresRegister());
locations->SetOut(Location::RequiresFpuRegister());
break;
case DataType::Type::kFloat32:
case DataType::Type::kFloat64:
- locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
+ locations->SetInAt(0, is_zero ? Location::ConstantLocation(input)
: Location::RequiresFpuRegister());
locations->SetOut(Location::RequiresFpuRegister());
break;
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 8c6b8027cd..cb1cecc45a 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -20,7 +20,6 @@
#include "art_method-inl.h"
#include "class_table.h"
#include "code_generator_utils.h"
-#include "compiled_method.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "entrypoints/quick/quick_entrypoints_enum.h"
#include "gc/accounting/card_table.h"
@@ -36,6 +35,7 @@
#include "mirror/array-inl.h"
#include "mirror/class-inl.h"
#include "mirror/var_handle.h"
+#include "optimizing/nodes.h"
#include "scoped_thread_state_change-inl.h"
#include "thread.h"
#include "utils/assembler.h"
@@ -43,7 +43,7 @@
#include "utils/x86/assembler_x86.h"
#include "utils/x86/managed_register_x86.h"
-namespace art {
+namespace art HIDDEN {
template<class MirrorType>
class GcRoot;
@@ -503,7 +503,7 @@ class ReadBarrierMarkSlowPathX86 : public SlowPathCode {
: SlowPathCode(instruction),
ref_(ref),
unpoison_ref_before_marking_(unpoison_ref_before_marking) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
}
const char* GetDescription() const override { return "ReadBarrierMarkSlowPathX86"; }
@@ -590,7 +590,7 @@ class ReadBarrierMarkAndUpdateFieldSlowPathX86 : public SlowPathCode {
field_addr_(field_addr),
unpoison_ref_before_marking_(unpoison_ref_before_marking),
temp_(temp) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
}
const char* GetDescription() const override { return "ReadBarrierMarkAndUpdateFieldSlowPathX86"; }
@@ -744,7 +744,7 @@ class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode {
obj_(obj),
offset_(offset),
index_(index) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
// If `obj` is equal to `out` or `ref`, it means the initial object
// has been overwritten by (or after) the heap object reference load
// to be instrumented, e.g.:
@@ -918,7 +918,7 @@ class ReadBarrierForRootSlowPathX86 : public SlowPathCode {
public:
ReadBarrierForRootSlowPathX86(HInstruction* instruction, Location out, Location root)
: SlowPathCode(instruction), out_(out), root_(root) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
}
void EmitNativeCode(CodeGenerator* codegen) override {
@@ -967,6 +967,9 @@ class MethodEntryExitHooksSlowPathX86 : public SlowPathCode {
(instruction_->IsMethodEntryHook()) ? kQuickMethodEntryHook : kQuickMethodExitHook;
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, locations);
+ if (instruction_->IsMethodExitHook()) {
+ __ movl(EBX, Immediate(codegen->GetFrameSize()));
+ }
x86_codegen->InvokeRuntime(entry_point, instruction_, instruction_->GetDexPc(), this);
RestoreLiveRegisters(codegen, locations);
__ jmp(GetExitLabel());
@@ -1103,6 +1106,33 @@ void CodeGeneratorX86::GenerateInvokeRuntime(int32_t entry_point_offset) {
__ fs()->call(Address::Absolute(entry_point_offset));
}
+namespace detail {
+// Mark which intrinsics we don't have handcrafted code for.
+template <Intrinsics T>
+struct IsUnimplemented {
+ bool is_unimplemented = false;
+};
+
+#define TRUE_OVERRIDE(Name) \
+ template <> \
+ struct IsUnimplemented<Intrinsics::k##Name> { \
+ bool is_unimplemented = true; \
+ };
+UNIMPLEMENTED_INTRINSIC_LIST_X86(TRUE_OVERRIDE)
+#undef TRUE_OVERRIDE
+
+#include "intrinsics_list.h"
+static constexpr bool kIsIntrinsicUnimplemented[] = {
+ false, // kNone
+#define IS_UNIMPLEMENTED(Intrinsic, ...) \
+ IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented,
+ INTRINSICS_LIST(IS_UNIMPLEMENTED)
+#undef IS_UNIMPLEMENTED
+};
+#undef INTRINSICS_LIST
+
+} // namespace detail
+
CodeGeneratorX86::CodeGeneratorX86(HGraph* graph,
const CompilerOptions& compiler_options,
OptimizingCompilerStats* stats)
@@ -1115,7 +1145,8 @@ CodeGeneratorX86::CodeGeneratorX86(HGraph* graph,
| (1 << kFakeReturnRegister),
0,
compiler_options,
- stats),
+ stats,
+ ArrayRef<const bool>(detail::kIsIntrinsicUnimplemented)),
block_labels_(nullptr),
location_builder_(graph, this),
instruction_visitor_(graph, this),
@@ -1197,9 +1228,21 @@ void InstructionCodeGeneratorX86::GenerateMethodEntryExitHook(HInstruction* inst
new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathX86(instruction);
codegen_->AddSlowPath(slow_path);
+ if (instruction->IsMethodExitHook()) {
+  // Check whether we need to test if the caller needs a deoptimization. Strictly speaking, it
+  // would be sufficient to check whether the CheckCallerForDeopt bit is set, but it is faster to
+  // check for any non-zero value. The kCHA bit isn't used in debuggable runtimes because CHA
+  // optimization is disabled there, and the remaining bit is only set when this method itself
+  // requires a deoptimization due to redefinition, so checking for a non-zero value is safe here.
+ __ cmpl(Address(ESP, codegen_->GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0));
+ __ j(kNotEqual, slow_path->GetEntryLabel());
+ }
+
uint64_t address = reinterpret_cast64<uint64_t>(Runtime::Current()->GetInstrumentation());
- int offset = instrumentation::Instrumentation::NeedsEntryExitHooksOffset().Int32Value();
- __ cmpb(Address::Absolute(address + offset), Immediate(0));
+ MemberOffset offset = instruction->IsMethodExitHook() ?
+ instrumentation::Instrumentation::HaveMethodExitListenersOffset() :
+ instrumentation::Instrumentation::HaveMethodEntryListenersOffset();
+ __ cmpb(Address::Absolute(address + offset.Int32Value()), Immediate(0));
__ j(kNotEqual, slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
}
@@ -1261,6 +1304,44 @@ void CodeGeneratorX86::MaybeIncrementHotness(bool is_frame_entry) {
void CodeGeneratorX86::GenerateFrameEntry() {
__ cfi().SetCurrentCFAOffset(kX86WordSize); // return address
+
+ // Check if we need to generate the clinit check. We will jump to the
+ // resolution stub if the class is not initialized and the executing thread is
+ // not the thread initializing it.
+ // We do this before constructing the frame to get the correct stack trace if
+ // an exception is thrown.
+ if (GetCompilerOptions().ShouldCompileWithClinitCheck(GetGraph()->GetArtMethod())) {
+ NearLabel continue_execution, resolution;
+ // We'll use EBP as temporary.
+ __ pushl(EBP);
+ // Check if we're visibly initialized.
+
+ // We don't emit a read barrier here to save on code size. We rely on the
+ // resolution trampoline to do a suspend check before re-entering this code.
+ __ movl(EBP, Address(kMethodRegisterArgument, ArtMethod::DeclaringClassOffset().Int32Value()));
+ __ cmpb(Address(EBP, status_byte_offset), Immediate(shifted_visibly_initialized_value));
+ __ j(kAboveEqual, &continue_execution);
+
+ // Check if we're initializing and the thread initializing is the one
+ // executing the code.
+ __ cmpb(Address(EBP, status_byte_offset), Immediate(shifted_initializing_value));
+ __ j(kBelow, &resolution);
+
+ __ movl(EBP, Address(EBP, mirror::Class::ClinitThreadIdOffset().Int32Value()));
+ __ fs()->cmpl(EBP, Address::Absolute(Thread::TidOffset<kX86PointerSize>().Int32Value()));
+ __ j(kEqual, &continue_execution);
+ __ Bind(&resolution);
+
+ __ popl(EBP);
+ // Jump to the resolution stub.
+ ThreadOffset32 entrypoint_offset =
+ GetThreadOffset<kX86PointerSize>(kQuickQuickResolutionTrampoline);
+ __ fs()->jmp(Address::Absolute(entrypoint_offset));
+
+ __ Bind(&continue_execution);
+ __ popl(EBP);
+ }
+
__ Bind(&frame_entry_label_);
bool skip_overflow_check =
IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86);
@@ -1619,7 +1700,7 @@ void CodeGeneratorX86::LoadFromMemoryNoBarrier(DataType::Type dst_type,
__ movsd(dst.AsFpuRegister<XmmRegister>(), src);
break;
case DataType::Type::kReference:
- DCHECK(!kEmitCompilerReadBarrier);
+ DCHECK(!gUseReadBarrier);
__ movl(dst.AsRegister<Register>(), src);
__ MaybeUnpoisonHeapReference(dst.AsRegister<Register>());
break;
@@ -2230,12 +2311,12 @@ void InstructionCodeGeneratorX86::VisitSelect(HSelect* select) {
}
}
-void LocationsBuilderX86::VisitNativeDebugInfo(HNativeDebugInfo* info) {
- new (GetGraph()->GetAllocator()) LocationSummary(info);
+void LocationsBuilderX86::VisitNop(HNop* nop) {
+ new (GetGraph()->GetAllocator()) LocationSummary(nop);
}
-void InstructionCodeGeneratorX86::VisitNativeDebugInfo(HNativeDebugInfo*) {
- // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile.
+void InstructionCodeGeneratorX86::VisitNop(HNop*) {
+ // The environment recording already happened in CodeGenerator::Compile.
}
void CodeGeneratorX86::IncreaseFrame(size_t adjustment) {
@@ -2913,7 +2994,7 @@ void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) {
case DataType::Type::kInt64: {
HInstruction* input = conversion->InputAt(0);
Location input_location = input->IsConstant()
- ? Location::ConstantLocation(input->AsConstant())
+ ? Location::ConstantLocation(input)
: Location::RegisterPairLocation(EAX, EDX);
locations->SetInAt(0, input_location);
// Make the output overlap to please the register allocator. This greatly simplifies
@@ -5689,13 +5770,10 @@ void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linke
DCHECK_EQ(size, linker_patches->size());
}
-void CodeGeneratorX86::MarkGCCard(Register temp,
- Register card,
- Register object,
- Register value,
- bool value_can_be_null) {
+void CodeGeneratorX86::MarkGCCard(
+ Register temp, Register card, Register object, Register value, bool emit_null_check) {
NearLabel is_null;
- if (value_can_be_null) {
+ if (emit_null_check) {
__ testl(value, value);
__ j(kEqual, &is_null);
}
@@ -5720,7 +5798,7 @@ void CodeGeneratorX86::MarkGCCard(Register temp,
// (no need to explicitly load `kCardDirty` as an immediate value).
__ movb(Address(temp, card, TIMES_1, 0),
X86ManagedRegister::FromCpuRegister(card).AsByteRegister());
- if (value_can_be_null) {
+ if (emit_null_check) {
__ Bind(&is_null);
}
}
@@ -5731,11 +5809,11 @@ void LocationsBuilderX86::HandleFieldGet(HInstruction* instruction, const FieldI
instruction->IsPredicatedInstanceFieldGet());
bool object_field_get_with_read_barrier =
- kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
+ gUseReadBarrier && (instruction->GetType() == DataType::Type::kReference);
bool is_predicated = instruction->IsPredicatedInstanceFieldGet();
LocationSummary* locations =
new (GetGraph()->GetAllocator()) LocationSummary(instruction,
- kEmitCompilerReadBarrier
+ gUseReadBarrier
? LocationSummary::kCallOnSlowPath
: LocationSummary::kNoCall);
if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
@@ -5793,7 +5871,7 @@ void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction,
if (load_type == DataType::Type::kReference) {
// /* HeapReference<Object> */ out = *(base + offset)
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
// Note that a potential implicit null check is handled in this
// CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier call.
codegen_->GenerateFieldLoadWithBakerReadBarrier(
@@ -5824,7 +5902,9 @@ void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction,
}
}
-void LocationsBuilderX86::HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info) {
+void LocationsBuilderX86::HandleFieldSet(HInstruction* instruction,
+ const FieldInfo& field_info,
+ WriteBarrierKind write_barrier_kind) {
DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
LocationSummary* locations =
@@ -5861,10 +5941,13 @@ void LocationsBuilderX86::HandleFieldSet(HInstruction* instruction, const FieldI
locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
- // Temporary registers for the write barrier.
- locations->AddTemp(Location::RequiresRegister()); // May be used for reference poisoning too.
- // Ensure the card is in a byte register.
- locations->AddTemp(Location::RegisterLocation(ECX));
+ if (write_barrier_kind != WriteBarrierKind::kDontEmit) {
+ locations->AddTemp(Location::RequiresRegister());
+ // Ensure the card is in a byte register.
+ locations->AddTemp(Location::RegisterLocation(ECX));
+ } else if (kPoisonHeapReferences) {
+ locations->AddTemp(Location::RequiresRegister());
+ }
}
}
}
@@ -5875,7 +5958,8 @@ void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction,
Address field_addr,
Register base,
bool is_volatile,
- bool value_can_be_null) {
+ bool value_can_be_null,
+ WriteBarrierKind write_barrier_kind) {
LocationSummary* locations = instruction->GetLocations();
Location value = locations->InAt(value_index);
bool needs_write_barrier =
@@ -5988,10 +6072,15 @@ void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction,
codegen_->MaybeRecordImplicitNullCheck(instruction);
}
- if (needs_write_barrier) {
+ if (needs_write_barrier && write_barrier_kind != WriteBarrierKind::kDontEmit) {
Register temp = locations->GetTemp(0).AsRegister<Register>();
Register card = locations->GetTemp(1).AsRegister<Register>();
- codegen_->MarkGCCard(temp, card, base, value.AsRegister<Register>(), value_can_be_null);
+ codegen_->MarkGCCard(
+ temp,
+ card,
+ base,
+ value.AsRegister<Register>(),
+ value_can_be_null && write_barrier_kind == WriteBarrierKind::kEmitWithNullCheck);
}
if (is_volatile) {
@@ -6001,7 +6090,8 @@ void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction,
void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction,
const FieldInfo& field_info,
- bool value_can_be_null) {
+ bool value_can_be_null,
+ WriteBarrierKind write_barrier_kind) {
DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
LocationSummary* locations = instruction->GetLocations();
@@ -6026,7 +6116,8 @@ void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction,
field_addr,
base,
is_volatile,
- value_can_be_null);
+ value_can_be_null,
+ write_barrier_kind);
if (is_predicated) {
__ Bind(&pred_is_null);
@@ -6042,19 +6133,25 @@ void InstructionCodeGeneratorX86::VisitStaticFieldGet(HStaticFieldGet* instructi
}
void LocationsBuilderX86::VisitStaticFieldSet(HStaticFieldSet* instruction) {
- HandleFieldSet(instruction, instruction->GetFieldInfo());
+ HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetWriteBarrierKind());
}
void InstructionCodeGeneratorX86::VisitStaticFieldSet(HStaticFieldSet* instruction) {
- HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
+ HandleFieldSet(instruction,
+ instruction->GetFieldInfo(),
+ instruction->GetValueCanBeNull(),
+ instruction->GetWriteBarrierKind());
}
void LocationsBuilderX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
- HandleFieldSet(instruction, instruction->GetFieldInfo());
+ HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetWriteBarrierKind());
}
void InstructionCodeGeneratorX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
- HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
+ HandleFieldSet(instruction,
+ instruction->GetFieldInfo(),
+ instruction->GetValueCanBeNull(),
+ instruction->GetWriteBarrierKind());
}
void LocationsBuilderX86::VisitPredicatedInstanceFieldGet(
@@ -6202,7 +6299,7 @@ void InstructionCodeGeneratorX86::VisitNullCheck(HNullCheck* instruction) {
void LocationsBuilderX86::VisitArrayGet(HArrayGet* instruction) {
bool object_array_get_with_read_barrier =
- kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
+ gUseReadBarrier && (instruction->GetType() == DataType::Type::kReference);
LocationSummary* locations =
new (GetGraph()->GetAllocator()) LocationSummary(instruction,
object_array_get_with_read_barrier
@@ -6244,7 +6341,7 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
"art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
// /* HeapReference<Object> */ out =
// *(obj + data_offset + index * sizeof(HeapReference<Object>))
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
// Note that a potential implicit null check is handled in this
// CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier call.
codegen_->GenerateArrayLoadWithBakerReadBarrier(
@@ -6315,10 +6412,12 @@ void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) {
locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
}
if (needs_write_barrier) {
- // Temporary registers for the write barrier.
- locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too.
- // Ensure the card is in a byte register.
- locations->AddTemp(Location::RegisterLocation(ECX));
+ // Used by reference poisoning or emitting write barrier.
+ locations->AddTemp(Location::RequiresRegister());
+ if (instruction->GetWriteBarrierKind() != WriteBarrierKind::kDontEmit) {
+ // Only used when emitting a write barrier. Ensure the card is in a byte register.
+ locations->AddTemp(Location::RegisterLocation(ECX));
+ }
}
}
@@ -6435,9 +6534,16 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) {
}
}
- Register card = locations->GetTemp(1).AsRegister<Register>();
- codegen_->MarkGCCard(
- temp, card, array, value.AsRegister<Register>(), /* value_can_be_null= */ false);
+ if (instruction->GetWriteBarrierKind() != WriteBarrierKind::kDontEmit) {
+ DCHECK_EQ(instruction->GetWriteBarrierKind(), WriteBarrierKind::kEmitNoNullCheck)
+ << " Already null checked so we shouldn't do it again.";
+ Register card = locations->GetTemp(1).AsRegister<Register>();
+ codegen_->MarkGCCard(temp,
+ card,
+ array,
+ value.AsRegister<Register>(),
+ /* emit_null_check= */ false);
+ }
if (can_value_be_null) {
DCHECK(do_store.IsLinked());
@@ -7057,7 +7163,7 @@ void LocationsBuilderX86::VisitLoadClass(HLoadClass* cls) {
load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
load_kind == HLoadClass::LoadKind::kBssEntryPackage);
- const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
+ const bool requires_read_barrier = gUseReadBarrier && !cls->IsInBootImage();
LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
? LocationSummary::kCallOnSlowPath
: LocationSummary::kNoCall;
@@ -7071,7 +7177,7 @@ void LocationsBuilderX86::VisitLoadClass(HLoadClass* cls) {
}
locations->SetOut(Location::RequiresRegister());
if (call_kind == LocationSummary::kCallOnSlowPath && cls->HasPcRelativeLoadKind()) {
- if (!kUseReadBarrier || kUseBakerReadBarrier) {
+ if (!gUseReadBarrier || kUseBakerReadBarrier) {
// Rely on the type resolution and/or initialization to save everything.
locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
} else {
@@ -7109,7 +7215,7 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFE
bool generate_null_check = false;
const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
? kWithoutReadBarrier
- : kCompilerReadBarrierOption;
+ : gCompilerReadBarrierOption;
switch (load_kind) {
case HLoadClass::LoadKind::kReferrersClass: {
DCHECK(!cls->CanCallRuntime());
@@ -7233,12 +7339,6 @@ void InstructionCodeGeneratorX86::VisitClinitCheck(HClinitCheck* check) {
void InstructionCodeGeneratorX86::GenerateClassInitializationCheck(
SlowPathCode* slow_path, Register class_reg) {
- constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
- const size_t status_byte_offset =
- mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte);
- constexpr uint32_t shifted_visibly_initialized_value =
- enum_cast<uint32_t>(ClassStatus::kVisiblyInitialized) << (status_lsb_position % kBitsPerByte);
-
__ cmpb(Address(class_reg, status_byte_offset), Immediate(shifted_visibly_initialized_value));
__ j(kBelow, slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
@@ -7296,7 +7396,7 @@ void LocationsBuilderX86::VisitLoadString(HLoadString* load) {
} else {
locations->SetOut(Location::RequiresRegister());
if (load_kind == HLoadString::LoadKind::kBssEntry) {
- if (!kUseReadBarrier || kUseBakerReadBarrier) {
+ if (!gUseReadBarrier || kUseBakerReadBarrier) {
// Rely on the pResolveString to save everything.
locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
} else {
@@ -7345,7 +7445,7 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) NO_THREAD_S
Address address = Address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset);
Label* fixup_label = codegen_->NewStringBssEntryPatch(load);
// /* GcRoot<mirror::String> */ out = *address /* PC-relative */
- GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
+ GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, gCompilerReadBarrierOption);
// No need for memory fence, thanks to the x86 memory model.
SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathX86(load);
codegen_->AddSlowPath(slow_path);
@@ -7365,7 +7465,7 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) NO_THREAD_S
Label* fixup_label = codegen_->NewJitRootStringPatch(
load->GetDexFile(), load->GetStringIndex(), load->GetString());
// /* GcRoot<mirror::String> */ out = *address
- GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
+ GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, gCompilerReadBarrierOption);
return;
}
default:
@@ -7416,7 +7516,7 @@ void InstructionCodeGeneratorX86::VisitThrow(HThrow* instruction) {
// Temp is used for read barrier.
static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
- if (kEmitCompilerReadBarrier &&
+ if (gUseReadBarrier &&
!kUseBakerReadBarrier &&
(type_check_kind == TypeCheckKind::kAbstractClassCheck ||
type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
@@ -7466,9 +7566,9 @@ void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) {
}
locations->SetInAt(0, Location::RequiresRegister());
if (type_check_kind == TypeCheckKind::kBitstringCheck) {
- locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
- locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
- locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
+ locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)));
+ locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)));
} else {
locations->SetInAt(1, Location::Any());
}
@@ -7734,9 +7834,9 @@ void LocationsBuilderX86::VisitCheckCast(HCheckCast* instruction) {
// a memory address.
locations->SetInAt(1, Location::RequiresRegister());
} else if (type_check_kind == TypeCheckKind::kBitstringCheck) {
- locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
- locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
- locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
+ locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)));
+ locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)));
} else {
locations->SetInAt(1, Location::Any());
}
@@ -8188,7 +8288,7 @@ void InstructionCodeGeneratorX86::GenerateReferenceLoadOneRegister(
ReadBarrierOption read_barrier_option) {
Register out_reg = out.AsRegister<Register>();
if (read_barrier_option == kWithReadBarrier) {
- CHECK(kEmitCompilerReadBarrier);
+ CHECK(gUseReadBarrier);
if (kUseBakerReadBarrier) {
// Load with fast path based Baker's read barrier.
// /* HeapReference<Object> */ out = *(out + offset)
@@ -8222,7 +8322,7 @@ void InstructionCodeGeneratorX86::GenerateReferenceLoadTwoRegisters(
Register out_reg = out.AsRegister<Register>();
Register obj_reg = obj.AsRegister<Register>();
if (read_barrier_option == kWithReadBarrier) {
- CHECK(kEmitCompilerReadBarrier);
+ CHECK(gUseReadBarrier);
if (kUseBakerReadBarrier) {
// Load with fast path based Baker's read barrier.
// /* HeapReference<Object> */ out = *(obj + offset)
@@ -8250,7 +8350,7 @@ void InstructionCodeGeneratorX86::GenerateGcRootFieldLoad(
ReadBarrierOption read_barrier_option) {
Register root_reg = root.AsRegister<Register>();
if (read_barrier_option == kWithReadBarrier) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
if (kUseBakerReadBarrier) {
// Fast path implementation of art::ReadBarrier::BarrierForRoot when
// Baker's read barrier are used:
@@ -8314,7 +8414,7 @@ void CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instr
Register obj,
uint32_t offset,
bool needs_null_check) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
DCHECK(kUseBakerReadBarrier);
// /* HeapReference<Object> */ ref = *(obj + offset)
@@ -8328,7 +8428,7 @@ void CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instr
uint32_t data_offset,
Location index,
bool needs_null_check) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
DCHECK(kUseBakerReadBarrier);
static_assert(
@@ -8347,7 +8447,7 @@ void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i
bool needs_null_check,
bool always_update_field,
Register* temp) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
DCHECK(kUseBakerReadBarrier);
// In slow path based read barriers, the read barrier call is
@@ -8428,7 +8528,7 @@ void CodeGeneratorX86::GenerateReadBarrierSlow(HInstruction* instruction,
Location obj,
uint32_t offset,
Location index) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
// Insert a slow path based read barrier *after* the reference load.
//
@@ -8455,7 +8555,7 @@ void CodeGeneratorX86::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
Location obj,
uint32_t offset,
Location index) {
- if (kEmitCompilerReadBarrier) {
+ if (gUseReadBarrier) {
// Baker's read barriers shall be handled by the fast path
// (CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier).
DCHECK(!kUseBakerReadBarrier);
@@ -8470,7 +8570,7 @@ void CodeGeneratorX86::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
void CodeGeneratorX86::GenerateReadBarrierForRootSlow(HInstruction* instruction,
Location out,
Location root) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
// Insert a slow path based read barrier *after* the GC root load.
//
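The detail namespace introduced above (and mirrored in code_generator_x86_64.cc further down) builds the constexpr kIsIntrinsicUnimplemented table in two steps: it specializes IsUnimplemented<> to true for every entry of the per-architecture UNIMPLEMENTED_INTRINSIC_LIST_X86 macro, then expands INTRINSICS_LIST to materialize one bool per intrinsic enumerator. The self-contained toy program below reproduces the same X-macro pattern; ToyIntrinsics, TOY_LIST, and TOY_UNIMPLEMENTED_LIST are invented names used only to show the shape of the technique, not the real ART lists.

  #include <cstddef>
  #include <iostream>

  // Toy stand-ins for INTRINSICS_LIST and UNIMPLEMENTED_INTRINSIC_LIST_X86.
  #define TOY_LIST(V) V(Add) V(Sub) V(Mul)
  #define TOY_UNIMPLEMENTED_LIST(V) V(Mul)

  enum class ToyIntrinsics : std::size_t {
    kNone,
  #define DECLARE(Name) k##Name,
    TOY_LIST(DECLARE)
  #undef DECLARE
  };

  namespace detail {
  // Primary template: assume an intrinsic is implemented unless specialized below.
  template <ToyIntrinsics T>
  struct IsUnimplemented { bool is_unimplemented = false; };

  #define TRUE_OVERRIDE(Name) \
    template <> struct IsUnimplemented<ToyIntrinsics::k##Name> { bool is_unimplemented = true; };
  TOY_UNIMPLEMENTED_LIST(TRUE_OVERRIDE)
  #undef TRUE_OVERRIDE

  // One entry per enumerator, in declaration order, computed at compile time.
  static constexpr bool kIsUnimplemented[] = {
      false,  // kNone
  #define IS_UNIMPLEMENTED(Name) IsUnimplemented<ToyIntrinsics::k##Name>().is_unimplemented,
      TOY_LIST(IS_UNIMPLEMENTED)
  #undef IS_UNIMPLEMENTED
  };
  }  // namespace detail

  int main() {
    // kMul was listed as unimplemented, so its slot in the table is true (prints 1).
    std::cout << detail::kIsUnimplemented[static_cast<std::size_t>(ToyIntrinsics::kMul)] << '\n';
  }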
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 75c5cebb5e..d27155f31d 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -19,6 +19,7 @@
#include "arch/x86/instruction_set_features_x86.h"
#include "base/enums.h"
+#include "base/macros.h"
#include "code_generator.h"
#include "dex/dex_file_types.h"
#include "driver/compiler_options.h"
@@ -26,7 +27,7 @@
#include "parallel_move_resolver.h"
#include "utils/x86/assembler_x86.h"
-namespace art {
+namespace art HIDDEN {
namespace x86 {
// Use a local definition to prevent copying mistakes.
@@ -47,6 +48,61 @@ static constexpr XmmRegister kRuntimeParameterFpuRegisters[] = { XMM0, XMM1, XMM
static constexpr size_t kRuntimeParameterFpuRegistersLength =
arraysize(kRuntimeParameterFpuRegisters);
+#define UNIMPLEMENTED_INTRINSIC_LIST_X86(V) \
+ V(MathRoundDouble) \
+ V(FloatIsInfinite) \
+ V(DoubleIsInfinite) \
+ V(IntegerHighestOneBit) \
+ V(LongHighestOneBit) \
+ V(LongDivideUnsigned) \
+ V(CRC32Update) \
+ V(CRC32UpdateBytes) \
+ V(CRC32UpdateByteBuffer) \
+ V(FP16ToFloat) \
+ V(FP16ToHalf) \
+ V(FP16Floor) \
+ V(FP16Ceil) \
+ V(FP16Rint) \
+ V(FP16Greater) \
+ V(FP16GreaterEquals) \
+ V(FP16Less) \
+ V(FP16LessEquals) \
+ V(FP16Compare) \
+ V(FP16Min) \
+ V(FP16Max) \
+ V(MathMultiplyHigh) \
+ V(StringStringIndexOf) \
+ V(StringStringIndexOfAfter) \
+ V(StringBufferAppend) \
+ V(StringBufferLength) \
+ V(StringBufferToString) \
+ V(StringBuilderAppendObject) \
+ V(StringBuilderAppendString) \
+ V(StringBuilderAppendCharSequence) \
+ V(StringBuilderAppendCharArray) \
+ V(StringBuilderAppendBoolean) \
+ V(StringBuilderAppendChar) \
+ V(StringBuilderAppendInt) \
+ V(StringBuilderAppendLong) \
+ V(StringBuilderAppendFloat) \
+ V(StringBuilderAppendDouble) \
+ V(StringBuilderLength) \
+ V(StringBuilderToString) \
+ /* 1.8 */ \
+ V(UnsafeGetAndAddInt) \
+ V(UnsafeGetAndAddLong) \
+ V(UnsafeGetAndSetInt) \
+ V(UnsafeGetAndSetLong) \
+ V(UnsafeGetAndSetObject) \
+ V(MethodHandleInvokeExact) \
+ V(MethodHandleInvoke) \
+ /* OpenJDK 11 */ \
+ V(JdkUnsafeGetAndAddInt) \
+ V(JdkUnsafeGetAndAddLong) \
+ V(JdkUnsafeGetAndSetInt) \
+ V(JdkUnsafeGetAndSetLong) \
+ V(JdkUnsafeGetAndSetObject)
+
class InvokeRuntimeCallingConvention : public CallingConvention<Register, XmmRegister> {
public:
InvokeRuntimeCallingConvention()
@@ -196,7 +252,9 @@ class LocationsBuilderX86 : public HGraphVisitor {
void HandleInvoke(HInvoke* invoke);
void HandleCondition(HCondition* condition);
void HandleShift(HBinaryOperation* instruction);
- void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
+ void HandleFieldSet(HInstruction* instruction,
+ const FieldInfo& field_info,
+ WriteBarrierKind write_barrier_kind);
void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
bool CpuHasAvxFeatureFlag();
bool CpuHasAvx2FeatureFlag();
@@ -249,7 +307,8 @@ class InstructionCodeGeneratorX86 : public InstructionCodeGenerator {
Address field_addr,
Register base,
bool is_volatile,
- bool value_can_be_null);
+ bool value_can_be_null,
+ WriteBarrierKind write_barrier_kind);
private:
// Generate code for the given suspend check. If not null, `successor`
@@ -279,7 +338,8 @@ class InstructionCodeGeneratorX86 : public InstructionCodeGenerator {
void HandleFieldSet(HInstruction* instruction,
const FieldInfo& field_info,
- bool value_can_be_null);
+ bool value_can_be_null,
+ WriteBarrierKind write_barrier_kind);
void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
// Generate a heap reference load using one register `out`:
@@ -519,11 +579,8 @@ class CodeGeneratorX86 : public CodeGenerator {
void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) override;
// Emit a write barrier.
- void MarkGCCard(Register temp,
- Register card,
- Register object,
- Register value,
- bool value_can_be_null);
+ void MarkGCCard(
+ Register temp, Register card, Register object, Register value, bool emit_null_check);
void GenerateMemoryBarrier(MemBarrierKind kind);
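One more note on the write-barrier changes visible in both this header and the .cc hunks: MarkGCCard's last parameter is renamed from value_can_be_null to emit_null_check, and the callers now combine the nullability information with the new WriteBarrierKind. As the HandleFieldSet and VisitArraySet hunks show, the card mark is skipped entirely for kDontEmit, and the preceding null test on the stored value is emitted only for kEmitWithNullCheck. The sketch below restates that gating logic; DecideCardMark and CardMarkDecision are names invented purely for illustration, and the enumerator order is assumed.

  enum class WriteBarrierKind { kEmitWithNullCheck, kEmitNoNullCheck, kDontEmit };

  struct CardMarkDecision {
    bool emit_card_mark;   // call MarkGCCard at all?
    bool emit_null_check;  // start MarkGCCard with the testl-based null check?
  };

  constexpr CardMarkDecision DecideCardMark(bool needs_write_barrier,
                                            bool value_can_be_null,
                                            WriteBarrierKind kind) {
    return CardMarkDecision{
        needs_write_barrier && kind != WriteBarrierKind::kDontEmit,
        value_can_be_null && kind == WriteBarrierKind::kEmitWithNullCheck};
  }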
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 511917a735..eea6b204fa 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -21,7 +21,6 @@
#include "class_root-inl.h"
#include "class_table.h"
#include "code_generator_utils.h"
-#include "compiled_method.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "gc/accounting/card_table.h"
#include "gc/space/image_space.h"
@@ -37,6 +36,7 @@
#include "mirror/class-inl.h"
#include "mirror/object_reference.h"
#include "mirror/var_handle.h"
+#include "optimizing/nodes.h"
#include "scoped_thread_state_change-inl.h"
#include "thread.h"
#include "utils/assembler.h"
@@ -45,7 +45,7 @@
#include "utils/x86_64/constants_x86_64.h"
#include "utils/x86_64/managed_register_x86_64.h"
-namespace art {
+namespace art HIDDEN {
template<class MirrorType>
class GcRoot;
@@ -510,7 +510,7 @@ class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
: SlowPathCode(instruction),
ref_(ref),
unpoison_ref_before_marking_(unpoison_ref_before_marking) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
}
const char* GetDescription() const override { return "ReadBarrierMarkSlowPathX86_64"; }
@@ -601,7 +601,7 @@ class ReadBarrierMarkAndUpdateFieldSlowPathX86_64 : public SlowPathCode {
unpoison_ref_before_marking_(unpoison_ref_before_marking),
temp1_(temp1),
temp2_(temp2) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
}
const char* GetDescription() const override {
@@ -761,7 +761,7 @@ class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode {
obj_(obj),
offset_(offset),
index_(index) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
// If `obj` is equal to `out` or `ref`, it means the initial
// object has been overwritten by (or after) the heap object
// reference load to be instrumented, e.g.:
@@ -937,7 +937,7 @@ class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode {
public:
ReadBarrierForRootSlowPathX86_64(HInstruction* instruction, Location out, Location root)
: SlowPathCode(instruction), out_(out), root_(root) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
}
void EmitNativeCode(CodeGenerator* codegen) override {
@@ -986,6 +986,10 @@ class MethodEntryExitHooksSlowPathX86_64 : public SlowPathCode {
(instruction_->IsMethodEntryHook()) ? kQuickMethodEntryHook : kQuickMethodExitHook;
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, locations);
+ if (instruction_->IsMethodExitHook()) {
+ // Load FrameSize to pass to the exit hook.
+ __ movq(CpuRegister(R8), Immediate(codegen->GetFrameSize()));
+ }
x86_64_codegen->InvokeRuntime(entry_point, instruction_, instruction_->GetDexPc(), this);
RestoreLiveRegisters(codegen, locations);
__ jmp(GetExitLabel());
@@ -1490,6 +1494,33 @@ void CodeGeneratorX86_64::GenerateInvokeRuntime(int32_t entry_point_offset) {
__ gs()->call(Address::Absolute(entry_point_offset, /* no_rip= */ true));
}
+namespace detail {
+// Mark which intrinsics we don't have handcrafted code for.
+template <Intrinsics T>
+struct IsUnimplemented {
+ bool is_unimplemented = false;
+};
+
+#define TRUE_OVERRIDE(Name) \
+ template <> \
+ struct IsUnimplemented<Intrinsics::k##Name> { \
+ bool is_unimplemented = true; \
+ };
+UNIMPLEMENTED_INTRINSIC_LIST_X86_64(TRUE_OVERRIDE)
+#undef TRUE_OVERRIDE
+
+#include "intrinsics_list.h"
+static constexpr bool kIsIntrinsicUnimplemented[] = {
+ false, // kNone
+#define IS_UNIMPLEMENTED(Intrinsic, ...) \
+ IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented,
+ INTRINSICS_LIST(IS_UNIMPLEMENTED)
+#undef IS_UNIMPLEMENTED
+};
+#undef INTRINSICS_LIST
+
+} // namespace detail
+
static constexpr int kNumberOfCpuRegisterPairs = 0;
// Use a fake return address register to mimic Quick.
static constexpr Register kFakeReturnRegister = Register(kLastCpuRegister + 1);
@@ -1506,7 +1537,8 @@ CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph,
ComputeRegisterMask(reinterpret_cast<const int*>(kFpuCalleeSaves),
arraysize(kFpuCalleeSaves)),
compiler_options,
- stats),
+ stats,
+ ArrayRef<const bool>(detail::kIsIntrinsicUnimplemented)),
block_labels_(nullptr),
location_builder_(graph, this),
instruction_visitor_(graph, this),
@@ -1561,9 +1593,22 @@ void InstructionCodeGeneratorX86_64::GenerateMethodEntryExitHook(HInstruction* i
new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathX86_64(instruction);
codegen_->AddSlowPath(slow_path);
+ if (instruction->IsMethodExitHook()) {
+  // Check whether we need to test if the caller needs a deoptimization. Strictly speaking, it
+  // would be sufficient to check whether the CheckCallerForDeopt bit is set, but it is faster to
+  // check for any non-zero value. The kCHA bit isn't used in debuggable runtimes because CHA
+  // optimization is disabled there, and the remaining bit is only set when this method itself
+  // requires a deoptimization due to redefinition, so checking for a non-zero value is safe here.
+ __ cmpl(Address(CpuRegister(RSP), codegen_->GetStackOffsetOfShouldDeoptimizeFlag()),
+ Immediate(0));
+ __ j(kNotEqual, slow_path->GetEntryLabel());
+ }
+
uint64_t address = reinterpret_cast64<uint64_t>(Runtime::Current()->GetInstrumentation());
- int offset = instrumentation::Instrumentation::NeedsEntryExitHooksOffset().Int32Value();
- __ movq(CpuRegister(TMP), Immediate(address + offset));
+ MemberOffset offset = instruction->IsMethodExitHook() ?
+ instrumentation::Instrumentation::HaveMethodExitListenersOffset()
+ : instrumentation::Instrumentation::HaveMethodEntryListenersOffset();
+ __ movq(CpuRegister(TMP), Immediate(address + offset.Int32Value()));
__ cmpb(Address(CpuRegister(TMP), 0), Immediate(0));
__ j(kNotEqual, slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
@@ -1653,6 +1698,44 @@ void CodeGeneratorX86_64::MaybeIncrementHotness(bool is_frame_entry) {
void CodeGeneratorX86_64::GenerateFrameEntry() {
__ cfi().SetCurrentCFAOffset(kX86_64WordSize); // return address
+
+ // Check if we need to generate the clinit check. We will jump to the
+ // resolution stub if the class is not initialized and the executing thread is
+ // not the thread initializing it.
+ // We do this before constructing the frame to get the correct stack trace if
+ // an exception is thrown.
+ if (GetCompilerOptions().ShouldCompileWithClinitCheck(GetGraph()->GetArtMethod())) {
+ NearLabel resolution;
+ // Check if we're visibly initialized.
+
+ // We don't emit a read barrier here to save on code size. We rely on the
+ // resolution trampoline to do a suspend check before re-entering this code.
+ __ movl(CpuRegister(TMP),
+ Address(CpuRegister(kMethodRegisterArgument),
+ ArtMethod::DeclaringClassOffset().Int32Value()));
+ __ cmpb(Address(CpuRegister(TMP), status_byte_offset),
+ Immediate(shifted_visibly_initialized_value));
+ __ j(kAboveEqual, &frame_entry_label_);
+
+ // Check if we're initializing and the thread initializing is the one
+ // executing the code.
+ __ cmpb(Address(CpuRegister(TMP), status_byte_offset), Immediate(shifted_initializing_value));
+ __ j(kBelow, &resolution);
+
+ __ movl(CpuRegister(TMP),
+ Address(CpuRegister(TMP), mirror::Class::ClinitThreadIdOffset().Int32Value()));
+ __ gs()->cmpl(
+ CpuRegister(TMP),
+ Address::Absolute(Thread::TidOffset<kX86_64PointerSize>().Int32Value(), /*no_rip=*/ true));
+ __ j(kEqual, &frame_entry_label_);
+ __ Bind(&resolution);
+
+ // Jump to the resolution stub.
+ ThreadOffset64 entrypoint_offset =
+ GetThreadOffset<kX86_64PointerSize>(kQuickQuickResolutionTrampoline);
+ __ gs()->jmp(Address::Absolute(entrypoint_offset, /*no_rip=*/ true));
+ }
+
__ Bind(&frame_entry_label_);
bool skip_overflow_check = IsLeafMethod()
&& !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64);
@@ -2274,12 +2357,12 @@ void InstructionCodeGeneratorX86_64::VisitSelect(HSelect* select) {
}
}
-void LocationsBuilderX86_64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
- new (GetGraph()->GetAllocator()) LocationSummary(info);
+void LocationsBuilderX86_64::VisitNop(HNop* nop) {
+ new (GetGraph()->GetAllocator()) LocationSummary(nop);
}
-void InstructionCodeGeneratorX86_64::VisitNativeDebugInfo(HNativeDebugInfo*) {
- // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile.
+void InstructionCodeGeneratorX86_64::VisitNop(HNop*) {
+ // The environment recording already happened in CodeGenerator::Compile.
}
void CodeGeneratorX86_64::IncreaseFrame(size_t adjustment) {
@@ -5013,7 +5096,7 @@ void LocationsBuilderX86_64::HandleFieldGet(HInstruction* instruction) {
instruction->IsPredicatedInstanceFieldGet());
bool object_field_get_with_read_barrier =
- kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
+ gUseReadBarrier && (instruction->GetType() == DataType::Type::kReference);
bool is_predicated = instruction->IsPredicatedInstanceFieldGet();
LocationSummary* locations =
new (GetGraph()->GetAllocator()) LocationSummary(instruction,
@@ -5064,7 +5147,7 @@ void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction,
if (load_type == DataType::Type::kReference) {
// /* HeapReference<Object> */ out = *(base + offset)
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
// Note that a potential implicit null check is handled in this
// CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier call.
codegen_->GenerateFieldLoadWithBakerReadBarrier(
@@ -5119,6 +5202,9 @@ void LocationsBuilderX86_64::HandleFieldSet(HInstruction* instruction,
locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
}
}
+
+ // TODO(solanes): We could reduce the temp usage but it requires some non-trivial refactoring of
+ // InstructionCodeGeneratorX86_64::HandleFieldSet.
if (needs_write_barrier) {
// Temporary registers for the write barrier.
locations->AddTemp(Location::RequiresRegister());
@@ -5180,7 +5266,8 @@ void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction,
bool is_volatile,
bool is_atomic,
bool value_can_be_null,
- bool byte_swap) {
+ bool byte_swap,
+ WriteBarrierKind write_barrier_kind) {
LocationSummary* locations = instruction->GetLocations();
Location value = locations->InAt(value_index);
@@ -5298,10 +5385,16 @@ void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction,
codegen_->MaybeRecordImplicitNullCheck(instruction);
}
- if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(value_index))) {
+ if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(value_index)) &&
+ write_barrier_kind != WriteBarrierKind::kDontEmit) {
CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
CpuRegister card = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
- codegen_->MarkGCCard(temp, card, base, value.AsRegister<CpuRegister>(), value_can_be_null);
+ codegen_->MarkGCCard(
+ temp,
+ card,
+ base,
+ value.AsRegister<CpuRegister>(),
+ value_can_be_null && write_barrier_kind == WriteBarrierKind::kEmitWithNullCheck);
}
if (is_volatile) {
@@ -5311,7 +5404,8 @@ void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction,
void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction,
const FieldInfo& field_info,
- bool value_can_be_null) {
+ bool value_can_be_null,
+ WriteBarrierKind write_barrier_kind) {
DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
LocationSummary* locations = instruction->GetLocations();
@@ -5336,7 +5430,9 @@ void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction,
base,
is_volatile,
/*is_atomic=*/ false,
- value_can_be_null);
+ value_can_be_null,
+ /*byte_swap=*/ false,
+ write_barrier_kind);
if (is_predicated) {
__ Bind(&pred_is_null);
@@ -5348,7 +5444,10 @@ void LocationsBuilderX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instructio
}
void InstructionCodeGeneratorX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
- HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
+ HandleFieldSet(instruction,
+ instruction->GetFieldInfo(),
+ instruction->GetValueCanBeNull(),
+ instruction->GetWriteBarrierKind());
}
void LocationsBuilderX86_64::VisitPredicatedInstanceFieldGet(
@@ -5388,7 +5487,10 @@ void LocationsBuilderX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
}
void InstructionCodeGeneratorX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
- HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
+ HandleFieldSet(instruction,
+ instruction->GetFieldInfo(),
+ instruction->GetValueCanBeNull(),
+ instruction->GetWriteBarrierKind());
}
void LocationsBuilderX86_64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
@@ -5513,7 +5615,7 @@ void InstructionCodeGeneratorX86_64::VisitNullCheck(HNullCheck* instruction) {
void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) {
bool object_array_get_with_read_barrier =
- kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
+ gUseReadBarrier && (instruction->GetType() == DataType::Type::kReference);
LocationSummary* locations =
new (GetGraph()->GetAllocator()) LocationSummary(instruction,
object_array_get_with_read_barrier
@@ -5551,7 +5653,7 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
"art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
// /* HeapReference<Object> */ out =
// *(obj + data_offset + index * sizeof(HeapReference<Object>))
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
// Note that a potential implicit null check is handled in this
// CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier call.
codegen_->GenerateArrayLoadWithBakerReadBarrier(
@@ -5619,9 +5721,12 @@ void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) {
}
if (needs_write_barrier) {
- // Temporary registers for the write barrier.
- locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too.
+ // Used by reference poisoning or emitting write barrier.
locations->AddTemp(Location::RequiresRegister());
+ if (instruction->GetWriteBarrierKind() != WriteBarrierKind::kDontEmit) {
+ // Only used when emitting a write barrier.
+ locations->AddTemp(Location::RequiresRegister());
+ }
}
}
@@ -5739,9 +5844,16 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
}
}
- CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
- codegen_->MarkGCCard(
- temp, card, array, value.AsRegister<CpuRegister>(), /* value_can_be_null= */ false);
+ if (instruction->GetWriteBarrierKind() != WriteBarrierKind::kDontEmit) {
+ DCHECK_EQ(instruction->GetWriteBarrierKind(), WriteBarrierKind::kEmitNoNullCheck)
+ << " Already null checked so we shouldn't do it again.";
+ CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
+ codegen_->MarkGCCard(temp,
+ card,
+ array,
+ value.AsRegister<CpuRegister>(),
+ /* emit_null_check= */ false);
+ }
if (can_value_be_null) {
DCHECK(do_store.IsLinked());
@@ -5940,9 +6052,9 @@ void CodeGeneratorX86_64::MarkGCCard(CpuRegister temp,
CpuRegister card,
CpuRegister object,
CpuRegister value,
- bool value_can_be_null) {
+ bool emit_null_check) {
NearLabel is_null;
- if (value_can_be_null) {
+ if (emit_null_check) {
__ testl(value, value);
__ j(kEqual, &is_null);
}
@@ -5967,7 +6079,7 @@ void CodeGeneratorX86_64::MarkGCCard(CpuRegister temp,
// of the card to mark; and 2. to load the `kCardDirty` value) saves a load
// (no need to explicitly load `kCardDirty` as an immediate value).
__ movb(Address(temp, card, TIMES_1, 0), card);
- if (value_can_be_null) {
+ if (emit_null_check) {
__ Bind(&is_null);
}
}
@@ -6282,12 +6394,6 @@ void ParallelMoveResolverX86_64::RestoreScratch(int reg) {
void InstructionCodeGeneratorX86_64::GenerateClassInitializationCheck(
SlowPathCode* slow_path, CpuRegister class_reg) {
- constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
- const size_t status_byte_offset =
- mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte);
- constexpr uint32_t shifted_visibly_initialized_value =
- enum_cast<uint32_t>(ClassStatus::kVisiblyInitialized) << (status_lsb_position % kBitsPerByte);
-
__ cmpb(Address(class_reg, status_byte_offset), Immediate(shifted_visibly_initialized_value));
__ j(kBelow, slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
@@ -6352,7 +6458,7 @@ void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) {
load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
load_kind == HLoadClass::LoadKind::kBssEntryPackage);
- const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
+ const bool requires_read_barrier = gUseReadBarrier && !cls->IsInBootImage();
LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
? LocationSummary::kCallOnSlowPath
: LocationSummary::kNoCall;
@@ -6366,7 +6472,7 @@ void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) {
}
locations->SetOut(Location::RequiresRegister());
if (load_kind == HLoadClass::LoadKind::kBssEntry) {
- if (!kUseReadBarrier || kUseBakerReadBarrier) {
+ if (!gUseReadBarrier || kUseBakerReadBarrier) {
// Rely on the type resolution and/or initialization to save everything.
locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
} else {
@@ -6403,7 +6509,7 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S
const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
? kWithoutReadBarrier
- : kCompilerReadBarrierOption;
+ : gCompilerReadBarrierOption;
bool generate_null_check = false;
switch (load_kind) {
case HLoadClass::LoadKind::kReferrersClass: {
@@ -6550,7 +6656,7 @@ void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) {
} else {
locations->SetOut(Location::RequiresRegister());
if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) {
- if (!kUseReadBarrier || kUseBakerReadBarrier) {
+ if (!gUseReadBarrier || kUseBakerReadBarrier) {
// Rely on the pResolveString to save everything.
locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
} else {
@@ -6598,7 +6704,7 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREA
/* no_rip= */ false);
Label* fixup_label = codegen_->NewStringBssEntryPatch(load);
// /* GcRoot<mirror::Class> */ out = *address /* PC-relative */
- GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
+ GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, gCompilerReadBarrierOption);
// No need for memory fence, thanks to the x86-64 memory model.
SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathX86_64(load);
codegen_->AddSlowPath(slow_path);
@@ -6619,7 +6725,7 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREA
Label* fixup_label = codegen_->NewJitRootStringPatch(
load->GetDexFile(), load->GetStringIndex(), load->GetString());
// /* GcRoot<mirror::String> */ out = *address
- GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
+ GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, gCompilerReadBarrierOption);
return;
}
default:
@@ -6672,7 +6778,7 @@ void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) {
// Temp is used for read barrier.
static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
- if (kEmitCompilerReadBarrier &&
+ if (gUseReadBarrier &&
!kUseBakerReadBarrier &&
(type_check_kind == TypeCheckKind::kAbstractClassCheck ||
type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
@@ -6722,9 +6828,9 @@ void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) {
}
locations->SetInAt(0, Location::RequiresRegister());
if (type_check_kind == TypeCheckKind::kBitstringCheck) {
- locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
- locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
- locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
+ locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)));
+ locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)));
} else {
locations->SetInAt(1, Location::Any());
}
@@ -7000,9 +7106,9 @@ void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) {
// a memory address.
locations->SetInAt(1, Location::RequiresRegister());
} else if (type_check_kind == TypeCheckKind::kBitstringCheck) {
- locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
- locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
- locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
+ locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)));
+ locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)));
} else {
locations->SetInAt(1, Location::Any());
}
@@ -7426,7 +7532,7 @@ void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister(
ReadBarrierOption read_barrier_option) {
CpuRegister out_reg = out.AsRegister<CpuRegister>();
if (read_barrier_option == kWithReadBarrier) {
- CHECK(kEmitCompilerReadBarrier);
+ CHECK(gUseReadBarrier);
if (kUseBakerReadBarrier) {
// Load with fast path based Baker's read barrier.
// /* HeapReference<Object> */ out = *(out + offset)
@@ -7460,7 +7566,7 @@ void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters(
CpuRegister out_reg = out.AsRegister<CpuRegister>();
CpuRegister obj_reg = obj.AsRegister<CpuRegister>();
if (read_barrier_option == kWithReadBarrier) {
- CHECK(kEmitCompilerReadBarrier);
+ CHECK(gUseReadBarrier);
if (kUseBakerReadBarrier) {
// Load with fast path based Baker's read barrier.
// /* HeapReference<Object> */ out = *(obj + offset)
@@ -7488,7 +7594,7 @@ void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(
ReadBarrierOption read_barrier_option) {
CpuRegister root_reg = root.AsRegister<CpuRegister>();
if (read_barrier_option == kWithReadBarrier) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
if (kUseBakerReadBarrier) {
// Fast path implementation of art::ReadBarrier::BarrierForRoot when
// Baker's read barrier are used:
@@ -7552,7 +7658,7 @@ void CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* in
CpuRegister obj,
uint32_t offset,
bool needs_null_check) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
DCHECK(kUseBakerReadBarrier);
// /* HeapReference<Object> */ ref = *(obj + offset)
@@ -7566,7 +7672,7 @@ void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* in
uint32_t data_offset,
Location index,
bool needs_null_check) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
DCHECK(kUseBakerReadBarrier);
static_assert(
@@ -7586,7 +7692,7 @@ void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction
bool always_update_field,
CpuRegister* temp1,
CpuRegister* temp2) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
DCHECK(kUseBakerReadBarrier);
// In slow path based read barriers, the read barrier call is
@@ -7668,7 +7774,7 @@ void CodeGeneratorX86_64::GenerateReadBarrierSlow(HInstruction* instruction,
Location obj,
uint32_t offset,
Location index) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
// Insert a slow path based read barrier *after* the reference load.
//
@@ -7695,7 +7801,7 @@ void CodeGeneratorX86_64::MaybeGenerateReadBarrierSlow(HInstruction* instruction
Location obj,
uint32_t offset,
Location index) {
- if (kEmitCompilerReadBarrier) {
+ if (gUseReadBarrier) {
// Baker's read barriers shall be handled by the fast path
// (CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier).
DCHECK(!kUseBakerReadBarrier);
@@ -7710,7 +7816,7 @@ void CodeGeneratorX86_64::MaybeGenerateReadBarrierSlow(HInstruction* instruction
void CodeGeneratorX86_64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
Location out,
Location root) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
// Insert a slow path based read barrier *after* the GC root load.
//
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 39a72d8211..dff2e799e0 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -18,13 +18,14 @@
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_
#include "arch/x86_64/instruction_set_features_x86_64.h"
+#include "base/macros.h"
#include "code_generator.h"
#include "driver/compiler_options.h"
#include "nodes.h"
#include "parallel_move_resolver.h"
#include "utils/x86_64/assembler_x86_64.h"
-namespace art {
+namespace art HIDDEN {
namespace x86_64 {
// Use a local definition to prevent copying mistakes.
@@ -52,6 +53,53 @@ static constexpr size_t kRuntimeParameterFpuRegistersLength =
// these are not clobbered by any direct call to native code (such as math intrinsics).
static constexpr FloatRegister non_volatile_xmm_regs[] = { XMM12, XMM13, XMM14, XMM15 };
+#define UNIMPLEMENTED_INTRINSIC_LIST_X86_64(V) \
+ V(CRC32Update) \
+ V(CRC32UpdateBytes) \
+ V(CRC32UpdateByteBuffer) \
+ V(FP16ToFloat) \
+ V(FP16ToHalf) \
+ V(FP16Floor) \
+ V(FP16Ceil) \
+ V(FP16Rint) \
+ V(FP16Greater) \
+ V(FP16GreaterEquals) \
+ V(FP16Less) \
+ V(FP16LessEquals) \
+ V(FP16Compare) \
+ V(FP16Min) \
+ V(FP16Max) \
+ V(StringStringIndexOf) \
+ V(StringStringIndexOfAfter) \
+ V(StringBufferAppend) \
+ V(StringBufferLength) \
+ V(StringBufferToString) \
+ V(StringBuilderAppendObject) \
+ V(StringBuilderAppendString) \
+ V(StringBuilderAppendCharSequence) \
+ V(StringBuilderAppendCharArray) \
+ V(StringBuilderAppendBoolean) \
+ V(StringBuilderAppendChar) \
+ V(StringBuilderAppendInt) \
+ V(StringBuilderAppendLong) \
+ V(StringBuilderAppendFloat) \
+ V(StringBuilderAppendDouble) \
+ V(StringBuilderLength) \
+ V(StringBuilderToString) \
+ /* 1.8 */ \
+ V(UnsafeGetAndAddInt) \
+ V(UnsafeGetAndAddLong) \
+ V(UnsafeGetAndSetInt) \
+ V(UnsafeGetAndSetLong) \
+ V(UnsafeGetAndSetObject) \
+ V(MethodHandleInvokeExact) \
+ V(MethodHandleInvoke) \
+ /* OpenJDK 11 */ \
+ V(JdkUnsafeGetAndAddInt) \
+ V(JdkUnsafeGetAndAddLong) \
+ V(JdkUnsafeGetAndSetInt) \
+ V(JdkUnsafeGetAndSetLong) \
+ V(JdkUnsafeGetAndSetObject)
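(For readers unfamiliar with the X-macro pattern used by this list, here is a self-contained sketch of how such a list is typically consumed. It is not part of the patch; EXAMPLE_INTRINSIC_LIST and the Visit##Name stubs are hypothetical names chosen only for illustration.)

  #include <iostream>

  // Hypothetical stand-in for a list like UNIMPLEMENTED_INTRINSIC_LIST_X86_64(V).
  #define EXAMPLE_INTRINSIC_LIST(V) \
    V(CRC32Update)                  \
    V(FP16Floor)

  // Expand the list into one stub per entry.
  #define DEFINE_UNIMPLEMENTED_STUB(Name) \
    void Visit##Name() { std::cout << "unimplemented intrinsic: " #Name "\n"; }
  EXAMPLE_INTRINSIC_LIST(DEFINE_UNIMPLEMENTED_STUB)
  #undef DEFINE_UNIMPLEMENTED_STUB

  int main() {
    VisitCRC32Update();
    VisitFP16Floor();
    return 0;
  }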
class InvokeRuntimeCallingConvention : public CallingConvention<Register, FloatRegister> {
public:
@@ -250,7 +298,8 @@ class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator {
bool is_volatile,
bool is_atomic,
bool value_can_be_null,
- bool byte_swap = false);
+ bool byte_swap,
+ WriteBarrierKind write_barrier_kind);
void Bswap(Location value, DataType::Type type, CpuRegister* temp = nullptr);
@@ -273,7 +322,8 @@ class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator {
void HandleFieldSet(HInstruction* instruction,
const FieldInfo& field_info,
- bool value_can_be_null);
+ bool value_can_be_null,
+ WriteBarrierKind write_barrier_kind);
void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
void GenerateMinMaxInt(LocationSummary* locations, bool is_min, DataType::Type type);
@@ -435,7 +485,7 @@ class CodeGeneratorX86_64 : public CodeGenerator {
CpuRegister card,
CpuRegister object,
CpuRegister value,
- bool value_can_be_null);
+ bool emit_null_check);
void GenerateMemoryBarrier(MemBarrierKind kind);
diff --git a/compiler/optimizing/code_sinking.cc b/compiler/optimizing/code_sinking.cc
index 766bb01978..d759a16f48 100644
--- a/compiler/optimizing/code_sinking.cc
+++ b/compiler/optimizing/code_sinking.cc
@@ -19,30 +19,55 @@
#include "base/arena_bit_vector.h"
#include "base/array_ref.h"
#include "base/bit_vector-inl.h"
+#include "base/globals.h"
#include "base/logging.h"
#include "base/scoped_arena_allocator.h"
#include "base/scoped_arena_containers.h"
#include "common_dominator.h"
#include "nodes.h"
-namespace art {
+namespace art HIDDEN {
bool CodeSinking::Run() {
- HBasicBlock* exit = graph_->GetExitBlock();
- if (exit == nullptr) {
+ if (graph_->GetExitBlock() == nullptr) {
// Infinite loop, just bail.
return false;
}
+
+ UncommonBranchSinking();
+ ReturnSinking();
+ return true;
+}
+
+void CodeSinking::UncommonBranchSinking() {
+ HBasicBlock* exit = graph_->GetExitBlock();
+ DCHECK(exit != nullptr);
// TODO(ngeoffray): we do not profile branches yet, so use throw instructions
// as an indicator of an uncommon branch.
for (HBasicBlock* exit_predecessor : exit->GetPredecessors()) {
HInstruction* last = exit_predecessor->GetLastInstruction();
+
+ // TryBoundary instructions are sometimes inserted between the last instruction (e.g. Throw,
+    // Return) and Exit. We don't want to use such a TryBoundary for our "uncommon branch" heuristic
+    // because it is not as good an indicator as a throwing branch, so we skip it and fetch the
+    // actual last instruction.
+ if (last->IsTryBoundary()) {
+ // We have an exit try boundary. Fetch the previous instruction.
+ DCHECK(!last->AsTryBoundary()->IsEntry());
+ if (last->GetPrevious() == nullptr) {
+ DCHECK(exit_predecessor->IsSingleTryBoundary());
+ exit_predecessor = exit_predecessor->GetSinglePredecessor();
+ last = exit_predecessor->GetLastInstruction();
+ } else {
+ last = last->GetPrevious();
+ }
+ }
+
// Any predecessor of the exit that does not return, throws an exception.
if (!last->IsReturn() && !last->IsReturnVoid()) {
SinkCodeToUncommonBranch(exit_predecessor);
}
}
- return true;
}
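(A hypothetical source-level analogue of uncommon-branch sinking, written as plain C++ rather than ART IR: work that is only needed on a throwing path can be materialized inside that path instead of on the common path.)

  #include <stdexcept>
  #include <string>

  // Illustration only: the message construction is used solely on the
  // throwing (uncommon) branch, so sinking moves it into that branch.
  void CheckNonNegative(int value) {
    if (value < 0) {
      std::string message = "negative value: " + std::to_string(value);
      throw std::runtime_error(message);
    }
  }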
static bool IsInterestingInstruction(HInstruction* instruction) {
@@ -88,7 +113,7 @@ static bool IsInterestingInstruction(HInstruction* instruction) {
// We can only store on local allocations. Other heap references can
// be escaping. Note that allocations can escape too, but we only move
- // allocations if their users can move to, or are in the list of
+ // allocations if their users can move too, or are in the list of
// post dominated blocks.
if (instruction->IsInstanceFieldSet()) {
if (!instruction->InputAt(0)->IsNewInstance()) {
@@ -102,7 +127,7 @@ static bool IsInterestingInstruction(HInstruction* instruction) {
}
}
- // Heap accesses cannot go pass instructions that have memory side effects, which
+ // Heap accesses cannot go past instructions that have memory side effects, which
// we are not tracking here. Note that the load/store elimination optimization
// runs before this optimization, and should have removed interesting ones.
// In theory, we could handle loads of local allocations, but this is currently
@@ -171,7 +196,6 @@ static bool ShouldFilterUse(HInstruction* instruction,
return false;
}
-
// Find the ideal position for moving `instruction`. If `filter` is true,
// we filter out store instructions to that instruction, which are processed
// first in the step (3) of the sinking algorithm.
@@ -210,56 +234,52 @@ static HInstruction* FindIdealPosition(HInstruction* instruction,
return nullptr;
}
- // Move to the first dominator not in a loop, if we can.
- while (target_block->IsInLoop()) {
+ // Move to the first dominator not in a loop, if we can. We only do this if we are trying to hoist
+ // `instruction` out of a loop it wasn't a part of.
+ const HLoopInformation* loop_info = instruction->GetBlock()->GetLoopInformation();
+ while (target_block->IsInLoop() && target_block->GetLoopInformation() != loop_info) {
if (!post_dominated.IsBitSet(target_block->GetDominator()->GetBlockId())) {
break;
}
target_block = target_block->GetDominator();
DCHECK(target_block != nullptr);
}
- const bool was_in_loop = target_block->IsInLoop();
-
- // For throwing instructions we can move them into:
- // * Blocks that are not part of a try
- // * Catch blocks are suitable as well, as long as they are not part of an outer try.
- // * Blocks that are part of the same try that the instrucion was already in.
- //
- // We cannot move an instruction that can throw into a try that said instruction is not a part of
- // already, as that would mean it will throw into a different catch block. If we detect that
- // `target_block` is not a valid block to move `instruction` to, we traverse up the dominator tree
- // to find if we have a suitable block.
- while (instruction->CanThrow() && target_block->GetTryCatchInformation() != nullptr) {
- if (target_block->IsCatchBlock()) {
- // If the catch block has an xhandler, it means it is inside of an outer try.
- const bool inside_of_another_try_catch = target_block->GetSuccessors().size() != 1;
- if (!inside_of_another_try_catch) {
- // If we have a catch block, it's okay to sink as long as that catch is not inside of
- // another try catch.
- break;
+
+ if (instruction->CanThrow()) {
+ // Consistency check: We shouldn't land in a loop if we weren't in one before traversing up the
+ // dominator tree regarding try catches.
+ const bool was_in_loop = target_block->IsInLoop();
+
+ // We cannot move an instruction that can throw into a try that said instruction is not a part
+ // of already, as that would mean it will throw into a different catch block. In short, for
+ // throwing instructions:
+    // * If the throwing instruction is part of a try, it should only be sunk into that same try.
+    // * If the throwing instruction is not part of any try, it shouldn't be sunk into any try.
+ if (instruction->GetBlock()->IsTryBlock()) {
+ const HTryBoundary& try_entry =
+ instruction->GetBlock()->GetTryCatchInformation()->GetTryEntry();
+ while (!(target_block->IsTryBlock() &&
+ try_entry.HasSameExceptionHandlersAs(
+ target_block->GetTryCatchInformation()->GetTryEntry()))) {
+ target_block = target_block->GetDominator();
+ if (!post_dominated.IsBitSet(target_block->GetBlockId())) {
+ // We couldn't find a suitable block.
+ return nullptr;
+ }
}
} else {
- DCHECK(target_block->IsTryBlock());
- if (instruction->GetBlock()->IsTryBlock() &&
- instruction->GetBlock()->GetTryCatchInformation()->GetTryEntry().GetId() ==
- target_block->GetTryCatchInformation()->GetTryEntry().GetId()) {
- // Sink within the same try block is allowed.
- break;
+      // Search up the dominator chain for the first block that is also not a try block.
+ while (target_block->IsTryBlock()) {
+ target_block = target_block->GetDominator();
+ if (!post_dominated.IsBitSet(target_block->GetBlockId())) {
+ // We couldn't find a suitable block.
+ return nullptr;
+ }
}
}
- // We are now in the case where we would be moving to a different try. Since we don't want
- // that, traverse up the dominator tree to find a suitable block.
- if (!post_dominated.IsBitSet(target_block->GetDominator()->GetBlockId())) {
- // We couldn't find a suitable block.
- return nullptr;
- }
- target_block = target_block->GetDominator();
- DCHECK(target_block != nullptr);
- }
- // We shouldn't land in a loop if we weren't in one before traversing up the dominator tree
- // regarding try catches.
- DCHECK_IMPLIES(target_block->IsInLoop(), was_in_loop);
+ DCHECK_IMPLIES(target_block->IsInLoop(), was_in_loop);
+ }
// Find insertion position. No need to filter anymore, as we have found a
// target block.
@@ -271,10 +291,21 @@ static HInstruction* FindIdealPosition(HInstruction* instruction,
}
}
for (const HUseListNode<HEnvironment*>& use : instruction->GetEnvUses()) {
- HInstruction* user = use.GetUser()->GetHolder();
+ HEnvironment* env = use.GetUser();
+ HInstruction* user = env->GetHolder();
if (user->GetBlock() == target_block &&
(insert_pos == nullptr || user->StrictlyDominates(insert_pos))) {
- insert_pos = user;
+ if (target_block->IsCatchBlock() && target_block->GetFirstInstruction() == user) {
+ // We can sink the instructions past the environment setting Nop. If we do that, we have to
+ // remove said instruction from the environment. Since we know that we will be sinking the
+ // instruction to this block and there are no more instructions to consider, we can safely
+ // remove it from the environment now.
+ DCHECK(target_block->GetFirstInstruction()->IsNop());
+ env->RemoveAsUserOfInput(use.GetIndex());
+ env->SetRawEnvAt(use.GetIndex(), /*instruction=*/ nullptr);
+ } else {
+ insert_pos = user;
+ }
}
}
if (insert_pos == nullptr) {
@@ -310,8 +341,8 @@ void CodeSinking::SinkCodeToUncommonBranch(HBasicBlock* end_block) {
ScopedArenaVector<HInstruction*> move_in_order(allocator.Adapter(kArenaAllocMisc));
// Step (1): Visit post order to get a subset of blocks post dominated by `end_block`.
- // TODO(ngeoffray): Getting the full set of post-dominated shoud be done by
- // computint the post dominator tree, but that could be too time consuming. Also,
+ // TODO(ngeoffray): Getting the full set of post-dominated should be done by
+ // computing the post dominator tree, but that could be too time consuming. Also,
// we should start the analysis from blocks dominated by an uncommon branch, but we
// don't profile branches yet.
bool found_block = false;
@@ -321,45 +352,43 @@ void CodeSinking::SinkCodeToUncommonBranch(HBasicBlock* end_block) {
post_dominated.SetBit(block->GetBlockId());
} else if (found_block) {
bool is_post_dominated = true;
- if (block->GetSuccessors().empty()) {
- // We currently bail for loops.
- is_post_dominated = false;
- } else {
- // BasicBlock that are try entries look like this:
- // BasicBlock i:
- // instr 1
- // ...
- // instr N
- // TryBoundary kind:entry ---Try begins here---
- //
- // Due to how our BasicBlocks are structured, BasicBlock i will have an xhandler successor
- // since we are starting a try. If we use `GetSuccessors` for this case, we will check if
- // the catch block is post_dominated.
- //
- // However, this catch block doesn't matter: when we sink the instruction into that
- // BasicBlock i, we do it before the TryBoundary (i.e. outside of the try and outside the
- // catch's domain). We can ignore catch blocks using `GetNormalSuccessors` to sink code
- // right before the start of a try block.
- //
- // On the other side of the coin, BasicBlock that are try exits look like this:
- // BasicBlock j:
- // instr 1
- // ...
- // instr N
- // TryBoundary kind:exit ---Try ends here---
- //
- // If we sink to these basic blocks we would be sinking inside of the try so we would like
- // to check the catch block for post dominance.
- const bool ends_with_try_boundary_entry =
- block->EndsWithTryBoundary() && block->GetLastInstruction()->AsTryBoundary()->IsEntry();
- ArrayRef<HBasicBlock* const> successors =
- ends_with_try_boundary_entry ? block->GetNormalSuccessors() :
- ArrayRef<HBasicBlock* const>(block->GetSuccessors());
- for (HBasicBlock* successor : successors) {
- if (!post_dominated.IsBitSet(successor->GetBlockId())) {
- is_post_dominated = false;
- break;
- }
+ DCHECK_NE(block, graph_->GetExitBlock())
+ << "We shouldn't encounter the exit block after `end_block`.";
+
+      // BasicBlocks that are try entries look like this:
+ // BasicBlock i:
+ // instr 1
+ // ...
+ // instr N
+ // TryBoundary kind:entry ---Try begins here---
+ //
+ // Due to how our BasicBlocks are structured, BasicBlock i will have an xhandler successor
+ // since we are starting a try. If we use `GetSuccessors` for this case, we will check if
+ // the catch block is post_dominated.
+ //
+ // However, this catch block doesn't matter: when we sink the instruction into that
+ // BasicBlock i, we do it before the TryBoundary (i.e. outside of the try and outside the
+ // catch's domain). We can ignore catch blocks using `GetNormalSuccessors` to sink code
+ // right before the start of a try block.
+ //
+      // On the other side of the coin, BasicBlocks that are try exits look like this:
+ // BasicBlock j:
+ // instr 1
+ // ...
+ // instr N
+ // TryBoundary kind:exit ---Try ends here---
+ //
+ // If we sink to these basic blocks we would be sinking inside of the try so we would like
+ // to check the catch block for post dominance.
+ const bool ends_with_try_boundary_entry =
+ block->EndsWithTryBoundary() && block->GetLastInstruction()->AsTryBoundary()->IsEntry();
+ ArrayRef<HBasicBlock* const> successors =
+ ends_with_try_boundary_entry ? block->GetNormalSuccessors() :
+ ArrayRef<HBasicBlock* const>(block->GetSuccessors());
+ for (HBasicBlock* successor : successors) {
+ if (!post_dominated.IsBitSet(successor->GetBlockId())) {
+ is_post_dominated = false;
+ break;
}
}
if (is_post_dominated) {
@@ -509,4 +538,79 @@ void CodeSinking::SinkCodeToUncommonBranch(HBasicBlock* end_block) {
}
}
+void CodeSinking::ReturnSinking() {
+ HBasicBlock* exit = graph_->GetExitBlock();
+ DCHECK(exit != nullptr);
+
+ int number_of_returns = 0;
+ bool saw_return = false;
+ for (HBasicBlock* pred : exit->GetPredecessors()) {
+ // TODO(solanes): We might have Return/ReturnVoid->TryBoundary->Exit. We can theoretically
+  // handle them and move them out of the TryBoundary. However, it is a corner case and it adds
+ // codebase complexity.
+ if (pred->GetLastInstruction()->IsReturn() || pred->GetLastInstruction()->IsReturnVoid()) {
+ saw_return |= pred->GetLastInstruction()->IsReturn();
+ ++number_of_returns;
+ }
+ }
+
+ if (number_of_returns < 2) {
+ // Nothing to do.
+ return;
+ }
+
+ // `new_block` will coalesce the Return instructions into Phi+Return, or the ReturnVoid
+ // instructions into a ReturnVoid.
+ HBasicBlock* new_block = new (graph_->GetAllocator()) HBasicBlock(graph_, exit->GetDexPc());
+ if (saw_return) {
+ HPhi* new_phi = nullptr;
+ for (size_t i = 0; i < exit->GetPredecessors().size(); /*++i in loop*/) {
+ HBasicBlock* pred = exit->GetPredecessors()[i];
+ if (!pred->GetLastInstruction()->IsReturn()) {
+ ++i;
+ continue;
+ }
+
+ HReturn* ret = pred->GetLastInstruction()->AsReturn();
+ if (new_phi == nullptr) {
+ // Create the new_phi, if we haven't done so yet. We do it here since we need to know the
+ // type to assign to it.
+ new_phi = new (graph_->GetAllocator()) HPhi(graph_->GetAllocator(),
+ kNoRegNumber,
+ /*number_of_inputs=*/0,
+ ret->InputAt(0)->GetType());
+ new_block->AddPhi(new_phi);
+ }
+ new_phi->AddInput(ret->InputAt(0));
+ pred->ReplaceAndRemoveInstructionWith(ret,
+ new (graph_->GetAllocator()) HGoto(ret->GetDexPc()));
+ pred->ReplaceSuccessor(exit, new_block);
+ // Since we are removing a predecessor, there's no need to increment `i`.
+ }
+ new_block->AddInstruction(new (graph_->GetAllocator()) HReturn(new_phi, exit->GetDexPc()));
+ } else {
+ for (size_t i = 0; i < exit->GetPredecessors().size(); /*++i in loop*/) {
+ HBasicBlock* pred = exit->GetPredecessors()[i];
+ if (!pred->GetLastInstruction()->IsReturnVoid()) {
+ ++i;
+ continue;
+ }
+
+ HReturnVoid* ret = pred->GetLastInstruction()->AsReturnVoid();
+ pred->ReplaceAndRemoveInstructionWith(ret,
+ new (graph_->GetAllocator()) HGoto(ret->GetDexPc()));
+ pred->ReplaceSuccessor(exit, new_block);
+ // Since we are removing a predecessor, there's no need to increment `i`.
+ }
+ new_block->AddInstruction(new (graph_->GetAllocator()) HReturnVoid(exit->GetDexPc()));
+ }
+
+ new_block->AddSuccessor(exit);
+ graph_->AddBlock(new_block);
+
+ // Recompute dominance since we added a new block.
+ graph_->ClearDominanceInformation();
+ graph_->ComputeDominanceInformation();
+}
+
} // namespace art
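(A hypothetical source-level sketch of what ReturnSinking achieves, expressed as ordinary C++ rather than ART IR: two Return predecessors of Exit are rewired through one new block, conceptually a Phi plus a single Return, so the exit-frame code is generated once.)

  // Before: each return would get its own exit frame code.
  int BeforeSinking(bool flag, int a, int b) {
    if (flag) {
      return a;
    }
    return b;
  }

  // After: both predecessors end in a plain branch, and a single return
  // remains (the local `result` plays the role of the new Phi).
  int AfterSinking(bool flag, int a, int b) {
    int result;
    if (flag) {
      result = a;
    } else {
      result = b;
    }
    return result;
  }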
diff --git a/compiler/optimizing/code_sinking.h b/compiler/optimizing/code_sinking.h
index 8eb3a520c3..c743db40d9 100644
--- a/compiler/optimizing/code_sinking.h
+++ b/compiler/optimizing/code_sinking.h
@@ -17,10 +17,11 @@
#ifndef ART_COMPILER_OPTIMIZING_CODE_SINKING_H_
#define ART_COMPILER_OPTIMIZING_CODE_SINKING_H_
+#include "base/macros.h"
#include "nodes.h"
#include "optimization.h"
-namespace art {
+namespace art HIDDEN {
/**
* Optimization pass to move instructions into uncommon branches,
@@ -38,10 +39,16 @@ class CodeSinking : public HOptimization {
static constexpr const char* kCodeSinkingPassName = "code_sinking";
private:
- // Try to move code only used by `end_block` and all its post-dominated / dominated
+ // Tries to sink code to uncommon branches.
+ void UncommonBranchSinking();
+ // Tries to move code only used by `end_block` and all its post-dominated / dominated
// blocks, to these blocks.
void SinkCodeToUncommonBranch(HBasicBlock* end_block);
+ // Coalesces the Return/ReturnVoid instructions into one, if we have two or more. We do this to
+ // avoid generating the exit frame code several times.
+ void ReturnSinking();
+
DISALLOW_COPY_AND_ASSIGN(CodeSinking);
};
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index c0441b07ed..2d9acc49b3 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -33,7 +33,7 @@
#include "gtest/gtest.h"
-namespace art {
+namespace art HIDDEN {
// Return all combinations of ISA and code generator that are executable on
// hardware, or on simulator, and that we'd like to test.
@@ -64,7 +64,7 @@ static ::std::vector<CodegenTargetConfig> GetTargetConfigs() {
return v;
}
-class CodegenTest : public OptimizingUnitTest {
+class CodegenTest : public CommonCompilerTest, public OptimizingUnitTestHelper {
protected:
void TestCode(const std::vector<uint16_t>& data, bool has_result = false, int32_t expected = 0);
void TestCodeLong(const std::vector<uint16_t>& data, bool has_result, int64_t expected);
diff --git a/compiler/optimizing/codegen_test_utils.h b/compiler/optimizing/codegen_test_utils.h
index 397e601cee..7af9d0f44c 100644
--- a/compiler/optimizing/codegen_test_utils.h
+++ b/compiler/optimizing/codegen_test_utils.h
@@ -20,6 +20,7 @@
#include "arch/arm/registers_arm.h"
#include "arch/instruction_set.h"
#include "arch/x86/registers_x86.h"
+#include "base/macros.h"
#include "code_simulator.h"
#include "code_simulator_container.h"
#include "common_compiler_test.h"
@@ -35,6 +36,10 @@
#include "code_generator_arm64.h"
#endif
+#ifdef ART_ENABLE_CODEGEN_riscv64
+#include "code_generator_riscv64.h"
+#endif
+
#ifdef ART_ENABLE_CODEGEN_x86
#include "code_generator_x86.h"
#endif
@@ -43,9 +48,9 @@
#include "code_generator_x86_64.h"
#endif
-namespace art {
+namespace art HIDDEN {
-typedef CodeGenerator* (*CreateCodegenFn)(HGraph*, const CompilerOptions&);
+using CreateCodegenFn = CodeGenerator* (*)(HGraph*, const CompilerOptions&);
class CodegenTargetConfig {
public:
@@ -254,15 +259,11 @@ static void Run(const InternalCodeAllocator& allocator,
Runtime* GetRuntime() override { return nullptr; }
};
CodeHolder code_holder;
- const void* code_ptr =
+ const void* method_code =
code_holder.MakeExecutable(allocator.GetMemory(), ArrayRef<const uint8_t>(), target_isa);
- typedef Expected (*fptr)();
- fptr f = reinterpret_cast<fptr>(reinterpret_cast<uintptr_t>(code_ptr));
- if (target_isa == InstructionSet::kThumb2) {
- // For thumb we need the bottom bit set.
- f = reinterpret_cast<fptr>(reinterpret_cast<uintptr_t>(f) + 1);
- }
+ using fptr = Expected (*)();
+ fptr f = reinterpret_cast<fptr>(reinterpret_cast<uintptr_t>(method_code));
VerifyGeneratedCode(target_isa, f, has_result, expected);
}
@@ -332,6 +333,10 @@ inline CodeGenerator* create_codegen_arm64(HGraph* graph, const CompilerOptions&
}
#endif
+#ifdef ART_ENABLE_CODEGEN_riscv64
+inline CodeGenerator* create_codegen_riscv64(HGraph*, const CompilerOptions&) { return nullptr; }
+#endif
+
#ifdef ART_ENABLE_CODEGEN_x86
inline CodeGenerator* create_codegen_x86(HGraph* graph, const CompilerOptions& compiler_options) {
return new (graph->GetAllocator()) TestCodeGeneratorX86(graph, compiler_options);
diff --git a/compiler/optimizing/common_arm.h b/compiler/optimizing/common_arm.h
index 320915ee57..5f71cb906c 100644
--- a/compiler/optimizing/common_arm.h
+++ b/compiler/optimizing/common_arm.h
@@ -17,6 +17,7 @@
#ifndef ART_COMPILER_OPTIMIZING_COMMON_ARM_H_
#define ART_COMPILER_OPTIMIZING_COMMON_ARM_H_
+#include "base/macros.h"
#include "instruction_simplifier_shared.h"
#include "locations.h"
#include "nodes.h"
@@ -28,7 +29,7 @@
#include "aarch32/macro-assembler-aarch32.h"
#pragma GCC diagnostic pop
-namespace art {
+namespace art HIDDEN {
using helpers::HasShifterOperand;
diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h
index 81c6561318..20b0e38af5 100644
--- a/compiler/optimizing/common_arm64.h
+++ b/compiler/optimizing/common_arm64.h
@@ -17,6 +17,7 @@
#ifndef ART_COMPILER_OPTIMIZING_COMMON_ARM64_H_
#define ART_COMPILER_OPTIMIZING_COMMON_ARM64_H_
+#include "base/macros.h"
#include "code_generator.h"
#include "instruction_simplifier_shared.h"
#include "locations.h"
@@ -31,7 +32,7 @@
#include "aarch64/simulator-aarch64.h"
#pragma GCC diagnostic pop
-namespace art {
+namespace art HIDDEN {
using helpers::CanFitInShifterOperand;
using helpers::HasShifterOperand;
@@ -153,7 +154,7 @@ inline vixl::aarch64::CPURegister InputCPURegisterOrZeroRegAt(HInstruction* inst
int index) {
HInstruction* input = instr->InputAt(index);
DataType::Type input_type = input->GetType();
- if (input->IsConstant() && input->AsConstant()->IsZeroBitPattern()) {
+ if (IsZeroBitPattern(input)) {
return (DataType::Size(input_type) >= vixl::aarch64::kXRegSizeInBytes)
? vixl::aarch64::Register(vixl::aarch64::xzr)
: vixl::aarch64::Register(vixl::aarch64::wzr);
@@ -314,7 +315,7 @@ inline Location ARM64EncodableConstantOrRegister(HInstruction* constant,
HInstruction* instr) {
if (constant->IsConstant()
&& Arm64CanEncodeConstantAsImmediate(constant->AsConstant(), instr)) {
- return Location::ConstantLocation(constant->AsConstant());
+ return Location::ConstantLocation(constant);
}
return Location::RequiresRegister();
@@ -380,10 +381,6 @@ inline bool ShifterOperandSupportsExtension(HInstruction* instruction) {
return instruction->IsAdd() || instruction->IsSub();
}
-inline bool IsConstantZeroBitPattern(const HInstruction* instruction) {
- return instruction->IsConstant() && instruction->AsConstant()->IsZeroBitPattern();
-}
-
} // namespace helpers
} // namespace arm64
} // namespace art
diff --git a/compiler/optimizing/common_dominator.h b/compiler/optimizing/common_dominator.h
index 9f012cfbb2..f01270ee4a 100644
--- a/compiler/optimizing/common_dominator.h
+++ b/compiler/optimizing/common_dominator.h
@@ -17,9 +17,10 @@
#ifndef ART_COMPILER_OPTIMIZING_COMMON_DOMINATOR_H_
#define ART_COMPILER_OPTIMIZING_COMMON_DOMINATOR_H_
+#include "base/macros.h"
#include "nodes.h"
-namespace art {
+namespace art HIDDEN {
// Helper class for finding common dominators of two or more blocks in a graph.
// The domination information of a graph must not be modified while there is
diff --git a/compiler/optimizing/constant_folding.cc b/compiler/optimizing/constant_folding.cc
index 2031707759..06d19e3f29 100644
--- a/compiler/optimizing/constant_folding.cc
+++ b/compiler/optimizing/constant_folding.cc
@@ -16,14 +16,20 @@
#include "constant_folding.h"
-namespace art {
+#include <algorithm>
+
+#include "dex/dex_file-inl.h"
+#include "optimizing/data_type.h"
+#include "optimizing/nodes.h"
+
+namespace art HIDDEN {
// This visitor tries to simplify instructions that can be evaluated
// as constants.
-class HConstantFoldingVisitor : public HGraphDelegateVisitor {
+class HConstantFoldingVisitor final : public HGraphDelegateVisitor {
public:
- explicit HConstantFoldingVisitor(HGraph* graph)
- : HGraphDelegateVisitor(graph) {}
+ HConstantFoldingVisitor(HGraph* graph, OptimizingCompilerStats* stats, bool use_all_optimizations)
+ : HGraphDelegateVisitor(graph, stats), use_all_optimizations_(use_all_optimizations) {}
private:
void VisitBasicBlock(HBasicBlock* block) override;
@@ -31,8 +37,15 @@ class HConstantFoldingVisitor : public HGraphDelegateVisitor {
void VisitUnaryOperation(HUnaryOperation* inst) override;
void VisitBinaryOperation(HBinaryOperation* inst) override;
- void VisitTypeConversion(HTypeConversion* inst) override;
+ void VisitArrayLength(HArrayLength* inst) override;
void VisitDivZeroCheck(HDivZeroCheck* inst) override;
+ void VisitIf(HIf* inst) override;
+ void VisitTypeConversion(HTypeConversion* inst) override;
+
+ void PropagateValue(HBasicBlock* starting_block, HInstruction* variable, HConstant* constant);
+
+ // Use all optimizations without restrictions.
+ bool use_all_optimizations_;
DISALLOW_COPY_AND_ASSIGN(HConstantFoldingVisitor);
};
@@ -55,6 +68,11 @@ class InstructionWithAbsorbingInputSimplifier : public HGraphVisitor {
void VisitBelow(HBelow* instruction) override;
void VisitBelowOrEqual(HBelowOrEqual* instruction) override;
+ void VisitGreaterThan(HGreaterThan* instruction) override;
+ void VisitGreaterThanOrEqual(HGreaterThanOrEqual* instruction) override;
+ void VisitLessThan(HLessThan* instruction) override;
+ void VisitLessThanOrEqual(HLessThanOrEqual* instruction) override;
+
void VisitAnd(HAnd* instruction) override;
void VisitCompare(HCompare* instruction) override;
void VisitMul(HMul* instruction) override;
@@ -69,7 +87,7 @@ class InstructionWithAbsorbingInputSimplifier : public HGraphVisitor {
bool HConstantFolding::Run() {
- HConstantFoldingVisitor visitor(graph_);
+ HConstantFoldingVisitor visitor(graph_, stats_, use_all_optimizations_);
// Process basic blocks in reverse post-order in the dominator tree,
// so that an instruction turned into a constant, used as input of
// another instruction, may possibly be used to turn that second
@@ -111,16 +129,6 @@ void HConstantFoldingVisitor::VisitBinaryOperation(HBinaryOperation* inst) {
}
}
-void HConstantFoldingVisitor::VisitTypeConversion(HTypeConversion* inst) {
- // Constant folding: replace `TypeConversion(a)' with a constant at
- // compile time if `a' is a constant.
- HConstant* constant = inst->TryStaticEvaluation();
- if (constant != nullptr) {
- inst->ReplaceWith(constant);
- inst->GetBlock()->RemoveInstruction(inst);
- }
-}
-
void HConstantFoldingVisitor::VisitDivZeroCheck(HDivZeroCheck* inst) {
// We can safely remove the check if the input is a non-null constant.
HInstruction* check_input = inst->InputAt(0);
@@ -130,6 +138,169 @@ void HConstantFoldingVisitor::VisitDivZeroCheck(HDivZeroCheck* inst) {
}
}
+void HConstantFoldingVisitor::PropagateValue(HBasicBlock* starting_block,
+ HInstruction* variable,
+ HConstant* constant) {
+ const bool recording_stats = stats_ != nullptr;
+ size_t uses_before = 0;
+ size_t uses_after = 0;
+ if (recording_stats) {
+ uses_before = variable->GetUses().SizeSlow();
+ }
+
+ if (variable->GetUses().HasExactlyOneElement()) {
+ // Nothing to do, since we only have the `if (variable)` use or the `condition` use.
+ return;
+ }
+
+ variable->ReplaceUsesDominatedBy(
+ starting_block->GetFirstInstruction(), constant, /* strictly_dominated= */ false);
+
+ if (recording_stats) {
+ uses_after = variable->GetUses().SizeSlow();
+ DCHECK_GE(uses_after, 1u) << "we must at least have the use in the if clause.";
+ DCHECK_GE(uses_before, uses_after);
+ MaybeRecordStat(stats_, MethodCompilationStat::kPropagatedIfValue, uses_before - uses_after);
+ }
+}
+
+void HConstantFoldingVisitor::VisitIf(HIf* inst) {
+ // This optimization can take a lot of compile time since we have a lot of If instructions in
+ // graphs.
+ if (!use_all_optimizations_) {
+ return;
+ }
+
+ // Consistency check: the true and false successors do not dominate each other.
+ DCHECK(!inst->IfTrueSuccessor()->Dominates(inst->IfFalseSuccessor()) &&
+ !inst->IfFalseSuccessor()->Dominates(inst->IfTrueSuccessor()));
+
+ HInstruction* if_input = inst->InputAt(0);
+
+ // Already a constant.
+ if (if_input->IsConstant()) {
+ return;
+ }
+
+ // if (variable) {
+ // SSA `variable` guaranteed to be true
+ // } else {
+ // and here false
+ // }
+ PropagateValue(inst->IfTrueSuccessor(), if_input, GetGraph()->GetIntConstant(1));
+ PropagateValue(inst->IfFalseSuccessor(), if_input, GetGraph()->GetIntConstant(0));
+
+ // If the input is a condition, we can propagate the information of the condition itself.
+ if (!if_input->IsCondition()) {
+ return;
+ }
+ HCondition* condition = if_input->AsCondition();
+
+  // We want either `==` or `!=`, since we cannot make assumptions for other conditions, e.g. `>`.
+ if (!condition->IsEqual() && !condition->IsNotEqual()) {
+ return;
+ }
+
+ HInstruction* left = condition->GetLeft();
+ HInstruction* right = condition->GetRight();
+
+  // We want exactly one of them to be a constant.
+ if (left->IsConstant() == right->IsConstant()) {
+ return;
+ }
+
+ // At this point we have something like:
+ // if (variable == constant) {
+ // SSA `variable` guaranteed to be equal to constant here
+ // } else {
+ // No guarantees can be made here (usually, see boolean case below).
+ // }
+ // Similarly with variable != constant, except that we can make guarantees in the else case.
+
+ HConstant* constant = left->IsConstant() ? left->AsConstant() : right->AsConstant();
+ HInstruction* variable = left->IsConstant() ? right : left;
+
+ // Don't deal with floats/doubles since they bring a lot of edge cases e.g.
+ // if (f == 0.0f) {
+ // // f is not really guaranteed to be 0.0f. It could be -0.0f, for example
+ // }
+ if (DataType::IsFloatingPointType(variable->GetType())) {
+ return;
+ }
+ DCHECK(!DataType::IsFloatingPointType(constant->GetType()));
+
+ // Sometimes we have an HCompare flowing into an Equals/NonEquals, which can act as a proxy. For
+ // example: `Equals(Compare(var, constant), 0)`. This is common for long, float, and double.
+ if (variable->IsCompare()) {
+ // We only care about equality comparisons so we skip if it is a less or greater comparison.
+ if (!constant->IsArithmeticZero()) {
+ return;
+ }
+
+ // Update left and right to be the ones from the HCompare.
+ left = variable->AsCompare()->GetLeft();
+ right = variable->AsCompare()->GetRight();
+
+    // Re-check that exactly one of them is a constant.
+ if (left->IsConstant() == right->IsConstant()) {
+ return;
+ }
+
+ constant = left->IsConstant() ? left->AsConstant() : right->AsConstant();
+ variable = left->IsConstant() ? right : left;
+
+ // Re-check floating point values.
+ if (DataType::IsFloatingPointType(variable->GetType())) {
+ return;
+ }
+ DCHECK(!DataType::IsFloatingPointType(constant->GetType()));
+ }
+
+ // From this block forward we want to replace the SSA value. We use `starting_block` and not the
+ // `if` block as we want to update one of the branches but not the other.
+ HBasicBlock* starting_block =
+ condition->IsEqual() ? inst->IfTrueSuccessor() : inst->IfFalseSuccessor();
+
+ PropagateValue(starting_block, variable, constant);
+
+  // Special case for booleans: since they have only two values, we know what to propagate in the
+ // other branch. However, sometimes our boolean values are not compared to 0 or 1. In those cases
+ // we cannot make an assumption for the `else` branch.
+ if (variable->GetType() == DataType::Type::kBool &&
+ constant->IsIntConstant() &&
+ (constant->AsIntConstant()->IsTrue() || constant->AsIntConstant()->IsFalse())) {
+ HBasicBlock* other_starting_block =
+ condition->IsEqual() ? inst->IfFalseSuccessor() : inst->IfTrueSuccessor();
+ DCHECK_NE(other_starting_block, starting_block);
+
+ HConstant* other_constant = constant->AsIntConstant()->IsTrue() ?
+ GetGraph()->GetIntConstant(0) :
+ GetGraph()->GetIntConstant(1);
+ DCHECK_NE(other_constant, constant);
+ PropagateValue(other_starting_block, variable, other_constant);
+ }
+}
+
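(A hypothetical source-level illustration of the propagation performed by VisitIf, in plain C++ rather than ART IR: inside the branch guarded by an equality with a constant, dominated uses of the tested value can be replaced by that constant.)

  int Example(int x) {
    if (x == 42) {
      // Uses of x dominated by this branch fold to the constant: 42 + 1.
      return x + 1;
    }
    // No assumption about x can be made on this path for `==`.
    return x;
  }

  bool BooleanExample(bool flag) {
    if (flag) {
      return flag;   // folds to true
    } else {
      return !flag;  // flag folds to false here, so this also folds to true
    }
  }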
+void HConstantFoldingVisitor::VisitArrayLength(HArrayLength* inst) {
+ HInstruction* input = inst->InputAt(0);
+ if (input->IsLoadString()) {
+ DCHECK(inst->IsStringLength());
+ HLoadString* load_string = input->AsLoadString();
+ const DexFile& dex_file = load_string->GetDexFile();
+ const dex::StringId& string_id = dex_file.GetStringId(load_string->GetStringIndex());
+ inst->ReplaceWith(GetGraph()->GetIntConstant(dex_file.GetStringLength(string_id)));
+ }
+}
+
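(A loosely analogous compile-time fact in plain C++, hypothetical and only meant to show why the length query can be folded: the length of a string literal is known statically, so it can be replaced by a constant.)

  // sizeof includes the trailing '\0', hence the -1.
  constexpr int kHelloLength = sizeof("hello") - 1;
  static_assert(kHelloLength == 5, "literal length is a compile-time constant");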
+void HConstantFoldingVisitor::VisitTypeConversion(HTypeConversion* inst) {
+ // Constant folding: replace `TypeConversion(a)' with a constant at
+ // compile time if `a' is a constant.
+ HConstant* constant = inst->TryStaticEvaluation();
+ if (constant != nullptr) {
+ inst->ReplaceWith(constant);
+ inst->GetBlock()->RemoveInstruction(inst);
+ }
+}
void InstructionWithAbsorbingInputSimplifier::VisitShift(HBinaryOperation* instruction) {
DCHECK(instruction->IsShl() || instruction->IsShr() || instruction->IsUShr());
@@ -145,8 +316,17 @@ void InstructionWithAbsorbingInputSimplifier::VisitShift(HBinaryOperation* instr
}
void InstructionWithAbsorbingInputSimplifier::VisitEqual(HEqual* instruction) {
- if ((instruction->GetLeft()->IsNullConstant() && !instruction->GetRight()->CanBeNull()) ||
- (instruction->GetRight()->IsNullConstant() && !instruction->GetLeft()->CanBeNull())) {
+ if (instruction->GetLeft() == instruction->GetRight() &&
+ !DataType::IsFloatingPointType(instruction->GetLeft()->GetType())) {
+ // Replace code looking like
+ // EQUAL lhs, lhs
+ // CONSTANT true
+    // We don't perform this optimization for FP types since Double.NaN != Double.NaN, which is the
+    // opposite value.
+ instruction->ReplaceWith(GetGraph()->GetConstant(DataType::Type::kBool, 1));
+ instruction->GetBlock()->RemoveInstruction(instruction);
+ } else if ((instruction->GetLeft()->IsNullConstant() && !instruction->GetRight()->CanBeNull()) ||
+ (instruction->GetRight()->IsNullConstant() && !instruction->GetLeft()->CanBeNull())) {
// Replace code looking like
// EQUAL lhs, null
// where lhs cannot be null with
@@ -157,8 +337,17 @@ void InstructionWithAbsorbingInputSimplifier::VisitEqual(HEqual* instruction) {
}
void InstructionWithAbsorbingInputSimplifier::VisitNotEqual(HNotEqual* instruction) {
- if ((instruction->GetLeft()->IsNullConstant() && !instruction->GetRight()->CanBeNull()) ||
- (instruction->GetRight()->IsNullConstant() && !instruction->GetLeft()->CanBeNull())) {
+ if (instruction->GetLeft() == instruction->GetRight() &&
+ !DataType::IsFloatingPointType(instruction->GetLeft()->GetType())) {
+ // Replace code looking like
+ // NOT_EQUAL lhs, lhs
+ // CONSTANT false
+    // We don't perform this optimization for FP types since Double.NaN != Double.NaN, which is the
+    // opposite value.
+ instruction->ReplaceWith(GetGraph()->GetConstant(DataType::Type::kBool, 0));
+ instruction->GetBlock()->RemoveInstruction(instruction);
+ } else if ((instruction->GetLeft()->IsNullConstant() && !instruction->GetRight()->CanBeNull()) ||
+ (instruction->GetRight()->IsNullConstant() && !instruction->GetLeft()->CanBeNull())) {
// Replace code looking like
// NOT_EQUAL lhs, null
// where lhs cannot be null with
@@ -169,8 +358,14 @@ void InstructionWithAbsorbingInputSimplifier::VisitNotEqual(HNotEqual* instructi
}
void InstructionWithAbsorbingInputSimplifier::VisitAbove(HAbove* instruction) {
- if (instruction->GetLeft()->IsConstant() &&
- instruction->GetLeft()->AsConstant()->IsArithmeticZero()) {
+ if (instruction->GetLeft() == instruction->GetRight()) {
+ // Replace code looking like
+ // ABOVE lhs, lhs
+ // CONSTANT false
+ instruction->ReplaceWith(GetGraph()->GetConstant(DataType::Type::kBool, 0));
+ instruction->GetBlock()->RemoveInstruction(instruction);
+ } else if (instruction->GetLeft()->IsConstant() &&
+ instruction->GetLeft()->AsConstant()->IsArithmeticZero()) {
// Replace code looking like
// ABOVE dst, 0, src // unsigned 0 > src is always false
// with
@@ -181,8 +376,14 @@ void InstructionWithAbsorbingInputSimplifier::VisitAbove(HAbove* instruction) {
}
void InstructionWithAbsorbingInputSimplifier::VisitAboveOrEqual(HAboveOrEqual* instruction) {
- if (instruction->GetRight()->IsConstant() &&
- instruction->GetRight()->AsConstant()->IsArithmeticZero()) {
+ if (instruction->GetLeft() == instruction->GetRight()) {
+ // Replace code looking like
+ // ABOVE_OR_EQUAL lhs, lhs
+ // CONSTANT true
+ instruction->ReplaceWith(GetGraph()->GetConstant(DataType::Type::kBool, 1));
+ instruction->GetBlock()->RemoveInstruction(instruction);
+ } else if (instruction->GetRight()->IsConstant() &&
+ instruction->GetRight()->AsConstant()->IsArithmeticZero()) {
// Replace code looking like
// ABOVE_OR_EQUAL dst, src, 0 // unsigned src >= 0 is always true
// with
@@ -193,8 +394,14 @@ void InstructionWithAbsorbingInputSimplifier::VisitAboveOrEqual(HAboveOrEqual* i
}
void InstructionWithAbsorbingInputSimplifier::VisitBelow(HBelow* instruction) {
- if (instruction->GetRight()->IsConstant() &&
- instruction->GetRight()->AsConstant()->IsArithmeticZero()) {
+ if (instruction->GetLeft() == instruction->GetRight()) {
+ // Replace code looking like
+ // BELOW lhs, lhs
+ // CONSTANT false
+ instruction->ReplaceWith(GetGraph()->GetConstant(DataType::Type::kBool, 0));
+ instruction->GetBlock()->RemoveInstruction(instruction);
+ } else if (instruction->GetRight()->IsConstant() &&
+ instruction->GetRight()->AsConstant()->IsArithmeticZero()) {
// Replace code looking like
// BELOW dst, src, 0 // unsigned src < 0 is always false
// with
@@ -205,8 +412,14 @@ void InstructionWithAbsorbingInputSimplifier::VisitBelow(HBelow* instruction) {
}
void InstructionWithAbsorbingInputSimplifier::VisitBelowOrEqual(HBelowOrEqual* instruction) {
- if (instruction->GetLeft()->IsConstant() &&
- instruction->GetLeft()->AsConstant()->IsArithmeticZero()) {
+ if (instruction->GetLeft() == instruction->GetRight()) {
+ // Replace code looking like
+ // BELOW_OR_EQUAL lhs, lhs
+ // CONSTANT true
+ instruction->ReplaceWith(GetGraph()->GetConstant(DataType::Type::kBool, 1));
+ instruction->GetBlock()->RemoveInstruction(instruction);
+ } else if (instruction->GetLeft()->IsConstant() &&
+ instruction->GetLeft()->AsConstant()->IsArithmeticZero()) {
// Replace code looking like
// BELOW_OR_EQUAL dst, 0, src // unsigned 0 <= src is always true
// with
@@ -216,6 +429,55 @@ void InstructionWithAbsorbingInputSimplifier::VisitBelowOrEqual(HBelowOrEqual* i
}
}
+void InstructionWithAbsorbingInputSimplifier::VisitGreaterThan(HGreaterThan* instruction) {
+ if (instruction->GetLeft() == instruction->GetRight() &&
+ (!DataType::IsFloatingPointType(instruction->GetLeft()->GetType()) ||
+ instruction->IsLtBias())) {
+ // Replace code looking like
+ // GREATER_THAN lhs, lhs
+ // CONSTANT false
+ instruction->ReplaceWith(GetGraph()->GetConstant(DataType::Type::kBool, 0));
+ instruction->GetBlock()->RemoveInstruction(instruction);
+ }
+}
+
+void InstructionWithAbsorbingInputSimplifier::VisitGreaterThanOrEqual(
+ HGreaterThanOrEqual* instruction) {
+ if (instruction->GetLeft() == instruction->GetRight() &&
+ (!DataType::IsFloatingPointType(instruction->GetLeft()->GetType()) ||
+ instruction->IsGtBias())) {
+ // Replace code looking like
+ // GREATER_THAN_OR_EQUAL lhs, lhs
+ // CONSTANT true
+ instruction->ReplaceWith(GetGraph()->GetConstant(DataType::Type::kBool, 1));
+ instruction->GetBlock()->RemoveInstruction(instruction);
+ }
+}
+
+void InstructionWithAbsorbingInputSimplifier::VisitLessThan(HLessThan* instruction) {
+ if (instruction->GetLeft() == instruction->GetRight() &&
+ (!DataType::IsFloatingPointType(instruction->GetLeft()->GetType()) ||
+ instruction->IsGtBias())) {
+ // Replace code looking like
+ // LESS_THAN lhs, lhs
+ // CONSTANT false
+ instruction->ReplaceWith(GetGraph()->GetConstant(DataType::Type::kBool, 0));
+ instruction->GetBlock()->RemoveInstruction(instruction);
+ }
+}
+
+void InstructionWithAbsorbingInputSimplifier::VisitLessThanOrEqual(HLessThanOrEqual* instruction) {
+ if (instruction->GetLeft() == instruction->GetRight() &&
+ (!DataType::IsFloatingPointType(instruction->GetLeft()->GetType()) ||
+ instruction->IsLtBias())) {
+ // Replace code looking like
+ // LESS_THAN_OR_EQUAL lhs, lhs
+ // CONSTANT true
+ instruction->ReplaceWith(GetGraph()->GetConstant(DataType::Type::kBool, 1));
+ instruction->GetBlock()->RemoveInstruction(instruction);
+ }
+}
+
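(A hypothetical C++ analogue of the self-comparison folding added above, including the floating-point caveat that blocks the Equal/NotEqual case.)

  bool AlwaysTrue(int x) { return x == x; }        // integral: folds to true
  bool AlwaysFalse(unsigned x) { return x > x; }   // unsigned (Above-style) check: folds to false
  bool NotFoldable(double d) { return d == d; }    // false when d is NaN, so it cannot be folded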
void InstructionWithAbsorbingInputSimplifier::VisitAnd(HAnd* instruction) {
DataType::Type type = instruction->GetType();
HConstant* input_cst = instruction->GetConstantRight();
diff --git a/compiler/optimizing/constant_folding.h b/compiler/optimizing/constant_folding.h
index 72bd95b3cb..29648e907c 100644
--- a/compiler/optimizing/constant_folding.h
+++ b/compiler/optimizing/constant_folding.h
@@ -17,10 +17,12 @@
#ifndef ART_COMPILER_OPTIMIZING_CONSTANT_FOLDING_H_
#define ART_COMPILER_OPTIMIZING_CONSTANT_FOLDING_H_
+#include "base/macros.h"
#include "nodes.h"
#include "optimization.h"
+#include "optimizing/optimizing_compiler_stats.h"
-namespace art {
+namespace art HIDDEN {
/**
* Optimization pass performing a simple constant-expression
@@ -39,13 +41,20 @@ namespace art {
*/
class HConstantFolding : public HOptimization {
public:
- HConstantFolding(HGraph* graph, const char* name) : HOptimization(graph, name) {}
+ HConstantFolding(HGraph* graph,
+ OptimizingCompilerStats* stats = nullptr,
+ const char* name = kConstantFoldingPassName,
+ bool use_all_optimizations = false)
+ : HOptimization(graph, name, stats), use_all_optimizations_(use_all_optimizations) {}
bool Run() override;
static constexpr const char* kConstantFoldingPassName = "constant_folding";
private:
+ // Use all optimizations without restrictions.
+ bool use_all_optimizations_;
+
DISALLOW_COPY_AND_ASSIGN(HConstantFolding);
};
diff --git a/compiler/optimizing/constant_folding_test.cc b/compiler/optimizing/constant_folding_test.cc
index 74d9d3a993..741fd3f822 100644
--- a/compiler/optimizing/constant_folding_test.cc
+++ b/compiler/optimizing/constant_folding_test.cc
@@ -17,6 +17,8 @@
#include <functional>
#include "constant_folding.h"
+
+#include "base/macros.h"
#include "dead_code_elimination.h"
#include "driver/compiler_options.h"
#include "graph_checker.h"
@@ -25,12 +27,12 @@
#include "gtest/gtest.h"
-namespace art {
+namespace art HIDDEN {
/**
* Fixture class for the constant folding and dce tests.
*/
-class ConstantFoldingTest : public OptimizingUnitTest {
+class ConstantFoldingTest : public CommonCompilerTest, public OptimizingUnitTestHelper {
public:
ConstantFoldingTest() : graph_(nullptr) { }
@@ -58,7 +60,9 @@ class ConstantFoldingTest : public OptimizingUnitTest {
std::string actual_before = printer_before.str();
EXPECT_EQ(expected_before, actual_before);
- HConstantFolding(graph_, "constant_folding").Run();
+ HConstantFolding constant_folding(
+ graph_, /* stats= */ nullptr, "constant_folding", /* use_all_optimizations= */ true);
+ constant_folding.Run();
GraphChecker graph_checker_cf(graph_);
graph_checker_cf.Run();
ASSERT_TRUE(graph_checker_cf.IsValid());
diff --git a/compiler/optimizing/constructor_fence_redundancy_elimination.cc b/compiler/optimizing/constructor_fence_redundancy_elimination.cc
index 3a1a9e023d..d9b7652f32 100644
--- a/compiler/optimizing/constructor_fence_redundancy_elimination.cc
+++ b/compiler/optimizing/constructor_fence_redundancy_elimination.cc
@@ -20,12 +20,12 @@
#include "base/scoped_arena_allocator.h"
#include "base/scoped_arena_containers.h"
-namespace art {
+namespace art HIDDEN {
static constexpr bool kCfreLogFenceInputCount = false;
// TODO: refactor this code by reusing escape analysis.
-class CFREVisitor : public HGraphVisitor {
+class CFREVisitor final : public HGraphVisitor {
public:
CFREVisitor(HGraph* graph, OptimizingCompilerStats* stats)
: HGraphVisitor(graph),
@@ -147,16 +147,6 @@ class CFREVisitor : public HGraphVisitor {
void VisitAlias(HInstruction* aliasing_inst) {
// An object is considered "published" if it becomes aliased by other instructions.
if (HasInterestingPublishTargetAsInput(aliasing_inst)) {
- // Note that constructing a "NullCheck" for new-instance, new-array,
- // or a 'this' (receiver) reference is impossible.
- //
- // If by some reason we actually encounter such a NullCheck(FenceTarget),
- // we LOG(WARNING).
- if (UNLIKELY(aliasing_inst->IsNullCheck())) {
- LOG(kIsDebugBuild ? FATAL : WARNING)
- << "Unexpected instruction: NullCheck; should not be legal in graph";
- // We then do a best-effort to handle this case.
- }
MergeCandidateFences();
}
}
diff --git a/compiler/optimizing/constructor_fence_redundancy_elimination.h b/compiler/optimizing/constructor_fence_redundancy_elimination.h
index 014b342258..e04b986171 100644
--- a/compiler/optimizing/constructor_fence_redundancy_elimination.h
+++ b/compiler/optimizing/constructor_fence_redundancy_elimination.h
@@ -17,9 +17,10 @@
#ifndef ART_COMPILER_OPTIMIZING_CONSTRUCTOR_FENCE_REDUNDANCY_ELIMINATION_H_
#define ART_COMPILER_OPTIMIZING_CONSTRUCTOR_FENCE_REDUNDANCY_ELIMINATION_H_
+#include "base/macros.h"
#include "optimization.h"
-namespace art {
+namespace art HIDDEN {
/*
* Constructor Fence Redundancy Elimination (CFRE).
diff --git a/compiler/optimizing/critical_native_abi_fixup_arm.cc b/compiler/optimizing/critical_native_abi_fixup_arm.cc
index 3c4db4bca7..77e156608b 100644
--- a/compiler/optimizing/critical_native_abi_fixup_arm.cc
+++ b/compiler/optimizing/critical_native_abi_fixup_arm.cc
@@ -23,7 +23,7 @@
#include "scoped_thread_state_change-inl.h"
#include "well_known_classes.h"
-namespace art {
+namespace art HIDDEN {
namespace arm {
// Fix up FP arguments passed in core registers for call to @CriticalNative by inserting fake calls
@@ -45,9 +45,9 @@ static void FixUpArguments(HInvokeStaticOrDirect* invoke) {
if (DataType::IsFloatingPointType(input_type)) {
bool is_double = (input_type == DataType::Type::kFloat64);
DataType::Type converted_type = is_double ? DataType::Type::kInt64 : DataType::Type::kInt32;
- jmethodID known_method = is_double ? WellKnownClasses::java_lang_Double_doubleToRawLongBits
- : WellKnownClasses::java_lang_Float_floatToRawIntBits;
- ArtMethod* resolved_method = jni::DecodeArtMethod(known_method);
+ ArtMethod* resolved_method = is_double
+ ? WellKnownClasses::java_lang_Double_doubleToRawLongBits
+ : WellKnownClasses::java_lang_Float_floatToRawIntBits;
DCHECK(resolved_method != nullptr);
DCHECK(resolved_method->IsIntrinsic());
MethodReference target_method(nullptr, 0);
@@ -74,7 +74,8 @@ static void FixUpArguments(HInvokeStaticOrDirect* invoke) {
dispatch_info,
kStatic,
target_method,
- HInvokeStaticOrDirect::ClinitCheckRequirement::kNone);
+ HInvokeStaticOrDirect::ClinitCheckRequirement::kNone,
+ !block->GetGraph()->IsDebuggable());
// The intrinsic has no side effects and does not need environment or dex cache on ARM.
new_input->SetSideEffects(SideEffects::None());
IntrinsicOptimizations opt(new_input);
diff --git a/compiler/optimizing/critical_native_abi_fixup_arm.h b/compiler/optimizing/critical_native_abi_fixup_arm.h
index faa3c7a5fe..c2068f5e2d 100644
--- a/compiler/optimizing/critical_native_abi_fixup_arm.h
+++ b/compiler/optimizing/critical_native_abi_fixup_arm.h
@@ -17,10 +17,11 @@
#ifndef ART_COMPILER_OPTIMIZING_CRITICAL_NATIVE_ABI_FIXUP_ARM_H_
#define ART_COMPILER_OPTIMIZING_CRITICAL_NATIVE_ABI_FIXUP_ARM_H_
+#include "base/macros.h"
#include "nodes.h"
#include "optimization.h"
-namespace art {
+namespace art HIDDEN {
namespace arm {
class CriticalNativeAbiFixupArm : public HOptimization {
diff --git a/compiler/optimizing/data_type-inl.h b/compiler/optimizing/data_type-inl.h
index 1b33b775da..bbfe90451b 100644
--- a/compiler/optimizing/data_type-inl.h
+++ b/compiler/optimizing/data_type-inl.h
@@ -20,7 +20,7 @@
#include "data_type.h"
#include "dex/primitive.h"
-namespace art {
+namespace art HIDDEN {
// Note: Not declared in data_type.h to avoid pulling in "primitive.h".
constexpr DataType::Type DataTypeFromPrimitive(Primitive::Type type) {
diff --git a/compiler/optimizing/data_type.cc b/compiler/optimizing/data_type.cc
index cb354f46cc..183cf2c622 100644
--- a/compiler/optimizing/data_type.cc
+++ b/compiler/optimizing/data_type.cc
@@ -16,7 +16,7 @@
#include "data_type.h"
-namespace art {
+namespace art HIDDEN {
static const char* kTypeNames[] = {
"Reference",
diff --git a/compiler/optimizing/data_type.h b/compiler/optimizing/data_type.h
index ec6ca7accb..b6d9519150 100644
--- a/compiler/optimizing/data_type.h
+++ b/compiler/optimizing/data_type.h
@@ -22,8 +22,9 @@
#include <android-base/logging.h>
#include "base/bit_utils.h"
+#include "base/macros.h"
-namespace art {
+namespace art HIDDEN {
class DataType {
public:
diff --git a/compiler/optimizing/data_type_test.cc b/compiler/optimizing/data_type_test.cc
index 8fea22bce8..f6f614d8c4 100644
--- a/compiler/optimizing/data_type_test.cc
+++ b/compiler/optimizing/data_type_test.cc
@@ -22,7 +22,7 @@
#include "base/macros.h"
#include "dex/primitive.h"
-namespace art {
+namespace art HIDDEN {
template <DataType::Type data_type, Primitive::Type primitive_type>
static void CheckConversion() {
diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc
index d808f2ca3a..cf49e39849 100644
--- a/compiler/optimizing/dead_code_elimination.cc
+++ b/compiler/optimizing/dead_code_elimination.cc
@@ -16,14 +16,17 @@
#include "dead_code_elimination.h"
+#include "android-base/logging.h"
#include "base/array_ref.h"
#include "base/bit_vector-inl.h"
+#include "base/logging.h"
#include "base/scoped_arena_allocator.h"
#include "base/scoped_arena_containers.h"
#include "base/stl_util.h"
+#include "optimizing/nodes.h"
#include "ssa_phi_elimination.h"
-namespace art {
+namespace art HIDDEN {
static void MarkReachableBlocks(HGraph* graph, ArenaBitVector* visited) {
// Use local allocator for allocating memory.
@@ -178,6 +181,13 @@ static bool RemoveNonNullControlDependences(HBasicBlock* block, HBasicBlock* thr
} else if (!cond->InputAt(0)->IsNullConstant()) {
return false;
}
+
+ // We can't create a BoundType for an object with an invalid RTI.
+ const ReferenceTypeInfo ti = obj->GetReferenceTypeInfo();
+ if (!ti.IsValid()) {
+ return false;
+ }
+
// Scan all uses of obj and find null check under control dependence.
HBoundType* bound = nullptr;
const HUseList<HInstruction*>& uses = obj->GetUses();
@@ -190,7 +200,6 @@ static bool RemoveNonNullControlDependences(HBasicBlock* block, HBasicBlock* thr
user_block != throws &&
block->Dominates(user_block)) {
if (bound == nullptr) {
- ReferenceTypeInfo ti = obj->GetReferenceTypeInfo();
bound = new (obj->GetBlock()->GetGraph()->GetAllocator()) HBoundType(obj);
bound->SetUpperBound(ti, /*can_be_null*/ false);
bound->SetReferenceTypeInfo(ti);
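To make the control-dependence pattern concrete, here is a hedged Java sketch (invented names, not from this patch) of the shape RemoveNonNullControlDependences targets; the new early return added above bails out when the tested reference has no valid reference type info, since the HBoundType below cannot be created without it.

static int firstChar(String s) {
  if (s == null) {
    throw new IllegalArgumentException();   // the always-throwing branch
  }
  // Dominated by the `s != null` edge: the implicit null check on this use can
  // be removed by binding `s` to a BoundType with can_be_null = false, which
  // requires `s` to carry valid reference type info.
  return s.charAt(0);
}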
@@ -213,6 +222,9 @@ static bool RemoveNonNullControlDependences(HBasicBlock* block, HBasicBlock* thr
// | ...
// | instr_n
// | foo() // always throws
+// | instr_n+2
+// | ...
+// | instr_n+m
// \ goto B2
// \ /
// B2
@@ -230,11 +242,14 @@ static bool RemoveNonNullControlDependences(HBasicBlock* block, HBasicBlock* thr
// B2 Exit
//
// Rationale:
-// Removal of the never taken edge to B2 may expose
-// other optimization opportunities, such as code sinking.
+// Removal of the never taken edge to B2 may expose other optimization opportunities, such as code
+// sinking.
+//
+// Note: The example above is a simple one that ends with a `goto`, but the block could end with
+// an If instead.
bool HDeadCodeElimination::SimplifyAlwaysThrows() {
HBasicBlock* exit = graph_->GetExitBlock();
- if (exit == nullptr) {
+ if (!graph_->HasAlwaysThrowingInvokes() || exit == nullptr) {
return false;
}
@@ -242,54 +257,55 @@ bool HDeadCodeElimination::SimplifyAlwaysThrows() {
// Order does not matter, just pick one.
for (HBasicBlock* block : graph_->GetReversePostOrder()) {
- if (block->GetTryCatchInformation() != nullptr) {
+ if (block->IsTryBlock()) {
// We don't want to perform the simplify always throws optimizations for throws inside of
- // tries since those throws might not go to the exit block. We do that by checking the
- // TryCatchInformation of the blocks.
- //
- // As a special case the `catch_block` is the first block of the catch and it has
- // TryCatchInformation. Other blocks in the catch don't have try catch information (as long as
- // they are not part of an outer try). Knowing if a `catch_block` is part of an outer try is
- // possible by checking its successors, but other restrictions of the simplify always throws
- // optimization will block `catch_block` nevertheless (e.g. only one predecessor) so it is not
- // worth the effort.
-
- // TODO(solanes): Maybe we can do a `goto catch` if inside of a try catch instead of going to
- // the exit. If we do so, we have to take into account that we should go to the nearest valid
- // catch i.e. one that would accept our exception type.
+ // tries since those throws might not go to the exit block.
continue;
}
- HInstruction* last = block->GetLastInstruction();
- HInstruction* prev = last->GetPrevious();
- if (prev == nullptr) {
- DCHECK_EQ(block->GetFirstInstruction(), block->GetLastInstruction());
+ // We iterate to find the first instruction that always throws. If two instructions always
+ // throw, the first one will throw and the second one will never be reached.
+ HInstruction* throwing_invoke = nullptr;
+ for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+ if (it.Current()->IsInvoke() && it.Current()->AsInvoke()->AlwaysThrows()) {
+ throwing_invoke = it.Current();
+ break;
+ }
+ }
+
+ if (throwing_invoke == nullptr) {
+ // No always-throwing instruction found. Continue with the rest of the blocks.
continue;
}
- if (prev->AlwaysThrows() &&
- last->IsGoto() &&
- block->GetPhis().IsEmpty() &&
- block->GetPredecessors().size() == 1u) {
- HBasicBlock* pred = block->GetSinglePredecessor();
- HBasicBlock* succ = block->GetSingleSuccessor();
- // Ensure no computations are merged through throwing block.
- // This does not prevent the optimization per se, but would
- // require an elaborate clean up of the SSA graph.
- if (succ != exit &&
- !block->Dominates(pred) &&
- pred->Dominates(succ) &&
- succ->GetPredecessors().size() > 1u &&
- succ->GetPhis().IsEmpty()) {
- block->ReplaceSuccessor(succ, exit);
- rerun_dominance_and_loop_analysis = true;
- MaybeRecordStat(stats_, MethodCompilationStat::kSimplifyThrowingInvoke);
- // Perform a quick follow up optimization on object != null control dependences
- // that is much cheaper to perform now than in a later phase.
- if (RemoveNonNullControlDependences(pred, block)) {
- MaybeRecordStat(stats_, MethodCompilationStat::kRemovedNullCheck);
- }
- }
+    // If the block already points to the exit block, we could still remove the instructions
+    // between the always-throwing instruction and the exit. If there are no such instructions,
+    // just continue since there's nothing to do.
+ if (block->GetSuccessors().size() == 1 &&
+ block->GetSingleSuccessor() == exit &&
+ block->GetLastInstruction()->GetPrevious() == throwing_invoke) {
+ continue;
+ }
+
+    // We split the block at the throwing instruction; the instructions after the throwing
+    // instruction will be disconnected from the graph once `block` points to the exit.
+    // `RemoveDeadBlocks` will take care of removing this new block and its instructions.
+    // Even though `SplitBefore` doesn't guarantee that the graph remains in SSA form, it is fine
+    // since we do not break it.
+ HBasicBlock* new_block = block->SplitBefore(throwing_invoke->GetNext(),
+ /* require_graph_not_in_ssa_form= */ false);
+ DCHECK_EQ(block->GetSingleSuccessor(), new_block);
+ block->ReplaceSuccessor(new_block, exit);
+
+ rerun_dominance_and_loop_analysis = true;
+ MaybeRecordStat(stats_, MethodCompilationStat::kSimplifyThrowingInvoke);
+ // Perform a quick follow up optimization on object != null control dependences
+ // that is much cheaper to perform now than in a later phase.
+    // If there are multiple predecessors, none of them can end with an HIf (which
+    // RemoveNonNullControlDependences requires) because we split critical edges.
+ if (block->GetPredecessors().size() == 1u &&
+ RemoveNonNullControlDependences(block->GetSinglePredecessor(), block)) {
+ MaybeRecordStat(stats_, MethodCompilationStat::kRemovedNullCheck);
}
}
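A hedged sketch of the situation the rewritten loop now handles (hypothetical Java; alwaysFails() stands for any invoke recognized as AlwaysThrows()): the throwing invoke may be followed by further instructions, which SplitBefore moves into a new block that becomes dead once the edge is redirected to Exit.

static int compute(int x) {
  if (x < 0) {
    alwaysFails();   // hypothetical invoke that always throws: nothing after it can execute
    x = -x;          // instr_n+2 ... instr_n+m from the diagram above: split off
    x += 1;          // into a new block and removed later by RemoveDeadBlocks
  }
  return x;
}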
@@ -303,54 +319,45 @@ bool HDeadCodeElimination::SimplifyAlwaysThrows() {
return false;
}
-// Simplify the pattern:
-//
-// B1 B2 ...
-// goto goto goto
-// \ | /
-// \ | /
-// B3
-// i1 = phi(input, input)
-// (i2 = condition on i1)
-// if i1 (or i2)
-// / \
-// / \
-// B4 B5
-//
-// Into:
-//
-// B1 B2 ...
-// | | |
-// B4 B5 B?
-//
-// Note that individual edges can be redirected (for example B2->B3
-// can be redirected as B2->B5) without applying this optimization
-// to other incoming edges.
-//
-// This simplification cannot be applied to catch blocks, because
-// exception handler edges do not represent normal control flow.
-// Though in theory this could still apply to normal control flow
-// going directly to a catch block, we cannot support it at the
-// moment because the catch Phi's inputs do not correspond to the
-// catch block's predecessors, so we cannot identify which
-// predecessor corresponds to a given statically evaluated input.
-//
-// We do not apply this optimization to loop headers as this could
-// create irreducible loops. We rely on the suspend check in the
-// loop header to prevent the pattern match.
-//
-// Note that we rely on the dead code elimination to get rid of B3.
bool HDeadCodeElimination::SimplifyIfs() {
bool simplified_one_or_more_ifs = false;
bool rerun_dominance_and_loop_analysis = false;
- for (HBasicBlock* block : graph_->GetReversePostOrder()) {
+  // Iterating in PostOrder is better for MaybeAddPhi as it can add a Phi for multiple If
+ // instructions in a chain without updating the dominator chain. The branch redirection itself can
+ // work in PostOrder or ReversePostOrder without issues.
+ for (HBasicBlock* block : graph_->GetPostOrder()) {
+ if (block->IsCatchBlock()) {
+ // This simplification cannot be applied to catch blocks, because exception handler edges do
+ // not represent normal control flow. Though in theory this could still apply to normal
+ // control flow going directly to a catch block, we cannot support it at the moment because
+ // the catch Phi's inputs do not correspond to the catch block's predecessors, so we cannot
+ // identify which predecessor corresponds to a given statically evaluated input.
+ continue;
+ }
+
HInstruction* last = block->GetLastInstruction();
- HInstruction* first = block->GetFirstInstruction();
- if (!block->IsCatchBlock() &&
- last->IsIf() &&
- block->HasSinglePhi() &&
+ if (!last->IsIf()) {
+ continue;
+ }
+
+ if (block->IsLoopHeader()) {
+ // We do not apply this optimization to loop headers as this could create irreducible loops.
+ continue;
+ }
+
+ // We will add a Phi which allows the simplification to take place in cases where it wouldn't.
+ MaybeAddPhi(block);
+
+ // TODO(solanes): Investigate support for multiple phis in `block`. We can potentially "push
+ // downwards" existing Phis into the true/false branches. For example, let's say we have another
+ // Phi: Phi(x1,x2,x3,x4,x5,x6). This could turn into Phi(x1,x2) in the true branch, Phi(x3,x4)
+ // in the false branch, and remain as Phi(x5,x6) in `block` (for edges that we couldn't
+ // redirect). We might even be able to remove some phis altogether as they will have only one
+ // value.
+ if (block->HasSinglePhi() &&
block->GetFirstPhi()->HasOnlyOneNonEnvironmentUse()) {
+ HInstruction* first = block->GetFirstInstruction();
bool has_only_phi_and_if = (last == first) && (last->InputAt(0) == block->GetFirstPhi());
bool has_only_phi_condition_and_if =
!has_only_phi_and_if &&
@@ -361,7 +368,6 @@ bool HDeadCodeElimination::SimplifyIfs() {
first->HasOnlyOneNonEnvironmentUse();
if (has_only_phi_and_if || has_only_phi_condition_and_if) {
- DCHECK(!block->IsLoopHeader());
HPhi* phi = block->GetFirstPhi()->AsPhi();
bool phi_input_is_left = (first->InputAt(0) == phi);
@@ -446,6 +452,125 @@ bool HDeadCodeElimination::SimplifyIfs() {
return simplified_one_or_more_ifs;
}
+void HDeadCodeElimination::MaybeAddPhi(HBasicBlock* block) {
+ DCHECK(block->GetLastInstruction()->IsIf());
+ HIf* if_instruction = block->GetLastInstruction()->AsIf();
+ if (if_instruction->InputAt(0)->IsConstant()) {
+ // Constant values are handled in RemoveDeadBlocks.
+ return;
+ }
+
+ if (block->GetNumberOfPredecessors() < 2u) {
+ // Nothing to redirect.
+ return;
+ }
+
+ if (!block->GetPhis().IsEmpty()) {
+ // SimplifyIf doesn't currently work with multiple phis. Adding a phi here won't help that
+ // optimization.
+ return;
+ }
+
+ HBasicBlock* dominator = block->GetDominator();
+ if (!dominator->EndsWithIf()) {
+ return;
+ }
+
+ HInstruction* input = if_instruction->InputAt(0);
+ HInstruction* dominator_input = dominator->GetLastInstruction()->AsIf()->InputAt(0);
+ const bool same_input = dominator_input == input;
+ if (!same_input) {
+ // Try to see if the dominator has the opposite input (e.g. if(cond) and if(!cond)). If that's
+ // the case, we can perform the optimization with the false and true branches reversed.
+ if (!dominator_input->IsCondition() || !input->IsCondition()) {
+ return;
+ }
+
+ HCondition* block_cond = input->AsCondition();
+ HCondition* dominator_cond = dominator_input->AsCondition();
+
+ if (block_cond->GetLeft() != dominator_cond->GetLeft() ||
+ block_cond->GetRight() != dominator_cond->GetRight() ||
+ block_cond->GetOppositeCondition() != dominator_cond->GetCondition()) {
+ return;
+ }
+ }
+
+ if (kIsDebugBuild) {
+ // `block`'s successors should have only one predecessor. Otherwise, we have a critical edge in
+ // the graph.
+ for (HBasicBlock* succ : block->GetSuccessors()) {
+ DCHECK_EQ(succ->GetNumberOfPredecessors(), 1u);
+ }
+ }
+
+ const size_t pred_size = block->GetNumberOfPredecessors();
+ HPhi* new_phi = new (graph_->GetAllocator())
+ HPhi(graph_->GetAllocator(), kNoRegNumber, pred_size, DataType::Type::kInt32);
+
+ for (size_t index = 0; index < pred_size; index++) {
+ HBasicBlock* pred = block->GetPredecessors()[index];
+ const bool dominated_by_true =
+ dominator->GetLastInstruction()->AsIf()->IfTrueSuccessor()->Dominates(pred);
+ const bool dominated_by_false =
+ dominator->GetLastInstruction()->AsIf()->IfFalseSuccessor()->Dominates(pred);
+ if (dominated_by_true == dominated_by_false) {
+ // In this case, we can't know if we are coming from the true branch, or the false branch. It
+ // happens in cases like:
+ // 1 (outer if)
+ // / \
+ // 2 3 (inner if)
+ // | / \
+ // | 4 5
+ // \/ |
+ // 6 |
+ // \ |
+ // 7 (has the same if(cond) as 1)
+ // |
+ // 8
+ // `7` (which would be `block` in this example), and `6` will come from both the true path and
+ // the false path of `1`. We bumped into something similar in SelectGenerator. See
+ // HSelectGenerator::TryFixupDoubleDiamondPattern.
+ // TODO(solanes): Figure out if we can fix up the graph into a double diamond in a generic way
+ // so that DeadCodeElimination and SelectGenerator can take advantage of it.
+
+ if (!same_input) {
+ // `1` and `7` having the opposite condition is a case we are missing. We could potentially
+ // add a BooleanNot instruction to be able to add the Phi, but it seems like overkill since
+ // this case is not that common.
+ return;
+ }
+
+ // The Phi will have `0`, `1`, and `cond` as inputs. If SimplifyIf redirects 0s and 1s, we
+ // will end up with Phi(cond,...,cond) which will be replaced by `cond`. Effectively, we will
+ // redirect edges that we are able to redirect and the rest will remain as before (i.e. we
+ // won't have an extra Phi).
+ new_phi->SetRawInputAt(index, input);
+ } else {
+ // Redirect to either the true branch (1), or the false branch (0).
+ // Given that `dominated_by_true` is the exact opposite of `dominated_by_false`,
+ // `(same_input && dominated_by_true) || (!same_input && dominated_by_false)` is equivalent to
+ // `same_input == dominated_by_true`.
+ new_phi->SetRawInputAt(
+ index,
+ same_input == dominated_by_true ? graph_->GetIntConstant(1) : graph_->GetIntConstant(0));
+ }
+ }
+
+ block->AddPhi(new_phi);
+ if_instruction->ReplaceInput(new_phi, 0);
+
+ // Remove the old input now, if possible. This allows the branch redirection in SimplifyIf to
+ // work without waiting for another pass of DCE.
+ if (input->IsDeadAndRemovable()) {
+ DCHECK(!same_input)
+ << " if both blocks have the same condition, it shouldn't be dead and removable since the "
+ << "dominator block's If instruction would be using that condition.";
+ input->GetBlock()->RemoveInstruction(input);
+ }
+ MaybeRecordStat(stats_, MethodCompilationStat::kSimplifyIfAddedPhi);
+}
+
void HDeadCodeElimination::ConnectSuccessiveBlocks() {
// Order does not matter. Skip the entry block by starting at index 1 in reverse post order.
for (size_t i = 1u, size = graph_->GetReversePostOrder().size(); i != size; ++i) {
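To see what MaybeAddPhi buys, consider this hedged Java example (invented method names, not from this patch): the second if (cond) is dominated by the first, so its condition becomes Phi(1, 0) fed from the two incoming edges, and SimplifyIfs can then redirect the true edge straight to onTrueAgain() and the false edge to onFalseAgain() without re-evaluating cond.

static void handle(boolean cond) {
  // onTrue()/onFalse()/onTrueAgain()/onFalseAgain() are hypothetical helpers.
  if (cond) {
    onTrue();
  } else {
    onFalse();
  }
  // Same condition as above: after MaybeAddPhi this becomes if (Phi(1, 0)), which
  // SimplifyIfs folds so each incoming edge jumps directly to its target branch.
  if (cond) {
    onTrueAgain();
  } else {
    onFalseAgain();
  }
}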
@@ -466,7 +591,192 @@ void HDeadCodeElimination::ConnectSuccessiveBlocks() {
}
}
-bool HDeadCodeElimination::RemoveDeadBlocks() {
+struct HDeadCodeElimination::TryBelongingInformation {
+ explicit TryBelongingInformation(ScopedArenaAllocator* allocator)
+ : blocks_in_try(allocator->Adapter(kArenaAllocDCE)),
+ coalesced_try_entries(allocator->Adapter(kArenaAllocDCE)) {}
+
+ // Which blocks belong in the try.
+ ScopedArenaSet<HBasicBlock*> blocks_in_try;
+ // Which other try entries are referencing this same try.
+ ScopedArenaSet<HBasicBlock*> coalesced_try_entries;
+};
+
+bool HDeadCodeElimination::CanPerformTryRemoval(const TryBelongingInformation& try_belonging_info) {
+ for (HBasicBlock* block : try_belonging_info.blocks_in_try) {
+ for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+ if (it.Current()->CanThrow()) {
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+void HDeadCodeElimination::DisconnectHandlersAndUpdateTryBoundary(
+ HBasicBlock* block,
+ /* out */ bool* any_block_in_loop) {
+ if (block->IsInLoop()) {
+ *any_block_in_loop = true;
+ }
+
+ // Disconnect the handlers.
+ while (block->GetSuccessors().size() > 1) {
+ HBasicBlock* handler = block->GetSuccessors()[1];
+ DCHECK(handler->IsCatchBlock());
+ block->RemoveSuccessor(handler);
+ handler->RemovePredecessor(block);
+ if (handler->IsInLoop()) {
+ *any_block_in_loop = true;
+ }
+ }
+
+ // Change TryBoundary to Goto.
+ DCHECK(block->EndsWithTryBoundary());
+ HInstruction* last = block->GetLastInstruction();
+ block->RemoveInstruction(last);
+ block->AddInstruction(new (graph_->GetAllocator()) HGoto(last->GetDexPc()));
+ DCHECK_EQ(block->GetSuccessors().size(), 1u);
+}
+
+void HDeadCodeElimination::RemoveTry(HBasicBlock* try_entry,
+ const TryBelongingInformation& try_belonging_info,
+ /* out */ bool* any_block_in_loop) {
+ // Update all try entries.
+ DCHECK(try_entry->EndsWithTryBoundary());
+ DCHECK(try_entry->GetLastInstruction()->AsTryBoundary()->IsEntry());
+ DisconnectHandlersAndUpdateTryBoundary(try_entry, any_block_in_loop);
+
+ for (HBasicBlock* other_try_entry : try_belonging_info.coalesced_try_entries) {
+ DCHECK(other_try_entry->EndsWithTryBoundary());
+ DCHECK(other_try_entry->GetLastInstruction()->AsTryBoundary()->IsEntry());
+ DisconnectHandlersAndUpdateTryBoundary(other_try_entry, any_block_in_loop);
+ }
+
+ // Update the blocks in the try.
+ for (HBasicBlock* block : try_belonging_info.blocks_in_try) {
+ // Update the try catch information since now the try doesn't exist.
+ block->SetTryCatchInformation(nullptr);
+ if (block->IsInLoop()) {
+ *any_block_in_loop = true;
+ }
+
+ if (block->EndsWithTryBoundary()) {
+ // Try exits.
+ DCHECK(!block->GetLastInstruction()->AsTryBoundary()->IsEntry());
+ DisconnectHandlersAndUpdateTryBoundary(block, any_block_in_loop);
+
+ if (block->GetSingleSuccessor()->IsExitBlock()) {
+ // `block` used to be a single exit TryBoundary that got turned into a Goto. It
+ // is now pointing to the exit which we don't allow. To fix it, we disconnect
+ // `block` from its predecessor and RemoveDeadBlocks will remove it from the
+ // graph.
+ DCHECK(block->IsSingleGoto());
+ HBasicBlock* predecessor = block->GetSinglePredecessor();
+ predecessor->ReplaceSuccessor(block, graph_->GetExitBlock());
+
+ if (!block->GetDominatedBlocks().empty()) {
+ // Update domination tree if `block` dominates a block to keep the graph consistent.
+ DCHECK_EQ(block->GetDominatedBlocks().size(), 1u);
+ DCHECK_EQ(graph_->GetExitBlock()->GetDominator(), block);
+ predecessor->AddDominatedBlock(graph_->GetExitBlock());
+ graph_->GetExitBlock()->SetDominator(predecessor);
+ block->RemoveDominatedBlock(graph_->GetExitBlock());
+ }
+ }
+ }
+ }
+}
+
+bool HDeadCodeElimination::RemoveUnneededTries() {
+ if (!graph_->HasTryCatch()) {
+ return false;
+ }
+
+ // Use local allocator for allocating memory.
+ ScopedArenaAllocator allocator(graph_->GetArenaStack());
+
+ // Collect which blocks are part of which try.
+ std::unordered_map<HBasicBlock*, TryBelongingInformation> tries;
+ for (HBasicBlock* block : graph_->GetReversePostOrderSkipEntryBlock()) {
+ if (block->IsTryBlock()) {
+ HBasicBlock* key = block->GetTryCatchInformation()->GetTryEntry().GetBlock();
+ auto it = tries.find(key);
+ if (it == tries.end()) {
+ it = tries.insert({key, TryBelongingInformation(&allocator)}).first;
+ }
+ it->second.blocks_in_try.insert(block);
+ }
+ }
+
+  // Deduplicate tries which have different try entries but are really the same try.
+ for (auto it = tries.begin(); it != tries.end(); it++) {
+ DCHECK(it->first->EndsWithTryBoundary());
+ HTryBoundary* try_boundary = it->first->GetLastInstruction()->AsTryBoundary();
+ for (auto other_it = next(it); other_it != tries.end(); /*other_it++ in the loop*/) {
+ DCHECK(other_it->first->EndsWithTryBoundary());
+ HTryBoundary* other_try_boundary = other_it->first->GetLastInstruction()->AsTryBoundary();
+ if (try_boundary->HasSameExceptionHandlersAs(*other_try_boundary)) {
+ // Merge the entries as they are really the same one.
+ // Block merging.
+ it->second.blocks_in_try.insert(other_it->second.blocks_in_try.begin(),
+ other_it->second.blocks_in_try.end());
+
+ // Add the coalesced try entry to update it too.
+ it->second.coalesced_try_entries.insert(other_it->first);
+
+ // Erase the other entry.
+ other_it = tries.erase(other_it);
+ } else {
+ other_it++;
+ }
+ }
+ }
+
+ size_t removed_tries = 0;
+ bool any_block_in_loop = false;
+
+ // Check which tries contain throwing instructions.
+ for (const auto& entry : tries) {
+ if (CanPerformTryRemoval(entry.second)) {
+ ++removed_tries;
+ RemoveTry(entry.first, entry.second, &any_block_in_loop);
+ }
+ }
+
+ if (removed_tries != 0) {
+ // We want to:
+ // 1) Update the dominance information
+ // 2) Remove catch block subtrees, if they are now unreachable.
+ // If we run the dominance recomputation without removing the code, those catch blocks will
+ // not be part of the post order and won't be removed. If we don't run the dominance
+ // recomputation, we risk RemoveDeadBlocks not running it and leaving the graph in an
+ // inconsistent state. So, what we can do is run RemoveDeadBlocks and force a recomputation.
+ // Note that we are not guaranteed to remove a catch block if we have nested try blocks:
+ //
+ // try {
+ // ... nothing can throw. TryBoundary A ...
+ // try {
+ // ... can throw. TryBoundary B...
+ // } catch (Error e) {}
+ // } catch (Exception e) {}
+ //
+ // In the example above, we can remove the TryBoundary A but the Exception catch cannot be
+ // removed as the TryBoundary B might still throw into that catch. TryBoundary A and B don't get
+ // coalesced since they have different catch handlers.
+
+ RemoveDeadBlocks(/* force_recomputation= */ true, any_block_in_loop);
+ MaybeRecordStat(stats_, MethodCompilationStat::kRemovedTry, removed_tries);
+ return true;
+ } else {
+ return false;
+ }
+}
+
+bool HDeadCodeElimination::RemoveDeadBlocks(bool force_recomputation,
+ bool force_loop_recomputation) {
+ DCHECK_IMPLIES(force_loop_recomputation, force_recomputation);
+
// Use local allocator for allocating memory.
ScopedArenaAllocator allocator(graph_->GetArenaStack());
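As an informal example of what RemoveUnneededTries above targets (hypothetical Java, not from this patch): once earlier passes leave no throwing instructions inside the try, both TryBoundary instructions are turned into Gotos and the catch block becomes unreachable, to be cleaned up by the forced RemoveDeadBlocks run.

static int sum(int a, int b) {
  int result;
  try {
    result = a + b;            // integer addition cannot throw
  } catch (ArithmeticException e) {
    result = 0;                // unreachable once the try is removed
  }
  return result;
}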
@@ -495,8 +805,8 @@ bool HDeadCodeElimination::RemoveDeadBlocks() {
// If we removed at least one block, we need to recompute the full
// dominator tree and try block membership.
- if (removed_one_or_more_blocks) {
- if (rerun_dominance_and_loop_analysis) {
+ if (removed_one_or_more_blocks || force_recomputation) {
+ if (rerun_dominance_and_loop_analysis || force_loop_recomputation) {
graph_->ClearLoopInformation();
graph_->ClearDominanceInformation();
graph_->BuildDominatorTree();
@@ -530,6 +840,33 @@ void HDeadCodeElimination::RemoveDeadInstructions() {
}
}
+void HDeadCodeElimination::UpdateGraphFlags() {
+ bool has_monitor_operations = false;
+ bool has_simd = false;
+ bool has_bounds_checks = false;
+ bool has_always_throwing_invokes = false;
+
+ for (HBasicBlock* block : graph_->GetReversePostOrder()) {
+ for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+ HInstruction* instruction = it.Current();
+ if (instruction->IsMonitorOperation()) {
+ has_monitor_operations = true;
+ } else if (instruction->IsVecOperation()) {
+ has_simd = true;
+ } else if (instruction->IsBoundsCheck()) {
+ has_bounds_checks = true;
+ } else if (instruction->IsInvoke() && instruction->AsInvoke()->AlwaysThrows()) {
+ has_always_throwing_invokes = true;
+ }
+ }
+ }
+
+ graph_->SetHasMonitorOperations(has_monitor_operations);
+ graph_->SetHasSIMD(has_simd);
+ graph_->SetHasBoundsChecks(has_bounds_checks);
+ graph_->SetHasAlwaysThrowingInvokes(has_always_throwing_invokes);
+}
+
bool HDeadCodeElimination::Run() {
// Do not eliminate dead blocks if the graph has irreducible loops. We could
// support it, but that would require changes in our loop representation to handle
@@ -541,6 +878,11 @@ bool HDeadCodeElimination::Run() {
did_any_simplification |= SimplifyAlwaysThrows();
did_any_simplification |= SimplifyIfs();
did_any_simplification |= RemoveDeadBlocks();
+  // We call RemoveDeadBlocks before RemoveUnneededTries to remove the dead blocks left by the
+  // previous optimizations. Otherwise, we might detect that a try has throwing instructions when
+  // they are actually dead code. RemoveUnneededTries will call RemoveDeadBlocks again if
+  // needed.
+ did_any_simplification |= RemoveUnneededTries();
if (did_any_simplification) {
// Connect successive blocks created by dead branches.
ConnectSuccessiveBlocks();
@@ -548,6 +890,7 @@ bool HDeadCodeElimination::Run() {
}
SsaRedundantPhiElimination(graph_).Run();
RemoveDeadInstructions();
+ UpdateGraphFlags();
return true;
}
diff --git a/compiler/optimizing/dead_code_elimination.h b/compiler/optimizing/dead_code_elimination.h
index 799721acf2..ddd01f7103 100644
--- a/compiler/optimizing/dead_code_elimination.h
+++ b/compiler/optimizing/dead_code_elimination.h
@@ -17,11 +17,12 @@
#ifndef ART_COMPILER_OPTIMIZING_DEAD_CODE_ELIMINATION_H_
#define ART_COMPILER_OPTIMIZING_DEAD_CODE_ELIMINATION_H_
+#include "base/macros.h"
#include "nodes.h"
#include "optimization.h"
#include "optimizing_compiler_stats.h"
-namespace art {
+namespace art HIDDEN {
/**
* Optimization pass performing dead code elimination (removal of
@@ -39,11 +40,87 @@ class HDeadCodeElimination : public HOptimization {
private:
void MaybeRecordDeadBlock(HBasicBlock* block);
void MaybeRecordSimplifyIf();
- bool RemoveDeadBlocks();
+ // If `force_recomputation` is true, we will recompute the dominance information even when we
+ // didn't delete any blocks. `force_loop_recomputation` is similar but it also forces the loop
+ // information recomputation.
+ bool RemoveDeadBlocks(bool force_recomputation = false, bool force_loop_recomputation = false);
void RemoveDeadInstructions();
bool SimplifyAlwaysThrows();
+ // Simplify the pattern:
+ //
+ // B1 B2 ...
+ // goto goto goto
+ // \ | /
+ // \ | /
+ // B3
+ // i1 = phi(input, input)
+ // (i2 = condition on i1)
+ // if i1 (or i2)
+ // / \
+ // / \
+ // B4 B5
+ //
+ // Into:
+ //
+ // B1 B2 ...
+ // | | |
+ // B4 B5 B?
+ //
+ // Note that individual edges can be redirected (for example B2->B3
+ // can be redirected as B2->B5) without applying this optimization
+ // to other incoming edges.
+ //
+ // Note that we rely on the dead code elimination to get rid of B3.
bool SimplifyIfs();
void ConnectSuccessiveBlocks();
+ // Updates the graph flags related to instructions (e.g. HasSIMD()) since we may have eliminated
+ // the relevant instructions. There's no need to update `SetHasTryCatch` since we do that in
+ // `ComputeTryBlockInformation`. Similarly with `HasLoops` and `HasIrreducibleLoops`: They are
+ // cleared in `ClearLoopInformation` and then set as true as part of `HLoopInformation::Populate`,
+ // if needed.
+ void UpdateGraphFlags();
+
+ // Helper struct to eliminate tries.
+ struct TryBelongingInformation;
+  // Disconnects `block`'s handlers and updates its `TryBoundary` instruction to a `Goto`.
+  // Sets `any_block_in_loop` to true if any block is currently in a loop, so that the loop
+  // information can be updated later if needed.
+ void DisconnectHandlersAndUpdateTryBoundary(HBasicBlock* block,
+ /* out */ bool* any_block_in_loop);
+ // Returns true iff the try doesn't contain throwing instructions.
+ bool CanPerformTryRemoval(const TryBelongingInformation& try_belonging_info);
+ // Removes the try by disconnecting all try entries and exits from their handlers. Also updates
+ // the graph in the case that a `TryBoundary` instruction of kind `exit` has the Exit block as
+ // its successor.
+ void RemoveTry(HBasicBlock* try_entry,
+ const TryBelongingInformation& try_belonging_info,
+ bool* any_block_in_loop);
+ // Checks which tries (if any) are currently in the graph, coalesces the different try entries
+ // that are referencing the same try, and removes the tries which don't contain any throwing
+ // instructions.
+ bool RemoveUnneededTries();
+
+ // Adds a phi in `block`, if `block` and its dominator have the same (or opposite) condition.
+ // For example it turns:
+ // if(cond)
+ // / \
+ // B1 B2
+ // \ /
+ // if(cond)
+ // / \
+ // B3 B4
+ //
+ // into:
+ // if(cond)
+ // / \
+ // B1 B2
+ // \ /
+ // if(Phi(1, 0))
+ // / \
+ // B3 B4
+ //
+ // Following this, SimplifyIfs is able to connect B1->B3 and B2->B4 effectively skipping an if.
+ void MaybeAddPhi(HBasicBlock* block);
DISALLOW_COPY_AND_ASSIGN(HDeadCodeElimination);
};
diff --git a/compiler/optimizing/dead_code_elimination_test.cc b/compiler/optimizing/dead_code_elimination_test.cc
index f5cd4dc27a..b789434add 100644
--- a/compiler/optimizing/dead_code_elimination_test.cc
+++ b/compiler/optimizing/dead_code_elimination_test.cc
@@ -16,6 +16,7 @@
#include "dead_code_elimination.h"
+#include "base/macros.h"
#include "driver/compiler_options.h"
#include "graph_checker.h"
#include "optimizing_unit_test.h"
@@ -23,9 +24,9 @@
#include "gtest/gtest.h"
-namespace art {
+namespace art HIDDEN {
-class DeadCodeEliminationTest : public OptimizingUnitTest {
+class DeadCodeEliminationTest : public CommonCompilerTest, public OptimizingUnitTestHelper {
protected:
void TestCode(const std::vector<uint16_t>& data,
const std::string& expected_before,
diff --git a/compiler/optimizing/dominator_test.cc b/compiler/optimizing/dominator_test.cc
index 1d72ba116e..5f366ebcd9 100644
--- a/compiler/optimizing/dominator_test.cc
+++ b/compiler/optimizing/dominator_test.cc
@@ -15,6 +15,7 @@
*/
#include "base/arena_allocator.h"
+#include "base/macros.h"
#include "builder.h"
#include "dex/dex_instruction.h"
#include "nodes.h"
@@ -22,9 +23,9 @@
#include "gtest/gtest.h"
-namespace art {
+namespace art HIDDEN {
-class OptimizerTest : public OptimizingUnitTest {
+class OptimizerTest : public CommonCompilerTest, public OptimizingUnitTestHelper {
protected:
void TestCode(const std::vector<uint16_t>& data, const uint32_t* blocks, size_t blocks_length);
};
diff --git a/compiler/optimizing/escape.cc b/compiler/optimizing/escape.cc
index 617833c697..cebe94fd0d 100644
--- a/compiler/optimizing/escape.cc
+++ b/compiler/optimizing/escape.cc
@@ -18,7 +18,7 @@
#include "nodes.h"
-namespace art {
+namespace art HIDDEN {
void VisitEscapes(HInstruction* reference, EscapeVisitor& escape_visitor) {
// References not allocated in the method are intrinsically escaped.
diff --git a/compiler/optimizing/escape.h b/compiler/optimizing/escape.h
index 5402cb1763..3b284fbf43 100644
--- a/compiler/optimizing/escape.h
+++ b/compiler/optimizing/escape.h
@@ -17,7 +17,9 @@
#ifndef ART_COMPILER_OPTIMIZING_ESCAPE_H_
#define ART_COMPILER_OPTIMIZING_ESCAPE_H_
-namespace art {
+#include "base/macros.h"
+
+namespace art HIDDEN {
class HInstruction;
diff --git a/compiler/optimizing/execution_subgraph.cc b/compiler/optimizing/execution_subgraph.cc
index 66fdfcda5b..06aabbe040 100644
--- a/compiler/optimizing/execution_subgraph.cc
+++ b/compiler/optimizing/execution_subgraph.cc
@@ -26,7 +26,7 @@
#include "base/scoped_arena_allocator.h"
#include "nodes.h"
-namespace art {
+namespace art HIDDEN {
ExecutionSubgraph::ExecutionSubgraph(HGraph* graph, ScopedArenaAllocator* allocator)
: graph_(graph),
diff --git a/compiler/optimizing/execution_subgraph.h b/compiler/optimizing/execution_subgraph.h
index 7d2a66077d..5ddf17de60 100644
--- a/compiler/optimizing/execution_subgraph.h
+++ b/compiler/optimizing/execution_subgraph.h
@@ -27,6 +27,7 @@
#include "base/bit_vector-inl.h"
#include "base/globals.h"
#include "base/iteration_range.h"
+#include "base/macros.h"
#include "base/mutex.h"
#include "base/scoped_arena_allocator.h"
#include "base/scoped_arena_containers.h"
@@ -34,7 +35,7 @@
#include "base/transform_iterator.h"
#include "nodes.h"
-namespace art {
+namespace art HIDDEN {
// Helper for transforming blocks to block_ids.
class BlockToBlockIdTransformer {
diff --git a/compiler/optimizing/execution_subgraph_test.cc b/compiler/optimizing/execution_subgraph_test.cc
index 74c243b5b4..921ef056ba 100644
--- a/compiler/optimizing/execution_subgraph_test.cc
+++ b/compiler/optimizing/execution_subgraph_test.cc
@@ -37,7 +37,7 @@
#include "optimizing_unit_test.h"
#include "scoped_thread_state_change.h"
-namespace art {
+namespace art HIDDEN {
using BlockSet = std::unordered_set<const HBasicBlock*>;
diff --git a/compiler/optimizing/execution_subgraph_test.h b/compiler/optimizing/execution_subgraph_test.h
index 13cb2bc7c5..cee105a045 100644
--- a/compiler/optimizing/execution_subgraph_test.h
+++ b/compiler/optimizing/execution_subgraph_test.h
@@ -19,7 +19,9 @@
#include "android-base/macros.h"
-namespace art {
+#include "base/macros.h"
+
+namespace art HIDDEN {
class HGraph;
class ExecutionSubgraph;
diff --git a/compiler/optimizing/find_loops_test.cc b/compiler/optimizing/find_loops_test.cc
index 75b8e9609e..8857b2a775 100644
--- a/compiler/optimizing/find_loops_test.cc
+++ b/compiler/optimizing/find_loops_test.cc
@@ -15,6 +15,7 @@
*/
#include "base/arena_allocator.h"
+#include "base/macros.h"
#include "builder.h"
#include "dex/dex_file.h"
#include "dex/dex_instruction.h"
@@ -25,9 +26,9 @@
#include "gtest/gtest.h"
-namespace art {
+namespace art HIDDEN {
-class FindLoopsTest : public OptimizingUnitTest {};
+class FindLoopsTest : public CommonCompilerTest, public OptimizingUnitTestHelper {};
TEST_F(FindLoopsTest, CFG1) {
// Constant is not used.
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index d1769cea0d..190b362145 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -32,7 +32,7 @@
#include "scoped_thread_state_change-inl.h"
#include "subtype_check.h"
-namespace art {
+namespace art HIDDEN {
using android::base::StringPrintf;
@@ -80,9 +80,91 @@ size_t GraphChecker::Run(bool pass_change, size_t last_size) {
// as the latter might visit dead blocks removed by the dominator
// computation.
VisitReversePostOrder();
+ CheckGraphFlags();
return current_size;
}
+void GraphChecker::VisitReversePostOrder() {
+ for (HBasicBlock* block : GetGraph()->GetReversePostOrder()) {
+ if (block->IsInLoop()) {
+ flag_info_.seen_loop = true;
+ if (block->GetLoopInformation()->IsIrreducible()) {
+ flag_info_.seen_irreducible_loop = true;
+ }
+ }
+
+ VisitBasicBlock(block);
+ }
+}
+
+static const char* StrBool(bool val) {
+ return val ? "true" : "false";
+}
+
+void GraphChecker::CheckGraphFlags() {
+ if (GetGraph()->HasMonitorOperations() != flag_info_.seen_monitor_operation) {
+ AddError(
+ StringPrintf("Flag mismatch: HasMonitorOperations() (%s) should be equal to "
+ "flag_info_.seen_monitor_operation (%s)",
+ StrBool(GetGraph()->HasMonitorOperations()),
+ StrBool(flag_info_.seen_monitor_operation)));
+ }
+
+ if (GetGraph()->HasTryCatch() != flag_info_.seen_try_boundary) {
+ AddError(
+ StringPrintf("Flag mismatch: HasTryCatch() (%s) should be equal to "
+ "flag_info_.seen_try_boundary (%s)",
+ StrBool(GetGraph()->HasTryCatch()),
+ StrBool(flag_info_.seen_try_boundary)));
+ }
+
+ if (GetGraph()->HasLoops() != flag_info_.seen_loop) {
+ AddError(
+ StringPrintf("Flag mismatch: HasLoops() (%s) should be equal to "
+ "flag_info_.seen_loop (%s)",
+ StrBool(GetGraph()->HasLoops()),
+ StrBool(flag_info_.seen_loop)));
+ }
+
+ if (GetGraph()->HasIrreducibleLoops() && !GetGraph()->HasLoops()) {
+ AddError(StringPrintf("Flag mismatch: HasIrreducibleLoops() (%s) implies HasLoops() (%s)",
+ StrBool(GetGraph()->HasIrreducibleLoops()),
+ StrBool(GetGraph()->HasLoops())));
+ }
+
+ if (GetGraph()->HasIrreducibleLoops() != flag_info_.seen_irreducible_loop) {
+ AddError(
+ StringPrintf("Flag mismatch: HasIrreducibleLoops() (%s) should be equal to "
+ "flag_info_.seen_irreducible_loop (%s)",
+ StrBool(GetGraph()->HasIrreducibleLoops()),
+ StrBool(flag_info_.seen_irreducible_loop)));
+ }
+
+ if (GetGraph()->HasSIMD() != flag_info_.seen_SIMD) {
+ AddError(
+ StringPrintf("Flag mismatch: HasSIMD() (%s) should be equal to "
+ "flag_info_.seen_SIMD (%s)",
+ StrBool(GetGraph()->HasSIMD()),
+ StrBool(flag_info_.seen_SIMD)));
+ }
+
+ if (GetGraph()->HasBoundsChecks() != flag_info_.seen_bounds_checks) {
+ AddError(
+ StringPrintf("Flag mismatch: HasBoundsChecks() (%s) should be equal to "
+ "flag_info_.seen_bounds_checks (%s)",
+ StrBool(GetGraph()->HasBoundsChecks()),
+ StrBool(flag_info_.seen_bounds_checks)));
+ }
+
+ if (GetGraph()->HasAlwaysThrowingInvokes() != flag_info_.seen_always_throwing_invokes) {
+ AddError(
+ StringPrintf("Flag mismatch: HasAlwaysThrowingInvokes() (%s) should be equal to "
+ "flag_info_.seen_always_throwing_invokes (%s)",
+ StrBool(GetGraph()->HasAlwaysThrowingInvokes()),
+ StrBool(flag_info_.seen_always_throwing_invokes)));
+ }
+}
+
void GraphChecker::VisitBasicBlock(HBasicBlock* block) {
current_block_ = block;
@@ -159,6 +241,24 @@ void GraphChecker::VisitBasicBlock(HBasicBlock* block) {
}
}
+ // Make sure the first instruction of a catch block is always a Nop that emits an environment.
+ if (block->IsCatchBlock()) {
+ if (!block->GetFirstInstruction()->IsNop()) {
+ AddError(StringPrintf("Block %d doesn't have a Nop as its first instruction.",
+ current_block_->GetBlockId()));
+ } else {
+ HNop* nop = block->GetFirstInstruction()->AsNop();
+ if (!nop->NeedsEnvironment()) {
+ AddError(
+ StringPrintf("%s:%d is a Nop and the first instruction of block %d, but it doesn't "
+ "need an environment.",
+ nop->DebugName(),
+ nop->GetId(),
+ current_block_->GetBlockId()));
+ }
+ }
+ }
+
// Visit this block's list of phis.
for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
HInstruction* current = it.Current();
@@ -219,6 +319,12 @@ void GraphChecker::VisitBasicBlock(HBasicBlock* block) {
}
}
+ // Ensure all blocks have at least one successor, except the Exit block.
+ if (block->GetSuccessors().empty() && !block->IsExitBlock()) {
+ AddError(StringPrintf("Block %d has no successor and it is not the Exit block.",
+ block->GetBlockId()));
+ }
+
// Ensure there is no critical edge (i.e., an edge connecting a
// block with multiple successors to a block with multiple
// predecessors). Exceptional edges are synthesized and hence
@@ -291,27 +397,30 @@ void GraphChecker::VisitBasicBlock(HBasicBlock* block) {
}
void GraphChecker::VisitBoundsCheck(HBoundsCheck* check) {
+ VisitInstruction(check);
+
if (!GetGraph()->HasBoundsChecks()) {
- AddError(StringPrintf("Instruction %s:%d is a HBoundsCheck, "
- "but HasBoundsChecks() returns false",
- check->DebugName(),
- check->GetId()));
+ AddError(
+ StringPrintf("The graph doesn't have the HasBoundsChecks flag set but we saw "
+ "%s:%d in block %d.",
+ check->DebugName(),
+ check->GetId(),
+ check->GetBlock()->GetBlockId()));
}
- // Perform the instruction base checks too.
- VisitInstruction(check);
+ flag_info_.seen_bounds_checks = true;
}
void GraphChecker::VisitDeoptimize(HDeoptimize* deopt) {
+ VisitInstruction(deopt);
if (GetGraph()->IsCompilingOsr()) {
AddError(StringPrintf("A graph compiled OSR cannot have a HDeoptimize instruction"));
}
-
- // Perform the instruction base checks too.
- VisitInstruction(deopt);
}
void GraphChecker::VisitTryBoundary(HTryBoundary* try_boundary) {
+ VisitInstruction(try_boundary);
+
ArrayRef<HBasicBlock* const> handlers = try_boundary->GetExceptionHandlers();
// Ensure that all exception handlers are catch blocks.
@@ -338,24 +447,65 @@ void GraphChecker::VisitTryBoundary(HTryBoundary* try_boundary) {
}
}
- VisitInstruction(try_boundary);
+ if (!GetGraph()->HasTryCatch()) {
+ AddError(
+ StringPrintf("The graph doesn't have the HasTryCatch flag set but we saw "
+ "%s:%d in block %d.",
+ try_boundary->DebugName(),
+ try_boundary->GetId(),
+ try_boundary->GetBlock()->GetBlockId()));
+ }
+
+ flag_info_.seen_try_boundary = true;
+}
+
+void GraphChecker::VisitLoadClass(HLoadClass* load) {
+ VisitInstruction(load);
+
+ if (load->GetLoadedClassRTI().IsValid() && !load->GetLoadedClassRTI().IsExact()) {
+ std::stringstream ssRTI;
+ ssRTI << load->GetLoadedClassRTI();
+ AddError(StringPrintf("%s:%d in block %d with RTI %s has valid but inexact RTI.",
+ load->DebugName(),
+ load->GetId(),
+ load->GetBlock()->GetBlockId(),
+ ssRTI.str().c_str()));
+ }
}
void GraphChecker::VisitLoadException(HLoadException* load) {
- // Ensure that LoadException is the first instruction in a catch block.
+ VisitInstruction(load);
+
+ // Ensure that LoadException is the second instruction in a catch block. The first one should be a
+ // Nop (checked separately).
if (!load->GetBlock()->IsCatchBlock()) {
AddError(StringPrintf("%s:%d is in a non-catch block %d.",
load->DebugName(),
load->GetId(),
load->GetBlock()->GetBlockId()));
- } else if (load->GetBlock()->GetFirstInstruction() != load) {
- AddError(StringPrintf("%s:%d is not the first instruction in catch block %d.",
+ } else if (load->GetBlock()->GetFirstInstruction()->GetNext() != load) {
+ AddError(StringPrintf("%s:%d is not the second instruction in catch block %d.",
load->DebugName(),
load->GetId(),
load->GetBlock()->GetBlockId()));
}
}
+void GraphChecker::VisitMonitorOperation(HMonitorOperation* monitor_op) {
+ VisitInstruction(monitor_op);
+
+ if (!GetGraph()->HasMonitorOperations()) {
+ AddError(
+ StringPrintf("The graph doesn't have the HasMonitorOperations flag set but we saw "
+ "%s:%d in block %d.",
+ monitor_op->DebugName(),
+ monitor_op->GetId(),
+ monitor_op->GetBlock()->GetBlockId()));
+ }
+
+ flag_info_.seen_monitor_operation = true;
+}
+
void GraphChecker::VisitInstruction(HInstruction* instruction) {
if (seen_ids_.IsBitSet(instruction->GetId())) {
AddError(StringPrintf("Instruction id %d is duplicate in graph.",
@@ -497,33 +647,16 @@ void GraphChecker::VisitInstruction(HInstruction* instruction) {
}
}
- // Ensure that reference type instructions have reference type info.
- if (check_reference_type_info_ && instruction->GetType() == DataType::Type::kReference) {
- if (!instruction->GetReferenceTypeInfo().IsValid()) {
- AddError(StringPrintf("Reference type instruction %s:%d does not have "
- "valid reference type information.",
- instruction->DebugName(),
- instruction->GetId()));
- }
- }
-
if (instruction->CanThrow() && !instruction->HasEnvironment()) {
AddError(StringPrintf("Throwing instruction %s:%d in block %d does not have an environment.",
instruction->DebugName(),
instruction->GetId(),
current_block_->GetBlockId()));
} else if (instruction->CanThrowIntoCatchBlock()) {
- // Find the top-level environment. This corresponds to the environment of
- // the catch block since we do not inline methods with try/catch.
- HEnvironment* environment = instruction->GetEnvironment();
- while (environment->GetParent() != nullptr) {
- environment = environment->GetParent();
- }
-
- // Find all catch blocks and test that `instruction` has an environment
- // value for each one.
+ // Find all catch blocks and test that `instruction` has an environment value for each one.
const HTryBoundary& entry = instruction->GetBlock()->GetTryCatchInformation()->GetTryEntry();
for (HBasicBlock* catch_block : entry.GetExceptionHandlers()) {
+ const HEnvironment* environment = catch_block->GetFirstInstruction()->GetEnvironment();
for (HInstructionIterator phi_it(catch_block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
HPhi* catch_phi = phi_it.Current()->AsPhi();
if (environment->GetInstructionAt(catch_phi->GetRegNumber()) == nullptr) {
@@ -541,9 +674,26 @@ void GraphChecker::VisitInstruction(HInstruction* instruction) {
}
}
-void GraphChecker::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
+void GraphChecker::VisitInvoke(HInvoke* invoke) {
VisitInstruction(invoke);
+ if (invoke->AlwaysThrows()) {
+ if (!GetGraph()->HasAlwaysThrowingInvokes()) {
+ AddError(
+ StringPrintf("The graph doesn't have the HasAlwaysThrowingInvokes flag set but we saw "
+ "%s:%d in block %d and it always throws.",
+ invoke->DebugName(),
+ invoke->GetId(),
+ invoke->GetBlock()->GetBlockId()));
+ }
+ flag_info_.seen_always_throwing_invokes = true;
+ }
+}
+
+void GraphChecker::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
+ // We call VisitInvoke and not VisitInstruction to de-duplicate the always throwing code check.
+ VisitInvoke(invoke);
+
if (invoke->IsStaticWithExplicitClinitCheck()) {
const HInstruction* last_input = invoke->GetInputs().back();
if (last_input == nullptr) {
@@ -612,6 +762,17 @@ void GraphChecker::CheckTypeCheckBitstringInput(HTypeCheckInstruction* check,
void GraphChecker::HandleTypeCheckInstruction(HTypeCheckInstruction* check) {
VisitInstruction(check);
+
+ if (check->GetTargetClassRTI().IsValid() && !check->GetTargetClassRTI().IsExact()) {
+ std::stringstream ssRTI;
+ ssRTI << check->GetTargetClassRTI();
+ AddError(StringPrintf("%s:%d in block %d with RTI %s has valid but inexact RTI.",
+ check->DebugName(),
+ check->GetId(),
+ check->GetBlock()->GetBlockId(),
+ ssRTI.str().c_str()));
+ }
+
HInstruction* input = check->InputAt(1);
if (check->GetTypeCheckKind() == TypeCheckKind::kBitstringCheck) {
if (!input->IsNullConstant()) {
@@ -674,13 +835,14 @@ void GraphChecker::HandleLoop(HBasicBlock* loop_header) {
loop_information->GetPreHeader()->GetSuccessors().size()));
}
- if (loop_information->GetSuspendCheck() == nullptr) {
- AddError(StringPrintf(
- "Loop with header %d does not have a suspend check.",
- loop_header->GetBlockId()));
+ if (!GetGraph()->SuspendChecksAreAllowedToNoOp() &&
+ loop_information->GetSuspendCheck() == nullptr) {
+ AddError(StringPrintf("Loop with header %d does not have a suspend check.",
+ loop_header->GetBlockId()));
}
- if (loop_information->GetSuspendCheck() != loop_header->GetFirstInstructionDisregardMoves()) {
+ if (!GetGraph()->SuspendChecksAreAllowedToNoOp() &&
+ loop_information->GetSuspendCheck() != loop_header->GetFirstInstructionDisregardMoves()) {
AddError(StringPrintf(
"Loop header %d does not have the loop suspend check as the first instruction.",
loop_header->GetBlockId()));
@@ -1051,6 +1213,21 @@ void GraphChecker::VisitNeg(HNeg* instruction) {
}
}
+void GraphChecker::VisitArraySet(HArraySet* instruction) {
+ VisitInstruction(instruction);
+
+ if (instruction->NeedsTypeCheck() !=
+ instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC())) {
+ AddError(
+ StringPrintf("%s %d has a flag mismatch. An ArraySet instruction can trigger a GC iff it "
+ "needs a type check. Needs type check: %s, Can trigger GC: %s",
+ instruction->DebugName(),
+ instruction->GetId(),
+ StrBool(instruction->NeedsTypeCheck()),
+ StrBool(instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC()))));
+ }
+}
+
void GraphChecker::VisitBinaryOperation(HBinaryOperation* op) {
VisitInstruction(op);
DataType::Type lhs_type = op->InputAt(0)->GetType();
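The VisitArraySet check above enforces that an ArraySet may trigger GC exactly when it needs a type check (the runtime call on the type-check slow path is what can trigger GC). A hedged Java illustration of the two cases (invented names, not from this patch):

static void store(Object[] objects, int[] ints, Object o, int i) {
  // Reference store into Object[]: may need an ArrayStoreException type check,
  // so the instruction keeps the CanTriggerGC side effect.
  objects[0] = o;
  // Primitive store: no type check is needed and no GC can be triggered.
  ints[0] = i;
}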
@@ -1111,6 +1288,8 @@ void GraphChecker::VisitBinaryOperation(HBinaryOperation* op) {
}
void GraphChecker::VisitConstant(HConstant* instruction) {
+ VisitInstruction(instruction);
+
HBasicBlock* block = instruction->GetBlock();
if (!block->IsEntryBlock()) {
AddError(StringPrintf(
@@ -1149,6 +1328,18 @@ void GraphChecker::VisitTypeConversion(HTypeConversion* instruction) {
void GraphChecker::VisitVecOperation(HVecOperation* instruction) {
VisitInstruction(instruction);
+
+ if (!GetGraph()->HasSIMD()) {
+ AddError(
+ StringPrintf("The graph doesn't have the HasSIMD flag set but we saw "
+ "%s:%d in block %d.",
+ instruction->DebugName(),
+ instruction->GetId(),
+ instruction->GetBlock()->GetBlockId()));
+ }
+
+ flag_info_.seen_SIMD = true;
+
if (codegen_ == nullptr) {
return;
}
diff --git a/compiler/optimizing/graph_checker.h b/compiler/optimizing/graph_checker.h
index 04c8d2103c..d6644f3b50 100644
--- a/compiler/optimizing/graph_checker.h
+++ b/compiler/optimizing/graph_checker.h
@@ -21,10 +21,11 @@
#include "base/arena_bit_vector.h"
#include "base/bit_vector-inl.h"
+#include "base/macros.h"
#include "base/scoped_arena_allocator.h"
#include "nodes.h"
-namespace art {
+namespace art HIDDEN {
class CodeGenerator;
@@ -54,6 +55,7 @@ class GraphChecker : public HGraphDelegateVisitor {
void VisitInstruction(HInstruction* instruction) override;
void VisitPhi(HPhi* phi) override;
+ void VisitArraySet(HArraySet* instruction) override;
void VisitBinaryOperation(HBinaryOperation* op) override;
void VisitBooleanNot(HBooleanNot* instruction) override;
void VisitBoundType(HBoundType* instruction) override;
@@ -64,8 +66,11 @@ class GraphChecker : public HGraphDelegateVisitor {
void VisitDeoptimize(HDeoptimize* instruction) override;
void VisitIf(HIf* instruction) override;
void VisitInstanceOf(HInstanceOf* check) override;
+ void VisitInvoke(HInvoke* invoke) override;
void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) override;
+ void VisitLoadClass(HLoadClass* load) override;
void VisitLoadException(HLoadException* load) override;
+ void VisitMonitorOperation(HMonitorOperation* monitor_operation) override;
void VisitNeg(HNeg* instruction) override;
void VisitPackedSwitch(HPackedSwitch* instruction) override;
void VisitReturn(HReturn* ret) override;
@@ -102,15 +107,6 @@ class GraphChecker : public HGraphDelegateVisitor {
}
}
- // Enable/Disable the reference type info check.
- //
- // Return: the previous status of the check.
- bool SetRefTypeInfoCheckEnabled(bool value = true) {
- bool old_value = check_reference_type_info_;
- check_reference_type_info_ = value;
- return old_value;
- }
-
protected:
// Report a new error.
void AddError(const std::string& error) {
@@ -123,18 +119,30 @@ class GraphChecker : public HGraphDelegateVisitor {
ArenaVector<std::string> errors_;
private:
+ void VisitReversePostOrder();
+
+ // Checks that the graph's flags are set correctly.
+ void CheckGraphFlags();
+
// String displayed before dumped errors.
const char* const dump_prefix_;
ScopedArenaAllocator allocator_;
ArenaBitVector seen_ids_;
- // Whether to perform the reference type info check for instructions which use or produce
- // object references, e.g. HNewInstance, HLoadClass.
- // The default value is true.
- bool check_reference_type_info_ = true;
// Used to access target information.
CodeGenerator* codegen_;
+ struct FlagInfo {
+ bool seen_try_boundary = false;
+ bool seen_monitor_operation = false;
+ bool seen_loop = false;
+ bool seen_irreducible_loop = false;
+ bool seen_SIMD = false;
+ bool seen_bounds_checks = false;
+ bool seen_always_throwing_invokes = false;
+ };
+ FlagInfo flag_info_;
+
DISALLOW_COPY_AND_ASSIGN(GraphChecker);
};
diff --git a/compiler/optimizing/graph_checker_test.cc b/compiler/optimizing/graph_checker_test.cc
index 08bfa5d80f..b256fbb46d 100644
--- a/compiler/optimizing/graph_checker_test.cc
+++ b/compiler/optimizing/graph_checker_test.cc
@@ -14,12 +14,13 @@
* limitations under the License.
*/
+#include "base/macros.h"
#include "graph_checker.h"
#include "optimizing_unit_test.h"
-namespace art {
+namespace art HIDDEN {
-class GraphCheckerTest : public OptimizingUnitTest {
+class GraphCheckerTest : public CommonCompilerTest, public OptimizingUnitTestHelper {
protected:
HGraph* CreateSimpleCFG();
void TestCode(const std::vector<uint16_t>& data);
diff --git a/compiler/optimizing/graph_test.cc b/compiler/optimizing/graph_test.cc
index 29af808731..b5d712736f 100644
--- a/compiler/optimizing/graph_test.cc
+++ b/compiler/optimizing/graph_test.cc
@@ -15,6 +15,7 @@
*/
#include "base/arena_allocator.h"
+#include "base/macros.h"
#include "builder.h"
#include "nodes.h"
#include "optimizing_unit_test.h"
@@ -22,7 +23,7 @@
#include "gtest/gtest.h"
-namespace art {
+namespace art HIDDEN {
class GraphTest : public OptimizingUnitTest {
protected:
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 4a6ee13005..73bdd1e223 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -43,7 +43,7 @@
#include "ssa_liveness_analysis.h"
#include "utils/assembler.h"
-namespace art {
+namespace art HIDDEN {
// Unique pass-name to identify that the dump is for printing to log.
constexpr const char* kDebugDumpName = "debug";
@@ -480,12 +480,20 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {
<< array_set->GetValueCanBeNull() << std::noboolalpha;
StartAttributeStream("needs_type_check") << std::boolalpha
<< array_set->NeedsTypeCheck() << std::noboolalpha;
+ StartAttributeStream("can_trigger_gc")
+ << std::boolalpha << array_set->GetSideEffects().Includes(SideEffects::CanTriggerGC())
+ << std::noboolalpha;
+ StartAttributeStream("write_barrier_kind") << array_set->GetWriteBarrierKind();
}
void VisitCompare(HCompare* compare) override {
StartAttributeStream("bias") << compare->GetBias();
}
+ void VisitCondition(HCondition* condition) override {
+ StartAttributeStream("bias") << condition->GetBias();
+ }
+
void VisitInvoke(HInvoke* invoke) override {
StartAttributeStream("dex_file_index") << invoke->GetMethodReference().index;
ArtMethod* method = invoke->GetResolvedMethod();
@@ -549,7 +557,9 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {
iset->GetFieldInfo().GetDexFile().PrettyField(iset->GetFieldInfo().GetFieldIndex(),
/* with type */ false);
StartAttributeStream("field_type") << iset->GetFieldType();
- StartAttributeStream("predicated") << std::boolalpha << iset->GetIsPredicatedSet();
+ StartAttributeStream("predicated")
+ << std::boolalpha << iset->GetIsPredicatedSet() << std::noboolalpha;
+ StartAttributeStream("write_barrier_kind") << iset->GetWriteBarrierKind();
}
void VisitStaticFieldGet(HStaticFieldGet* sget) override {
@@ -564,6 +574,7 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {
sset->GetFieldInfo().GetDexFile().PrettyField(sset->GetFieldInfo().GetFieldIndex(),
/* with type */ false);
StartAttributeStream("field_type") << sset->GetFieldType();
+ StartAttributeStream("write_barrier_kind") << sset->GetWriteBarrierKind();
}
void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* field_access) override {
@@ -757,15 +768,7 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {
instruction->IsCheckCast()) {
StartAttributeStream("klass") << "unresolved";
} else {
- // The NullConstant may be added to the graph during other passes that happen between
- // ReferenceTypePropagation and Inliner (e.g. InstructionSimplifier). If the inliner
- // doesn't run or doesn't inline anything, the NullConstant remains untyped.
- // So we should check NullConstants for validity only after reference type propagation.
- DCHECK(graph_in_bad_state_ ||
- IsDebugDump() ||
- (!is_after_pass_ && IsPass(HGraphBuilder::kBuilderPassName)))
- << instruction->DebugName() << instruction->GetId() << " has invalid rti "
- << (is_after_pass_ ? "after" : "before") << " pass " << pass_name_;
+ StartAttributeStream("klass") << "invalid";
}
}
if (disasm_info_ != nullptr) {
@@ -904,6 +907,11 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {
if (block->IsCatchBlock()) {
PrintProperty("flags", "catch_block");
+ } else if (block->IsTryBlock()) {
+ std::stringstream flags_properties;
+ flags_properties << "try_start "
+ << namer_.GetName(block->GetTryCatchInformation()->GetTryEntry().GetBlock());
+ PrintProperty("flags", flags_properties.str().c_str());
} else if (!IsDebugDump()) {
// Don't print useless information to logcat
PrintEmptyProperty("flags");
diff --git a/compiler/optimizing/graph_visualizer.h b/compiler/optimizing/graph_visualizer.h
index 3429c11cbd..9878917739 100644
--- a/compiler/optimizing/graph_visualizer.h
+++ b/compiler/optimizing/graph_visualizer.h
@@ -22,10 +22,11 @@
#include "arch/instruction_set.h"
#include "base/arena_containers.h"
+#include "base/macros.h"
#include "base/value_object.h"
#include "block_namer.h"
-namespace art {
+namespace art HIDDEN {
class CodeGenerator;
class DexCompilationUnit;
diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc
index c7cd223b51..a6ca057cfc 100644
--- a/compiler/optimizing/gvn.cc
+++ b/compiler/optimizing/gvn.cc
@@ -23,7 +23,7 @@
#include "base/utils.h"
#include "side_effects_analysis.h"
-namespace art {
+namespace art HIDDEN {
/**
* A ValueSet holds instructions that can replace other instructions. It is updated
diff --git a/compiler/optimizing/gvn.h b/compiler/optimizing/gvn.h
index bbf2265e98..df4e3a8dbf 100644
--- a/compiler/optimizing/gvn.h
+++ b/compiler/optimizing/gvn.h
@@ -17,10 +17,11 @@
#ifndef ART_COMPILER_OPTIMIZING_GVN_H_
#define ART_COMPILER_OPTIMIZING_GVN_H_
+#include "base/macros.h"
#include "nodes.h"
#include "optimization.h"
-namespace art {
+namespace art HIDDEN {
class SideEffectsAnalysis;
diff --git a/compiler/optimizing/gvn_test.cc b/compiler/optimizing/gvn_test.cc
index 3bf4cc35ba..1eb6307cb1 100644
--- a/compiler/optimizing/gvn_test.cc
+++ b/compiler/optimizing/gvn_test.cc
@@ -17,12 +17,13 @@
#include "gvn.h"
#include "base/arena_allocator.h"
+#include "base/macros.h"
#include "builder.h"
#include "nodes.h"
#include "optimizing_unit_test.h"
#include "side_effects_analysis.h"
-namespace art {
+namespace art HIDDEN {
class GVNTest : public OptimizingUnitTest {};
diff --git a/compiler/optimizing/induction_var_analysis.cc b/compiler/optimizing/induction_var_analysis.cc
index 3b5a2f1f9d..be6c268f5d 100644
--- a/compiler/optimizing/induction_var_analysis.cc
+++ b/compiler/optimizing/induction_var_analysis.cc
@@ -16,9 +16,10 @@
#include "induction_var_analysis.h"
+#include "base/scoped_arena_containers.h"
#include "induction_var_range.h"
-namespace art {
+namespace art HIDDEN {
/**
* Returns true if the from/to types denote a narrowing, integral conversion (precision loss).
@@ -214,18 +215,25 @@ struct HInductionVarAnalysis::StackEntry {
size_t low_depth;
};
-HInductionVarAnalysis::HInductionVarAnalysis(HGraph* graph, const char* name)
- : HOptimization(graph, name),
+HInductionVarAnalysis::HInductionVarAnalysis(HGraph* graph,
+ OptimizingCompilerStats* stats,
+ const char* name)
+ : HOptimization(graph, name, stats),
induction_(std::less<const HLoopInformation*>(),
graph->GetAllocator()->Adapter(kArenaAllocInductionVarAnalysis)),
- cycles_(std::less<HPhi*>(),
- graph->GetAllocator()->Adapter(kArenaAllocInductionVarAnalysis)) {
+ cycles_(std::less<HPhi*>(), graph->GetAllocator()->Adapter(kArenaAllocInductionVarAnalysis)) {
}
bool HInductionVarAnalysis::Run() {
// Detects sequence variables (generalized induction variables) during an outer to inner
// traversal of all loops using Gerlek's algorithm. The order is important to enable
// range analysis on outer loop while visiting inner loops.
+
+ if (IsPathologicalCase()) {
+ MaybeRecordStat(stats_, MethodCompilationStat::kNotVarAnalyzedPathological);
+ return false;
+ }
+
for (HBasicBlock* graph_block : graph_->GetReversePostOrder()) {
// Don't analyze irreducible loops.
if (graph_block->IsLoopHeader() && !graph_block->GetLoopInformation()->IsIrreducible()) {
@@ -1576,4 +1584,84 @@ std::string HInductionVarAnalysis::InductionToString(InductionInfo* info) {
return "";
}
+void HInductionVarAnalysis::CalculateLoopHeaderPhisInARow(
+ HPhi* initial_phi,
+ ScopedArenaSafeMap<HPhi*, int>& cached_values,
+ ScopedArenaAllocator& allocator) {
+ DCHECK(initial_phi->IsLoopHeaderPhi());
+ ScopedArenaQueue<HPhi*> worklist(allocator.Adapter(kArenaAllocInductionVarAnalysis));
+ worklist.push(initial_phi);
+ // Used to check which phis are in the current chain we are checking.
+ ScopedArenaSet<HPhi*> phis_in_chain(allocator.Adapter(kArenaAllocInductionVarAnalysis));
+ while (!worklist.empty()) {
+ HPhi* current_phi = worklist.front();
+ DCHECK(current_phi->IsLoopHeaderPhi());
+ if (cached_values.find(current_phi) != cached_values.end()) {
+ // Already processed.
+ worklist.pop();
+ continue;
+ }
+
+ phis_in_chain.insert(current_phi);
+ int max_value = 0;
+ bool pushed_other_phis = false;
+ for (size_t index = 0; index < current_phi->InputCount(); index++) {
+ // If the input is not a loop header phi, we only have 1 (current_phi).
+ int current_value = 1;
+ if (current_phi->InputAt(index)->IsLoopHeaderPhi()) {
+ HPhi* loop_header_phi = current_phi->InputAt(index)->AsPhi();
+ auto it = cached_values.find(loop_header_phi);
+ if (it != cached_values.end()) {
+ current_value += it->second;
+ } else if (phis_in_chain.find(current_phi) == phis_in_chain.end()) {
+ // Push phis which aren't in the chain already to be processed.
+ pushed_other_phis = true;
+ worklist.push(loop_header_phi);
+ }
+ // Phis in the chain will get processed later. We keep `current_value` as 1 to avoid
+ // double counting `loop_header_phi`.
+ }
+ max_value = std::max(max_value, current_value);
+ }
+
+ if (!pushed_other_phis) {
+ // Only finish processing after all inputs were processed.
+ worklist.pop();
+ phis_in_chain.erase(current_phi);
+ cached_values.FindOrAdd(current_phi, max_value);
+ }
+ }
+}
+
+bool HInductionVarAnalysis::IsPathologicalCase() {
+ ScopedArenaAllocator local_allocator(graph_->GetArenaStack());
+ ScopedArenaSafeMap<HPhi*, int> cached_values(
+ std::less<HPhi*>(), local_allocator.Adapter(kArenaAllocInductionVarAnalysis));
+
+ // Due to how our induction passes work, compilation can take a very long time if there are
+ // several loop header phis in a row. If we find more than 15 consecutive loop header phis, we
+ // don't perform the analysis.
+ constexpr int kMaximumLoopHeaderPhisInARow = 15;
+
+ for (HBasicBlock* block : graph_->GetReversePostOrder()) {
+ if (!block->IsLoopHeader()) {
+ continue;
+ }
+
+ for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
+ DCHECK(it.Current()->IsLoopHeaderPhi());
+ HPhi* phi = it.Current()->AsPhi();
+ CalculateLoopHeaderPhisInARow(phi, cached_values, local_allocator);
+ DCHECK(cached_values.find(phi) != cached_values.end())
+ << " we should have a value for Phi " << phi->GetId()
+ << " in block " << phi->GetBlock()->GetBlockId();
+ if (cached_values.find(phi)->second > kMaximumLoopHeaderPhisInARow) {
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
} // namespace art
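As a rough standalone sketch of the pathological-case detection added above (not the ART implementation): model each loop header phi as a node whose inputs may be other header phis, compute the longest chain with memoization, and bail out once it exceeds the same cutoff of 15. The real pass uses an explicit worklist instead of recursion and ART's arena containers; the names below are illustrative only.

#include <algorithm>
#include <cstdio>
#include <unordered_map>
#include <vector>

// Toy model: each "phi" is a node whose inputs may be other loop header phis.
struct Phi {
  std::vector<const Phi*> inputs;
};

// Longest chain of loop header phis reachable from `phi`, memoized in `cache`.
// (Recursive for brevity; the real pass uses an explicit worklist to bound stack depth.)
static int ChainLength(const Phi* phi, std::unordered_map<const Phi*, int>& cache) {
  auto it = cache.find(phi);
  if (it != cache.end()) {
    return it->second;
  }
  cache[phi] = 1;  // Mark as in-progress; mirrors keeping the value at 1 for phis already in the chain.
  int max_value = 1;
  for (const Phi* input : phi->inputs) {
    max_value = std::max(max_value, 1 + ChainLength(input, cache));
  }
  cache[phi] = max_value;
  return max_value;
}

int main() {
  constexpr int kMaximumLoopHeaderPhisInARow = 15;  // Same cutoff as the pass above.
  std::vector<Phi> phis(20);
  for (size_t i = 1; i < phis.size(); ++i) {
    phis[i].inputs.push_back(&phis[i - 1]);  // phi[i] uses phi[i-1]: a 20-long chain.
  }
  std::unordered_map<const Phi*, int> cache;
  int longest = ChainLength(&phis.back(), cache);
  std::printf("longest chain = %d, pathological = %s\n",
              longest, longest > kMaximumLoopHeaderPhisInARow ? "true" : "false");
  return 0;
}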
diff --git a/compiler/optimizing/induction_var_analysis.h b/compiler/optimizing/induction_var_analysis.h
index 09417722da..050950089a 100644
--- a/compiler/optimizing/induction_var_analysis.h
+++ b/compiler/optimizing/induction_var_analysis.h
@@ -21,11 +21,12 @@
#include "base/arena_containers.h"
#include "base/array_ref.h"
+#include "base/macros.h"
#include "base/scoped_arena_containers.h"
#include "nodes.h"
#include "optimization.h"
-namespace art {
+namespace art HIDDEN {
/**
* Induction variable analysis. This class does not have a direct public API.
@@ -38,7 +39,9 @@ namespace art {
*/
class HInductionVarAnalysis : public HOptimization {
public:
- explicit HInductionVarAnalysis(HGraph* graph, const char* name = kInductionPassName);
+ explicit HInductionVarAnalysis(HGraph* graph,
+ OptimizingCompilerStats* stats = nullptr,
+ const char* name = kInductionPassName);
bool Run() override;
@@ -307,6 +310,15 @@ class HInductionVarAnalysis : public HOptimization {
static std::string FetchToString(HInstruction* fetch);
static std::string InductionToString(InductionInfo* info);
+ // Returns true if we have a pathological case we don't want to analyze.
+ bool IsPathologicalCase();
+ // Starting with `initial_phi`, calculates how many loop header phis we have in a row. To do
+ // this, we count the loop header phis that are used as an input of other loop header phis. It
+ // uses `cached_values` to avoid recomputing results.
+ void CalculateLoopHeaderPhisInARow(HPhi* initial_phi,
+ ScopedArenaSafeMap<HPhi*, int>& cached_values,
+ ScopedArenaAllocator& allocator);
+
/**
* Maintains the results of the analysis as a mapping from loops to a mapping from instructions
* to the induction information for that instruction in that loop.
diff --git a/compiler/optimizing/induction_var_analysis_test.cc b/compiler/optimizing/induction_var_analysis_test.cc
index 4c11ad4643..80c15371dc 100644
--- a/compiler/optimizing/induction_var_analysis_test.cc
+++ b/compiler/optimizing/induction_var_analysis_test.cc
@@ -17,12 +17,13 @@
#include <regex>
#include "base/arena_allocator.h"
+#include "base/macros.h"
#include "builder.h"
#include "induction_var_analysis.h"
#include "nodes.h"
#include "optimizing_unit_test.h"
-namespace art {
+namespace art HIDDEN {
/**
* Fixture class for the InductionVarAnalysis tests.
diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc
index ad3d1a9321..9b78699ead 100644
--- a/compiler/optimizing/induction_var_range.cc
+++ b/compiler/optimizing/induction_var_range.cc
@@ -17,8 +17,9 @@
#include "induction_var_range.h"
#include <limits>
+#include "optimizing/nodes.h"
-namespace art {
+namespace art HIDDEN {
/** Returns true if 64-bit constant fits in 32-bit constant. */
static bool CanLongValueFitIntoInt(int64_t c) {
@@ -1064,10 +1065,13 @@ bool InductionVarRange::GenerateRangeOrLastValue(const HBasicBlock* context,
case HInductionVarAnalysis::kLinear:
if (*stride_value > 0) {
lower = nullptr;
+ return GenerateLastValueLinear(
+ context, loop, info, trip, graph, block, /*is_min=*/false, upper);
} else {
upper = nullptr;
+ return GenerateLastValueLinear(
+ context, loop, info, trip, graph, block, /*is_min=*/true, lower);
}
- break;
case HInductionVarAnalysis::kPolynomial:
return GenerateLastValuePolynomial(context, loop, info, trip, graph, block, lower);
case HInductionVarAnalysis::kGeometric:
@@ -1113,6 +1117,54 @@ bool InductionVarRange::GenerateRangeOrLastValue(const HBasicBlock* context,
GenerateCode(context, loop, info, trip, graph, block, /*is_min=*/ false, upper);
}
+bool InductionVarRange::GenerateLastValueLinear(const HBasicBlock* context,
+ const HLoopInformation* loop,
+ HInductionVarAnalysis::InductionInfo* info,
+ HInductionVarAnalysis::InductionInfo* trip,
+ HGraph* graph,
+ HBasicBlock* block,
+ bool is_min,
+ /*out*/ HInstruction** result) const {
+ DataType::Type type = info->type;
+ // Avoid any narrowing linear induction or any type mismatch between the linear induction and the
+ // trip count expression.
+ if (HInductionVarAnalysis::IsNarrowingLinear(info) || trip->type != type) {
+ return false;
+ }
+
+ // Stride value must be a known constant that fits into int32.
+ int64_t stride_value = 0;
+ if (!IsConstant(context, loop, info->op_a, kExact, &stride_value) ||
+ !CanLongValueFitIntoInt(stride_value)) {
+ return false;
+ }
+
+ // We require `a` to be a constant value that didn't overflow.
+ const bool is_min_a = stride_value >= 0 ? is_min : !is_min;
+ Value val_a = GetVal(context, loop, trip, trip, is_min_a);
+ HInstruction* opb;
+ if (!IsConstantValue(val_a) ||
+ !GenerateCode(context, loop, info->op_b, trip, graph, block, is_min, &opb)) {
+ return false;
+ }
+
+ if (graph != nullptr) {
+ ArenaAllocator* allocator = graph->GetAllocator();
+ HInstruction* oper;
+ HInstruction* opa = graph->GetConstant(type, val_a.b_constant);
+ if (stride_value == 1) {
+ oper = new (allocator) HAdd(type, opa, opb);
+ } else if (stride_value == -1) {
+ oper = new (graph->GetAllocator()) HSub(type, opb, opa);
+ } else {
+ HInstruction* mul = new (allocator) HMul(type, graph->GetConstant(type, stride_value), opa);
+ oper = new (allocator) HAdd(type, Insert(block, mul), opb);
+ }
+ *result = Insert(block, oper);
+ }
+ return true;
+}
+
bool InductionVarRange::GenerateLastValuePolynomial(const HBasicBlock* context,
const HLoopInformation* loop,
HInductionVarAnalysis::InductionInfo* info,
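For reference, the value that GenerateLastValueLinear above materializes as HIR is the closed form of a linear induction after the loop: initial value plus stride times trip count, with stride == +1 / -1 collapsing to a single HAdd / HSub. A scalar sketch of that arithmetic (illustrative only; it ignores the min/max selection and type checks of the real code):

#include <cassert>
#include <cstdint>
#include <cstdio>

// Scalar version of the value GenerateLastValueLinear emits as HIR:
// for a linear induction i = b + stride * n, the value after `trip` iterations is
// b + stride * trip. stride == +1 / -1 collapse to a single HAdd / HSub.
static int64_t LastValueLinear(int64_t stride, int64_t trip, int64_t b) {
  if (stride == 1) {
    return trip + b;         // emitted as HAdd(trip, b)
  } else if (stride == -1) {
    return b - trip;         // emitted as HSub(b, trip)
  }
  return stride * trip + b;  // emitted as HAdd(HMul(stride, trip), b)
}

int main() {
  // for (int i = 5; i < 5 + 3 * 100; i += 3)  ->  value of i after the loop is 5 + 3 * 100.
  assert(LastValueLinear(/*stride=*/3, /*trip=*/100, /*b=*/5) == 305);
  // Down-counting: for (int i = 1000; i > 0; --i)  ->  value after 1000 iterations is 0,
  // consistent with the simplified Sub(1000, 1000) expectation updated in the test above.
  assert(LastValueLinear(/*stride=*/-1, /*trip=*/1000, /*b=*/1000) == 0);
  std::printf("ok\n");
  return 0;
}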
diff --git a/compiler/optimizing/induction_var_range.h b/compiler/optimizing/induction_var_range.h
index 552837c044..3e1212bec8 100644
--- a/compiler/optimizing/induction_var_range.h
+++ b/compiler/optimizing/induction_var_range.h
@@ -17,9 +17,10 @@
#ifndef ART_COMPILER_OPTIMIZING_INDUCTION_VAR_RANGE_H_
#define ART_COMPILER_OPTIMIZING_INDUCTION_VAR_RANGE_H_
+#include "base/macros.h"
#include "induction_var_analysis.h"
-namespace art {
+namespace art HIDDEN {
/**
* This class implements range analysis on expressions within loops. It takes the results
@@ -317,6 +318,15 @@ class InductionVarRange {
/*out*/ bool* needs_finite_test,
/*out*/ bool* needs_taken_test) const;
+ bool GenerateLastValueLinear(const HBasicBlock* context,
+ const HLoopInformation* loop,
+ HInductionVarAnalysis::InductionInfo* info,
+ HInductionVarAnalysis::InductionInfo* trip,
+ HGraph* graph,
+ HBasicBlock* block,
+ bool is_min,
+ /*out*/ HInstruction** result) const;
+
bool GenerateLastValuePolynomial(const HBasicBlock* context,
const HLoopInformation* loop,
HInductionVarAnalysis::InductionInfo* info,
diff --git a/compiler/optimizing/induction_var_range_test.cc b/compiler/optimizing/induction_var_range_test.cc
index 962123d948..d879897959 100644
--- a/compiler/optimizing/induction_var_range_test.cc
+++ b/compiler/optimizing/induction_var_range_test.cc
@@ -17,12 +17,13 @@
#include "induction_var_range.h"
#include "base/arena_allocator.h"
+#include "base/macros.h"
#include "builder.h"
#include "induction_var_analysis.h"
#include "nodes.h"
#include "optimizing_unit_test.h"
-namespace art {
+namespace art HIDDEN {
using Value = InductionVarRange::Value;
@@ -1064,10 +1065,6 @@ TEST_F(InductionVarRangeTest, ConstantTripCountDown) {
HInstruction* last = range_.GenerateLastValue(phi, graph_, loop_preheader_);
ASSERT_TRUE(last->IsSub());
ExpectInt(1000, last->InputAt(0));
- ASSERT_TRUE(last->InputAt(1)->IsNeg());
- last = last->InputAt(1)->InputAt(0);
- ASSERT_TRUE(last->IsSub());
- ExpectInt(0, last->InputAt(0));
ExpectInt(1000, last->InputAt(1));
// Loop logic.
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index f73c0d38e4..5a4478dc14 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -46,7 +46,7 @@
#include "thread.h"
#include "verifier/verifier_compiler_binding.h"
-namespace art {
+namespace art HIDDEN {
// Instruction limit to control memory.
static constexpr size_t kMaximumNumberOfTotalInstructions = 1024;
@@ -72,6 +72,9 @@ static constexpr size_t kMaximumNumberOfPolymorphicRecursiveCalls = 0;
// Controls the use of inline caches in AOT mode.
static constexpr bool kUseAOTInlineCaches = true;
+// Controls the use of inlining try catches.
+static constexpr bool kInlineTryCatches = true;
+
// We check for line numbers to make sure the DepthString implementation
// aligns the output nicely.
#define LOG_INTERNAL(msg) \
@@ -141,7 +144,11 @@ bool HInliner::Run() {
}
bool did_inline = false;
- bool did_set_always_throws = false;
+ // The inliner is the only phase that sets invokes as `always throwing`, and since we only run the
+ // inliner once per graph, this value should always be false at the beginning of the inlining
+ // phase. This is important since we use `HasAlwaysThrowingInvokes` to know whether the inliner
+ // phase performed a relevant change in the graph.
+ DCHECK(!graph_->HasAlwaysThrowingInvokes());
// Initialize the number of instructions for the method being compiled. Recursive calls
// to HInliner::Run have already updated the instruction count.
@@ -175,14 +182,14 @@ bool HInliner::Run() {
HInstruction* next = instruction->GetNext();
HInvoke* call = instruction->AsInvoke();
// As long as the call is not intrinsified, it is worth trying to inline.
- if (call != nullptr && call->GetIntrinsic() == Intrinsics::kNone) {
+ if (call != nullptr && !codegen_->IsImplementedIntrinsic(call)) {
if (honor_noinline_directives) {
// Debugging case: directives in method names control or assert on inlining.
std::string callee_name =
call->GetMethodReference().PrettyMethod(/* with_signature= */ false);
// Tests prevent inlining by having $noinline$ in their method names.
if (callee_name.find("$noinline$") == std::string::npos) {
- if (TryInline(call, &did_set_always_throws)) {
+ if (TryInline(call)) {
did_inline = true;
} else if (honor_inline_directives) {
bool should_have_inlined = (callee_name.find("$inline$") != std::string::npos);
@@ -192,7 +199,7 @@ bool HInliner::Run() {
} else {
DCHECK(!honor_inline_directives);
// Normal case: try to inline.
- if (TryInline(call, &did_set_always_throws)) {
+ if (TryInline(call)) {
did_inline = true;
}
}
@@ -201,7 +208,9 @@ bool HInliner::Run() {
}
}
- return did_inline || did_set_always_throws;
+ // We return true if we either inlined at least one method or marked at least one invoke as
+ // always throwing.
+ return did_inline || graph_->HasAlwaysThrowingInvokes();
}
static bool IsMethodOrDeclaringClassFinal(ArtMethod* method)
@@ -436,7 +445,7 @@ static bool AlwaysThrows(ArtMethod* method)
return throw_seen;
}
-bool HInliner::TryInline(HInvoke* invoke_instruction, /*inout*/ bool* did_set_always_throws) {
+bool HInliner::TryInline(HInvoke* invoke_instruction) {
MaybeRecordStat(stats_, MethodCompilationStat::kTryInline);
// Don't bother to move further if we know the method is unresolved or the invocation is
@@ -472,7 +481,8 @@ bool HInliner::TryInline(HInvoke* invoke_instruction, /*inout*/ bool* did_set_al
bool result = TryInlineAndReplace(invoke_instruction,
actual_method,
ReferenceTypeInfo::CreateInvalid(),
- /* do_rtp= */ true);
+ /* do_rtp= */ true,
+ /* is_speculative= */ false);
if (result) {
MaybeRecordStat(stats_, MethodCompilationStat::kInlinedInvokeVirtualOrInterface);
if (outermost_graph_ == graph_) {
@@ -487,11 +497,10 @@ bool HInliner::TryInline(HInvoke* invoke_instruction, /*inout*/ bool* did_set_al
} else {
invoke_to_analyze = invoke_instruction;
}
- // Set always throws property for non-inlined method call with single
- // target.
- if (AlwaysThrows(actual_method)) {
- invoke_to_analyze->SetAlwaysThrows(true);
- *did_set_always_throws = true;
+ // Set always throws property for non-inlined method call with single target.
+ if (invoke_instruction->AlwaysThrows() || AlwaysThrows(actual_method)) {
+ invoke_to_analyze->SetAlwaysThrows(/* always_throws= */ true);
+ graph_->SetHasAlwaysThrowingInvokes(/* value= */ true);
}
}
return result;
@@ -499,10 +508,27 @@ bool HInliner::TryInline(HInvoke* invoke_instruction, /*inout*/ bool* did_set_al
DCHECK(!invoke_instruction->IsInvokeStaticOrDirect());
+ // No try catch inlining allowed here, or recursively. For try catch inlining we are banking on
+ // the fact that we have a unique dex pc list. We cannot guarantee that for some TryInline methods,
+ // e.g. `TryInlinePolymorphicCall`.
+ // TODO(solanes): Setting `try_catch_inlining_allowed_` to false here covers all cases from
+ // `TryInlineFromCHA` and from `TryInlineFromInlineCache` as well (e.g.
+ // `TryInlinePolymorphicCall`). Reassess to see if we can inline try catch blocks in
+ // `TryInlineFromCHA`, `TryInlineMonomorphicCall` and `TryInlinePolymorphicCallToSameTarget`.
+
+ // We store the value to restore it since we will use the same HInliner instance for other inlinee
+ // candidates.
+ const bool previous_value = try_catch_inlining_allowed_;
+ try_catch_inlining_allowed_ = false;
+
if (TryInlineFromCHA(invoke_instruction)) {
+ try_catch_inlining_allowed_ = previous_value;
return true;
}
- return TryInlineFromInlineCache(invoke_instruction);
+
+ const bool result = TryInlineFromInlineCache(invoke_instruction);
+ try_catch_inlining_allowed_ = previous_value;
+ return result;
}
bool HInliner::TryInlineFromCHA(HInvoke* invoke_instruction) {
@@ -518,7 +544,8 @@ bool HInliner::TryInlineFromCHA(HInvoke* invoke_instruction) {
if (!TryInlineAndReplace(invoke_instruction,
method,
ReferenceTypeInfo::CreateInvalid(),
- /* do_rtp= */ true)) {
+ /* do_rtp= */ true,
+ /* is_speculative= */ true)) {
return false;
}
AddCHAGuard(invoke_instruction, dex_pc, cursor, bb_cursor);
@@ -786,7 +813,8 @@ bool HInliner::TryInlineMonomorphicCall(
if (!TryInlineAndReplace(invoke_instruction,
resolved_method,
ReferenceTypeInfo::Create(monomorphic_type, /* is_exact= */ true),
- /* do_rtp= */ false)) {
+ /* do_rtp= */ false,
+ /* is_speculative= */ true)) {
return false;
}
@@ -802,7 +830,6 @@ bool HInliner::TryInlineMonomorphicCall(
// Run type propagation to get the guard typed, and eventually propagate the
// type of the receiver.
ReferenceTypePropagation rtp_fixup(graph_,
- outer_compilation_unit_.GetClassLoader(),
outer_compilation_unit_.GetDexCache(),
/* is_first_run= */ false);
rtp_fixup.Run();
@@ -982,7 +1009,8 @@ bool HInliner::TryInlinePolymorphicCall(
!TryBuildAndInline(invoke_instruction,
method,
ReferenceTypeInfo::Create(handle, /* is_exact= */ true),
- &return_replacement)) {
+ &return_replacement,
+ /* is_speculative= */ true)) {
all_targets_inlined = false;
} else {
one_target_inlined = true;
@@ -1024,7 +1052,6 @@ bool HInliner::TryInlinePolymorphicCall(
// Run type propagation to get the guards typed.
ReferenceTypePropagation rtp_fixup(graph_,
- outer_compilation_unit_.GetClassLoader(),
outer_compilation_unit_.GetDexCache(),
/* is_first_run= */ false);
rtp_fixup.Run();
@@ -1160,7 +1187,8 @@ bool HInliner::TryInlinePolymorphicCallToSameTarget(
if (!TryBuildAndInline(invoke_instruction,
actual_method,
ReferenceTypeInfo::CreateInvalid(),
- &return_replacement)) {
+ &return_replacement,
+ /* is_speculative= */ true)) {
return false;
}
@@ -1215,7 +1243,6 @@ bool HInliner::TryInlinePolymorphicCallToSameTarget(
// Run type propagation to get the guard typed.
ReferenceTypePropagation rtp_fixup(graph_,
- outer_compilation_unit_.GetClassLoader(),
outer_compilation_unit_.GetDexCache(),
/* is_first_run= */ false);
rtp_fixup.Run();
@@ -1232,7 +1259,6 @@ void HInliner::MaybeRunReferenceTypePropagation(HInstruction* replacement,
// Actual return value has a more specific type than the method's declared
// return type. Run RTP again on the outer graph to propagate it.
ReferenceTypePropagation(graph_,
- outer_compilation_unit_.GetClassLoader(),
outer_compilation_unit_.GetDexCache(),
/* is_first_run= */ false).Run();
}
@@ -1246,6 +1272,13 @@ bool HInliner::TryDevirtualize(HInvoke* invoke_instruction,
return false;
}
+ // Don't try to devirtualize intrinsics as it breaks pattern matching from later phases.
+ // TODO(solanes): This `if` could be removed if we update optimizations like
+ // TryReplaceStringBuilderAppend.
+ if (invoke_instruction->IsIntrinsic()) {
+ return false;
+ }
+
// Don't bother trying to call directly a default conflict method. It
// doesn't have a proper MethodReference, but also `GetCanonicalMethod`
// will return an actual default implementation.
@@ -1288,7 +1321,8 @@ bool HInliner::TryDevirtualize(HInvoke* invoke_instruction,
dispatch_info,
kDirect,
MethodReference(method->GetDexFile(), method->GetDexMethodIndex()),
- HInvokeStaticOrDirect::ClinitCheckRequirement::kNone);
+ HInvokeStaticOrDirect::ClinitCheckRequirement::kNone,
+ !graph_->IsDebuggable());
HInputsRef inputs = invoke_instruction->GetInputs();
DCHECK_EQ(inputs.size(), invoke_instruction->GetNumberOfArguments());
for (size_t index = 0; index != inputs.size(); ++index) {
@@ -1301,7 +1335,7 @@ bool HInliner::TryDevirtualize(HInvoke* invoke_instruction,
invoke_instruction->GetBlock()->InsertInstructionBefore(new_invoke, invoke_instruction);
new_invoke->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
if (invoke_instruction->GetType() == DataType::Type::kReference) {
- new_invoke->SetReferenceTypeInfo(invoke_instruction->GetReferenceTypeInfo());
+ new_invoke->SetReferenceTypeInfoIfValid(invoke_instruction->GetReferenceTypeInfo());
}
*replacement = new_invoke;
@@ -1316,11 +1350,13 @@ bool HInliner::TryDevirtualize(HInvoke* invoke_instruction,
bool HInliner::TryInlineAndReplace(HInvoke* invoke_instruction,
ArtMethod* method,
ReferenceTypeInfo receiver_type,
- bool do_rtp) {
- DCHECK(!invoke_instruction->IsIntrinsic());
+ bool do_rtp,
+ bool is_speculative) {
+ DCHECK(!codegen_->IsImplementedIntrinsic(invoke_instruction));
HInstruction* return_replacement = nullptr;
- if (!TryBuildAndInline(invoke_instruction, method, receiver_type, &return_replacement)) {
+ if (!TryBuildAndInline(
+ invoke_instruction, method, receiver_type, &return_replacement, is_speculative)) {
return false;
}
@@ -1378,6 +1414,15 @@ bool HInliner::IsInliningAllowed(ArtMethod* method, const CodeItemDataAccessor&
return false;
}
+ if (annotations::MethodIsNeverInline(*method->GetDexFile(),
+ method->GetClassDef(),
+ method->GetDexMethodIndex())) {
+ LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedNeverInlineAnnotation)
+ << "Method " << method->PrettyMethod()
+ << " has the @NeverInline annotation so it won't be inlined";
+ return false;
+ }
+
return true;
}
@@ -1397,9 +1442,25 @@ bool HInliner::IsInliningSupported(const HInvoke* invoke_instruction,
}
if (accessor.TriesSize() != 0) {
- LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedTryCatchCallee)
- << "Method " << method->PrettyMethod() << " is not inlined because of try block";
- return false;
+ if (!kInlineTryCatches) {
+ LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedTryCatchDisabled)
+ << "Method " << method->PrettyMethod()
+ << " is not inlined because inlining try catches is disabled globally";
+ return false;
+ }
+ const bool disallowed_try_catch_inlining =
+ // Direct parent is a try block.
+ invoke_instruction->GetBlock()->IsTryBlock() ||
+ // Indirect parent disallows try catch inlining.
+ !try_catch_inlining_allowed_;
+ if (disallowed_try_catch_inlining) {
+ LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedTryCatchCallee)
+ << "Method " << method->PrettyMethod()
+ << " is not inlined because it has a try catch and we are not supporting it for this"
+ << " particular call. This is could be because e.g. it would be inlined inside another"
+ << " try block, we arrived here from TryInlinePolymorphicCall, etc.";
+ return false;
+ }
}
if (invoke_instruction->IsInvokeStaticOrDirect() &&
@@ -1416,9 +1477,9 @@ bool HInliner::IsInliningSupported(const HInvoke* invoke_instruction,
return true;
}
-// Returns whether our resource limits allow inlining this method.
-bool HInliner::IsInliningBudgetAvailable(ArtMethod* method,
- const CodeItemDataAccessor& accessor) const {
+bool HInliner::IsInliningEncouraged(const HInvoke* invoke_instruction,
+ ArtMethod* method,
+ const CodeItemDataAccessor& accessor) const {
if (CountRecursiveCallsOf(method) > kMaximumNumberOfRecursiveCalls) {
LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedRecursiveBudget)
<< "Method "
@@ -1438,13 +1499,21 @@ bool HInliner::IsInliningBudgetAvailable(ArtMethod* method,
return false;
}
+ if (invoke_instruction->GetBlock()->GetLastInstruction()->IsThrow()) {
+ LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedEndsWithThrow)
+ << "Method " << method->PrettyMethod()
+ << " is not inlined because its block ends with a throw";
+ return false;
+ }
+
return true;
}
bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction,
ArtMethod* method,
ReferenceTypeInfo receiver_type,
- HInstruction** return_replacement) {
+ HInstruction** return_replacement,
+ bool is_speculative) {
// If invoke_instruction is devirtualized to a different method, give intrinsics
// another chance before we try to inline it.
if (invoke_instruction->GetResolvedMethod() != method && method->IsIntrinsic()) {
@@ -1459,7 +1528,8 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction,
invoke_instruction->GetMethodReference(), // Use existing invoke's method's reference.
method,
MethodReference(method->GetDexFile(), method->GetDexMethodIndex()),
- method->GetMethodIndex());
+ method->GetMethodIndex(),
+ !graph_->IsDebuggable());
DCHECK_NE(new_invoke->GetIntrinsic(), Intrinsics::kNone);
HInputsRef inputs = invoke_instruction->GetInputs();
for (size_t index = 0; index != inputs.size(); ++index) {
@@ -1468,7 +1538,7 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction,
invoke_instruction->GetBlock()->InsertInstructionBefore(new_invoke, invoke_instruction);
new_invoke->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
if (invoke_instruction->GetType() == DataType::Type::kReference) {
- new_invoke->SetReferenceTypeInfo(invoke_instruction->GetReferenceTypeInfo());
+ new_invoke->SetReferenceTypeInfoIfValid(invoke_instruction->GetReferenceTypeInfo());
}
*return_replacement = new_invoke;
return true;
@@ -1503,12 +1573,12 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction,
return false;
}
- if (!IsInliningBudgetAvailable(method, accessor)) {
+ if (!IsInliningEncouraged(invoke_instruction, method, accessor)) {
return false;
}
if (!TryBuildAndInlineHelper(
- invoke_instruction, method, receiver_type, return_replacement)) {
+ invoke_instruction, method, receiver_type, return_replacement, is_speculative)) {
return false;
}
@@ -1627,7 +1697,7 @@ bool HInliner::TryPatternSubstitution(HInvoke* invoke_instruction,
bool needs_constructor_barrier = false;
for (size_t i = 0; i != number_of_iputs; ++i) {
HInstruction* value = GetInvokeInputForArgVRegIndex(invoke_instruction, iput_args[i]);
- if (!value->IsConstant() || !value->AsConstant()->IsZeroBitPattern()) {
+ if (!IsZeroBitPattern(value)) {
uint16_t field_index = iput_field_indexes[i];
bool is_final;
HInstanceFieldSet* iput =
@@ -1684,7 +1754,6 @@ HInstanceFieldGet* HInliner::CreateInstanceFieldGet(uint32_t field_index,
Handle<mirror::DexCache> dex_cache =
graph_->GetHandleCache()->NewHandle(referrer->GetDexCache());
ReferenceTypePropagation rtp(graph_,
- outer_compilation_unit_.GetClassLoader(),
dex_cache,
/* is_first_run= */ false);
rtp.Visit(iget);
@@ -1795,7 +1864,7 @@ void HInliner::SubstituteArguments(HGraph* callee_graph,
run_rtp = true;
current->SetReferenceTypeInfo(receiver_type);
} else {
- current->SetReferenceTypeInfo(argument->GetReferenceTypeInfo());
+ current->SetReferenceTypeInfoIfValid(argument->GetReferenceTypeInfo());
}
current->AsParameterValue()->SetCanBeNull(argument->CanBeNull());
}
@@ -1807,7 +1876,6 @@ void HInliner::SubstituteArguments(HGraph* callee_graph,
// are more specific than the declared ones, run RTP again on the inner graph.
if (run_rtp || ArgumentTypesMoreSpecific(invoke_instruction, resolved_method)) {
ReferenceTypePropagation(callee_graph,
- outer_compilation_unit_.GetClassLoader(),
dex_compilation_unit.GetDexCache(),
/* is_first_run= */ false).Run();
}
@@ -1821,8 +1889,9 @@ void HInliner::SubstituteArguments(HGraph* callee_graph,
// If this function returns true, it will also set out_number_of_instructions to
// the number of instructions in the inlined body.
bool HInliner::CanInlineBody(const HGraph* callee_graph,
- const HBasicBlock* target_block,
- size_t* out_number_of_instructions) const {
+ HInvoke* invoke,
+ size_t* out_number_of_instructions,
+ bool is_speculative) const {
ArtMethod* const resolved_method = callee_graph->GetArtMethod();
HBasicBlock* exit_block = callee_graph->GetExitBlock();
@@ -1835,15 +1904,30 @@ bool HInliner::CanInlineBody(const HGraph* callee_graph,
bool has_one_return = false;
for (HBasicBlock* predecessor : exit_block->GetPredecessors()) {
- if (predecessor->GetLastInstruction()->IsThrow()) {
- if (target_block->IsTryBlock()) {
- // TODO(ngeoffray): Support adding HTryBoundary in Hgraph::InlineInto.
- LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedTryCatchCaller)
+ const HInstruction* last_instruction = predecessor->GetLastInstruction();
+ // On inlinees, we can have Return/ReturnVoid/Throw -> TryBoundary -> Exit. To check for the
+ // actual last instruction, we have to skip the TryBoundary instruction.
+ if (last_instruction->IsTryBoundary()) {
+ predecessor = predecessor->GetSinglePredecessor();
+ last_instruction = predecessor->GetLastInstruction();
+
+ // If the last instruction chain is Return/ReturnVoid -> TryBoundary -> Exit, we will have to
+ // split a critical edge in InlineInto and might recompute loop information, which is
+ // unsupported for irreducible loops.
+ if (!last_instruction->IsThrow() && graph_->HasIrreducibleLoops()) {
+ DCHECK(last_instruction->IsReturn() || last_instruction->IsReturnVoid());
+ // TODO(ngeoffray): Support re-computing loop information to graphs with
+ // irreducible loops?
+ LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedIrreducibleLoopCaller)
<< "Method " << resolved_method->PrettyMethod()
- << " could not be inlined because one branch always throws and"
- << " caller is in a try/catch block";
+ << " could not be inlined because we will have to recompute the loop information and"
+ << " the caller has irreducible loops";
return false;
- } else if (graph_->GetExitBlock() == nullptr) {
+ }
+ }
+
+ if (last_instruction->IsThrow()) {
+ if (graph_->GetExitBlock() == nullptr) {
// TODO(ngeoffray): Support adding HExit in the caller graph.
LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedInfiniteLoop)
<< "Method " << resolved_method->PrettyMethod()
@@ -1853,9 +1937,10 @@ bool HInliner::CanInlineBody(const HGraph* callee_graph,
} else if (graph_->HasIrreducibleLoops()) {
// TODO(ngeoffray): Support re-computing loop information to graphs with
// irreducible loops?
- VLOG(compiler) << "Method " << resolved_method->PrettyMethod()
- << " could not be inlined because one branch always throws and"
- << " caller has irreducible loops";
+ LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedIrreducibleLoopCaller)
+ << "Method " << resolved_method->PrettyMethod()
+ << " could not be inlined because one branch always throws and"
+ << " the caller has irreducible loops";
return false;
}
} else {
@@ -1864,6 +1949,15 @@ bool HInliner::CanInlineBody(const HGraph* callee_graph,
}
if (!has_one_return) {
+ if (!is_speculative) {
+ // If we know that the method always throws with the particular parameters, set it as such.
+ // This is better than using the dex instructions as we have more information about this
+ // particular call. We don't mark speculative inlines (e.g. the ones from the inline cache) as
+ // always throwing since they might not throw when executed.
+ invoke->SetAlwaysThrows(/* always_throws= */ true);
+ graph_->SetHasAlwaysThrowingInvokes(/* value= */ true);
+ }
+
LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedAlwaysThrows)
<< "Method " << resolved_method->PrettyMethod()
<< " could not be inlined because it always throws";
@@ -1882,7 +1976,7 @@ bool HInliner::CanInlineBody(const HGraph* callee_graph,
if (block->GetLoopInformation()->IsIrreducible()) {
// Don't inline methods with irreducible loops, they could prevent some
// optimizations to run.
- LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedIrreducibleLoop)
+ LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedIrreducibleLoopCallee)
<< "Method " << resolved_method->PrettyMethod()
<< " could not be inlined because it contains an irreducible loop";
return false;
@@ -1930,8 +2024,10 @@ bool HInliner::CanInlineBody(const HGraph* callee_graph,
if (current->IsUnresolvedStaticFieldGet() ||
current->IsUnresolvedInstanceFieldGet() ||
current->IsUnresolvedStaticFieldSet() ||
- current->IsUnresolvedInstanceFieldSet()) {
- // Entrypoint for unresolved fields does not handle inlined frames.
+ current->IsUnresolvedInstanceFieldSet() ||
+ current->IsInvokeUnresolved()) {
+ // Unresolved invokes / field accesses are expensive at runtime when decoding inlining info,
+ // so don't inline methods that have them.
LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedUnresolvedEntrypoint)
<< "Method " << resolved_method->PrettyMethod()
<< " could not be inlined because it is using an unresolved"
@@ -1964,7 +2060,8 @@ bool HInliner::CanInlineBody(const HGraph* callee_graph,
bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
ArtMethod* resolved_method,
ReferenceTypeInfo receiver_type,
- HInstruction** return_replacement) {
+ HInstruction** return_replacement,
+ bool is_speculative) {
DCHECK(!(resolved_method->IsStatic() && receiver_type.IsValid()));
const dex::CodeItem* code_item = resolved_method->GetCodeItem();
const DexFile& callee_dex_file = *resolved_method->GetDexFile();
@@ -2057,10 +2154,18 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
SubstituteArguments(callee_graph, invoke_instruction, receiver_type, dex_compilation_unit);
- RunOptimizations(callee_graph, code_item, dex_compilation_unit);
+ const bool try_catch_inlining_allowed_for_recursive_inline =
+ // It was allowed previously.
+ try_catch_inlining_allowed_ &&
+ // The current invoke is not a try block.
+ !invoke_instruction->GetBlock()->IsTryBlock();
+ RunOptimizations(callee_graph,
+ code_item,
+ dex_compilation_unit,
+ try_catch_inlining_allowed_for_recursive_inline);
size_t number_of_instructions = 0;
- if (!CanInlineBody(callee_graph, invoke_instruction->GetBlock(), &number_of_instructions)) {
+ if (!CanInlineBody(callee_graph, invoke_instruction, &number_of_instructions, is_speculative)) {
return false;
}
@@ -2095,16 +2200,17 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
void HInliner::RunOptimizations(HGraph* callee_graph,
const dex::CodeItem* code_item,
- const DexCompilationUnit& dex_compilation_unit) {
+ const DexCompilationUnit& dex_compilation_unit,
+ bool try_catch_inlining_allowed_for_recursive_inline) {
// Note: if the outermost_graph_ is being compiled OSR, we should not run any
// optimization that could lead to a HDeoptimize. The following optimizations do not.
HDeadCodeElimination dce(callee_graph, inline_stats_, "dead_code_elimination$inliner");
- HConstantFolding fold(callee_graph, "constant_folding$inliner");
+ HConstantFolding fold(callee_graph, inline_stats_, "constant_folding$inliner");
InstructionSimplifier simplify(callee_graph, codegen_, inline_stats_);
HOptimization* optimizations[] = {
- &simplify,
&fold,
+ &simplify,
&dce,
};
@@ -2141,7 +2247,8 @@ void HInliner::RunOptimizations(HGraph* callee_graph,
total_number_of_dex_registers_ + accessor.RegistersSize(),
total_number_of_instructions_ + number_of_instructions,
this,
- depth_ + 1);
+ depth_ + 1,
+ try_catch_inlining_allowed_for_recursive_inline);
inliner.Run();
}
@@ -2155,6 +2262,10 @@ static bool IsReferenceTypeRefinement(ObjPtr<mirror::Class> declared_class,
}
ReferenceTypeInfo actual_rti = actual_obj->GetReferenceTypeInfo();
+ if (!actual_rti.IsValid()) {
+ return false;
+ }
+
ObjPtr<mirror::Class> actual_class = actual_rti.GetTypeHandle().Get();
return (actual_rti.IsExact() && !declared_is_exact) ||
(declared_class != actual_class && declared_class->IsAssignableFrom(actual_class));
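The save/set-to-false/restore dance around `try_catch_inlining_allowed_` in TryInline above is a plain flag override scoped to the speculative inlining paths. One way to picture it is with a small RAII helper (hypothetical, not ART's; the real code restores the flag manually before each return):

#include <cstdio>

// Hypothetical RAII helper expressing the same save/set-false/restore pattern that
// HInliner::TryInline performs manually around TryInlineFromCHA / TryInlineFromInlineCache.
class ScopedFlagOverride {
 public:
  ScopedFlagOverride(bool* flag, bool value) : flag_(flag), previous_(*flag) { *flag_ = value; }
  ~ScopedFlagOverride() { *flag_ = previous_; }  // Restored even on early returns.
 private:
  bool* const flag_;
  const bool previous_;
};

struct Inliner {
  bool try_catch_inlining_allowed = true;

  bool TrySpeculativeInlines() {
    // Speculative paths (CHA guards, inline caches) must not inline try catches,
    // but the flag has to come back for the next invoke handled by this instance.
    ScopedFlagOverride no_try_catch(&try_catch_inlining_allowed, false);
    std::printf("during speculative inlining: %d\n", try_catch_inlining_allowed);
    return false;
  }
};

int main() {
  Inliner inliner;
  inliner.TrySpeculativeInlines();
  std::printf("after: %d\n", inliner.try_catch_inlining_allowed);  // back to 1
  return 0;
}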
diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h
index a2c2085e00..af067dae73 100644
--- a/compiler/optimizing/inliner.h
+++ b/compiler/optimizing/inliner.h
@@ -17,13 +17,14 @@
#ifndef ART_COMPILER_OPTIMIZING_INLINER_H_
#define ART_COMPILER_OPTIMIZING_INLINER_H_
+#include "base/macros.h"
#include "dex/dex_file_types.h"
#include "dex/invoke_type.h"
#include "jit/profiling_info.h"
#include "optimization.h"
#include "profile/profile_compilation_info.h"
-namespace art {
+namespace art HIDDEN {
class CodeGenerator;
class DexCompilationUnit;
@@ -42,7 +43,8 @@ class HInliner : public HOptimization {
size_t total_number_of_dex_registers,
size_t total_number_of_instructions,
HInliner* parent,
- size_t depth = 0,
+ size_t depth,
+ bool try_catch_inlining_allowed,
const char* name = kInlinerPassName)
: HOptimization(outer_graph, name, stats),
outermost_graph_(outermost_graph),
@@ -54,6 +56,7 @@ class HInliner : public HOptimization {
parent_(parent),
depth_(depth),
inlining_budget_(0),
+ try_catch_inlining_allowed_(try_catch_inlining_allowed),
inline_stats_(nullptr) {}
bool Run() override;
@@ -70,9 +73,7 @@ class HInliner : public HOptimization {
kInlineCacheMissingTypes = 5
};
- // We set `did_set_always_throws` as true if we analyzed `invoke_instruction` and it always
- // throws.
- bool TryInline(HInvoke* invoke_instruction, /*inout*/ bool* did_set_always_throws);
+ bool TryInline(HInvoke* invoke_instruction);
// Try to inline `resolved_method` in place of `invoke_instruction`. `do_rtp` is whether
// reference type propagation can run after the inlining. If the inlining is successful, this
@@ -80,19 +81,22 @@ class HInliner : public HOptimization {
bool TryInlineAndReplace(HInvoke* invoke_instruction,
ArtMethod* resolved_method,
ReferenceTypeInfo receiver_type,
- bool do_rtp)
+ bool do_rtp,
+ bool is_speculative)
REQUIRES_SHARED(Locks::mutator_lock_);
bool TryBuildAndInline(HInvoke* invoke_instruction,
ArtMethod* resolved_method,
ReferenceTypeInfo receiver_type,
- HInstruction** return_replacement)
+ HInstruction** return_replacement,
+ bool is_speculative)
REQUIRES_SHARED(Locks::mutator_lock_);
bool TryBuildAndInlineHelper(HInvoke* invoke_instruction,
ArtMethod* resolved_method,
ReferenceTypeInfo receiver_type,
- HInstruction** return_replacement)
+ HInstruction** return_replacement,
+ bool is_speculative)
REQUIRES_SHARED(Locks::mutator_lock_);
// Substitutes parameters in the callee graph with their values from the caller.
@@ -105,8 +109,9 @@ class HInliner : public HOptimization {
// Run simple optimizations on `callee_graph`.
void RunOptimizations(HGraph* callee_graph,
const dex::CodeItem* code_item,
- const DexCompilationUnit& dex_compilation_unit)
- REQUIRES_SHARED(Locks::mutator_lock_);
+ const DexCompilationUnit& dex_compilation_unit,
+ bool try_catch_inlining_allowed_for_recursive_inline)
+ REQUIRES_SHARED(Locks::mutator_lock_);
// Try to recognize known simple patterns and replace invoke call with appropriate instructions.
bool TryPatternSubstitution(HInvoke* invoke_instruction,
@@ -129,12 +134,14 @@ class HInliner : public HOptimization {
const CodeItemDataAccessor& accessor) const
REQUIRES_SHARED(Locks::mutator_lock_);
- // Returns whether the inlining budget allows inlining method.
+ // Returns whether inlining is encouraged.
//
// For example, this checks whether the function has grown too large and
// inlining should be prevented.
- bool IsInliningBudgetAvailable(art::ArtMethod* method, const CodeItemDataAccessor& accessor) const
- REQUIRES_SHARED(Locks::mutator_lock_);
+ bool IsInliningEncouraged(const HInvoke* invoke_instruction,
+ art::ArtMethod* method,
+ const CodeItemDataAccessor& accessor) const
+ REQUIRES_SHARED(Locks::mutator_lock_);
// Inspects the body of a method (callee_graph) and returns whether it can be
// inlined.
@@ -142,8 +149,9 @@ class HInliner : public HOptimization {
// This checks for instructions and constructs that we do not support
// inlining, such as inlining a throw instruction into a try block.
bool CanInlineBody(const HGraph* callee_graph,
- const HBasicBlock* target_block,
- size_t* out_number_of_instructions) const
+ HInvoke* invoke,
+ size_t* out_number_of_instructions,
+ bool is_speculative) const
REQUIRES_SHARED(Locks::mutator_lock_);
// Create a new HInstanceFieldGet.
@@ -320,6 +328,9 @@ class HInliner : public HOptimization {
// The budget left for inlining, in number of instructions.
size_t inlining_budget_;
+ // Whether we allow try catch inlining to occur at this particular instance of inlining.
+ bool try_catch_inlining_allowed_;
+
// Used to record stats about optimizations on the inlined graph.
// If the inlining is successful, these stats are merged to the caller graph's stats.
OptimizingCompilerStats* inline_stats_;
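The `is_speculative` parameter threaded through these declarations exists so that CanInlineBody only records the always-throws fact when the inlining target is certain, not when it came from a CHA guard or an inline cache. A toy reduction of that decision, with hypothetical names, assuming a callee "always throws" exactly when none of its exit predecessors ends in a return:

#include <cstdio>
#include <vector>

enum class Terminator { kReturn, kReturnVoid, kThrow };

struct InvokeInfo {
  bool always_throws = false;
};

// Hypothetical reduction of the has_one_return scan in CanInlineBody: the callee is
// treated as always throwing only if none of the exit block's predecessors returns,
// and we only record that fact for non-speculative (single-target) inlining attempts.
static bool MarkIfAlwaysThrows(const std::vector<Terminator>& exit_predecessors,
                               bool is_speculative,
                               InvokeInfo* invoke) {
  bool has_one_return = false;
  for (Terminator t : exit_predecessors) {
    if (t == Terminator::kReturn || t == Terminator::kReturnVoid) {
      has_one_return = true;
    }
  }
  if (!has_one_return && !is_speculative) {
    invoke->always_throws = true;  // Later passes (e.g. dead code elimination) can use this fact.
  }
  return has_one_return;
}

int main() {
  InvokeInfo invoke;
  std::vector<Terminator> only_throws = {Terminator::kThrow, Terminator::kThrow};
  bool inlinable = MarkIfAlwaysThrows(only_throws, /*is_speculative=*/false, &invoke);
  std::printf("inlinable=%d always_throws=%d\n", inlinable, invoke.always_throws);  // 0 1
  return 0;
}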
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
index e0bdd0963c..fee9091145 100644
--- a/compiler/optimizing/instruction_builder.cc
+++ b/compiler/optimizing/instruction_builder.cc
@@ -42,7 +42,7 @@
#include "ssa_builder.h"
#include "well_known_classes.h"
-namespace art {
+namespace art HIDDEN {
namespace {
@@ -343,6 +343,10 @@ static bool IsBlockPopulated(HBasicBlock* block) {
// Suspend checks were inserted into loop headers during building of dominator tree.
DCHECK(block->GetFirstInstruction()->IsSuspendCheck());
return block->GetFirstInstruction() != block->GetLastInstruction();
+ } else if (block->IsCatchBlock()) {
+ // Nops were inserted into the beginning of catch blocks.
+ DCHECK(block->GetFirstInstruction()->IsNop());
+ return block->GetFirstInstruction() != block->GetLastInstruction();
} else {
return !block->GetInstructions().IsEmpty();
}
@@ -387,6 +391,11 @@ bool HInstructionBuilder::Build() {
// This is slightly odd because the loop header might not be empty (TryBoundary).
// But we're still creating the environment with locals from the top of the block.
InsertInstructionAtTop(suspend_check);
+ } else if (current_block_->IsCatchBlock()) {
+ // We add an environment emitting instruction at the beginning of each catch block, in order
+ // to support try catch inlining.
+ // This is slightly odd because the catch block might not be empty (TryBoundary).
+ InsertInstructionAtTop(new (allocator_) HNop(block_dex_pc, /* needs_environment= */ true));
}
if (block_dex_pc == kNoDexPc || current_block_ != block_builder_->GetBlockAt(block_dex_pc)) {
@@ -414,7 +423,7 @@ bool HInstructionBuilder::Build() {
}
if (native_debuggable && native_debug_info_locations->IsBitSet(dex_pc)) {
- AppendInstruction(new (allocator_) HNativeDebugInfo(dex_pc));
+ AppendInstruction(new (allocator_) HNop(dex_pc, /* needs_environment= */ true));
}
// Note: There may be no Thread for gtests.
@@ -460,6 +469,9 @@ void HInstructionBuilder::BuildIntrinsic(ArtMethod* method) {
current_block_ = graph_->GetEntryBlock();
InitializeBlockLocals();
InitializeParameters();
+ if (graph_->IsDebuggable() && code_generator_->GetCompilerOptions().IsJitCompiler()) {
+ AppendInstruction(new (allocator_) HMethodEntryHook(0u));
+ }
AppendInstruction(new (allocator_) HGoto(0u));
// Fill the body.
@@ -495,14 +507,21 @@ void HInstructionBuilder::BuildIntrinsic(ArtMethod* method) {
dispatch_info,
invoke_type,
target_method,
- HInvokeStaticOrDirect::ClinitCheckRequirement::kNone);
+ HInvokeStaticOrDirect::ClinitCheckRequirement::kNone,
+ !graph_->IsDebuggable());
HandleInvoke(invoke, operands, shorty, /* is_unresolved= */ false);
}
// Add the return instruction.
if (return_type_ == DataType::Type::kVoid) {
+ if (graph_->IsDebuggable() && code_generator_->GetCompilerOptions().IsJitCompiler()) {
+ AppendInstruction(new (allocator_) HMethodExitHook(graph_->GetNullConstant(), kNoDexPc));
+ }
AppendInstruction(new (allocator_) HReturnVoid());
} else {
+ if (graph_->IsDebuggable() && code_generator_->GetCompilerOptions().IsJitCompiler()) {
+ AppendInstruction(new (allocator_) HMethodExitHook(latest_result_, kNoDexPc));
+ }
AppendInstruction(new (allocator_) HReturn(latest_result_));
}
@@ -972,11 +991,11 @@ static ArtMethod* ResolveMethod(uint16_t method_idx,
*imt_or_vtable_index = resolved_method->GetVtableIndex();
} else if (*invoke_type == kInterface) {
// For HInvokeInterface we need the IMT index.
- *imt_or_vtable_index = ImTable::GetImtIndex(resolved_method);
+ *imt_or_vtable_index = resolved_method->GetImtIndex();
+ DCHECK_EQ(*imt_or_vtable_index, ImTable::GetImtIndex(resolved_method));
}
- *is_string_constructor =
- resolved_method->IsConstructor() && resolved_method->GetDeclaringClass()->IsStringClass();
+ *is_string_constructor = resolved_method->IsStringConstructor();
return resolved_method;
}
@@ -1041,7 +1060,8 @@ bool HInstructionBuilder::BuildInvoke(const Instruction& instruction,
dispatch_info,
invoke_type,
resolved_method_reference,
- HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit);
+ HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit,
+ !graph_->IsDebuggable());
return HandleStringInit(invoke, operands, shorty);
}
@@ -1054,7 +1074,7 @@ bool HInstructionBuilder::BuildInvoke(const Instruction& instruction,
}
// Try to build an HIR replacement for the intrinsic.
- if (UNLIKELY(resolved_method->IsIntrinsic())) {
+ if (UNLIKELY(resolved_method->IsIntrinsic()) && !graph_->IsDebuggable()) {
// All intrinsics are in the primary boot image, so their class can always be referenced
// and we do not need to rely on the implicit class initialization check. The class should
// be initialized but we do not require that here.
@@ -1105,7 +1125,8 @@ bool HInstructionBuilder::BuildInvoke(const Instruction& instruction,
dispatch_info,
invoke_type,
resolved_method_reference,
- clinit_check_requirement);
+ clinit_check_requirement,
+ !graph_->IsDebuggable());
if (clinit_check != nullptr) {
// Add the class initialization check as last input of `invoke`.
DCHECK_EQ(clinit_check_requirement, HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit);
@@ -1121,7 +1142,8 @@ bool HInstructionBuilder::BuildInvoke(const Instruction& instruction,
method_reference,
resolved_method,
resolved_method_reference,
- /*vtable_index=*/ imt_or_vtable_index);
+ /*vtable_index=*/ imt_or_vtable_index,
+ !graph_->IsDebuggable());
} else {
DCHECK_EQ(invoke_type, kInterface);
if (kIsDebugBuild) {
@@ -1142,7 +1164,8 @@ bool HInstructionBuilder::BuildInvoke(const Instruction& instruction,
resolved_method,
resolved_method_reference,
/*imt_index=*/ imt_or_vtable_index,
- load_kind);
+ load_kind,
+ !graph_->IsDebuggable());
}
return HandleInvoke(invoke, operands, shorty, /* is_unresolved= */ false);
}
@@ -1341,12 +1364,14 @@ bool HInstructionBuilder::BuildInvokePolymorphic(uint32_t dex_pc,
method_reference,
resolved_method,
resolved_method_reference,
- proto_idx);
+ proto_idx,
+ !graph_->IsDebuggable());
if (!HandleInvoke(invoke, operands, shorty, /* is_unresolved= */ false)) {
return false;
}
- if (invoke->GetIntrinsic() != Intrinsics::kMethodHandleInvoke &&
+ if (invoke->GetIntrinsic() != Intrinsics::kNone &&
+ invoke->GetIntrinsic() != Intrinsics::kMethodHandleInvoke &&
invoke->GetIntrinsic() != Intrinsics::kMethodHandleInvokeExact &&
VarHandleAccessorNeedsReturnTypeCheck(invoke, return_type)) {
// Type check is needed because VarHandle intrinsics do not type check the retrieved reference.
@@ -1379,7 +1404,8 @@ bool HInstructionBuilder::BuildInvokeCustom(uint32_t dex_pc,
call_site_idx,
return_type,
dex_pc,
- method_reference);
+ method_reference,
+ !graph_->IsDebuggable());
return HandleInvoke(invoke, operands, shorty, /* is_unresolved= */ false);
}
diff --git a/compiler/optimizing/instruction_builder.h b/compiler/optimizing/instruction_builder.h
index 817fbaa9e8..3d65d8fb54 100644
--- a/compiler/optimizing/instruction_builder.h
+++ b/compiler/optimizing/instruction_builder.h
@@ -18,6 +18,7 @@
#define ART_COMPILER_OPTIMIZING_INSTRUCTION_BUILDER_H_
#include "base/array_ref.h"
+#include "base/macros.h"
#include "base/scoped_arena_allocator.h"
#include "base/scoped_arena_containers.h"
#include "data_type.h"
@@ -27,7 +28,7 @@
#include "handle.h"
#include "nodes.h"
-namespace art {
+namespace art HIDDEN {
class ArenaBitVector;
class ArtField;
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index 789f07786c..0c2fd5de56 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -31,13 +31,13 @@
#include "sharpening.h"
#include "string_builder_append.h"
-namespace art {
+namespace art HIDDEN {
// Whether to run an exhaustive test of individual HInstructions cloning when each instruction
// is replaced with its copy if it is clonable.
static constexpr bool kTestInstructionClonerExhaustively = false;
-class InstructionSimplifierVisitor : public HGraphDelegateVisitor {
+class InstructionSimplifierVisitor final : public HGraphDelegateVisitor {
public:
InstructionSimplifierVisitor(HGraph* graph,
CodeGenerator* codegen,
@@ -970,7 +970,7 @@ void InstructionSimplifierVisitor::VisitPredicatedInstanceFieldGet(
pred_get->GetFieldInfo().GetDexFile(),
pred_get->GetDexPc());
if (pred_get->GetType() == DataType::Type::kReference) {
- replace_with->SetReferenceTypeInfo(pred_get->GetReferenceTypeInfo());
+ replace_with->SetReferenceTypeInfoIfValid(pred_get->GetReferenceTypeInfo());
}
pred_get->GetBlock()->InsertInstructionBefore(replace_with, pred_get);
pred_get->ReplaceWith(replace_with);
@@ -1117,6 +1117,10 @@ void InstructionSimplifierVisitor::VisitIf(HIf* instruction) {
}
}
+// TODO(solanes): This optimization should be in ConstantFolding since we are folding to a constant.
+// However, we get code size regressions when we do that since we sometimes have a NullCheck between
+// HArrayLength and IsNewArray, and said NullCheck is eliminated in InstructionSimplifier. If we run
+// ConstantFolding and InstructionSimplifier in lockstep, this wouldn't be an issue.
void InstructionSimplifierVisitor::VisitArrayLength(HArrayLength* instruction) {
HInstruction* input = instruction->InputAt(0);
// If the array is a NewArray with constant size, replace the array length
@@ -1142,13 +1146,13 @@ void InstructionSimplifierVisitor::VisitArraySet(HArraySet* instruction) {
if (value->IsArrayGet()) {
if (value->AsArrayGet()->GetArray() == instruction->GetArray()) {
// If the code is just swapping elements in the array, no need for a type check.
- instruction->ClearNeedsTypeCheck();
+ instruction->ClearTypeCheck();
return;
}
}
if (value->IsNullConstant()) {
- instruction->ClearNeedsTypeCheck();
+ instruction->ClearTypeCheck();
return;
}
@@ -1160,13 +1164,13 @@ void InstructionSimplifierVisitor::VisitArraySet(HArraySet* instruction) {
}
if (value_rti.IsValid() && array_rti.CanArrayHold(value_rti)) {
- instruction->ClearNeedsTypeCheck();
+ instruction->ClearTypeCheck();
return;
}
if (array_rti.IsObjectArray()) {
if (array_rti.IsExact()) {
- instruction->ClearNeedsTypeCheck();
+ instruction->ClearTypeCheck();
return;
}
instruction->SetStaticTypeOfArrayIsObjectArray();
@@ -1860,13 +1864,16 @@ void InstructionSimplifierVisitor::VisitDiv(HDiv* instruction) {
// Search HDiv having the specified dividend and divisor which is in the specified basic block.
// Return nullptr if nothing has been found.
-static HInstruction* FindDivWithInputsInBasicBlock(HInstruction* dividend,
- HInstruction* divisor,
- HBasicBlock* basic_block) {
+static HDiv* FindDivWithInputsInBasicBlock(HInstruction* dividend,
+ HInstruction* divisor,
+ HBasicBlock* basic_block) {
for (const HUseListNode<HInstruction*>& use : dividend->GetUses()) {
HInstruction* user = use.GetUser();
- if (user->GetBlock() == basic_block && user->IsDiv() && user->InputAt(1) == divisor) {
- return user;
+ if (user->GetBlock() == basic_block &&
+ user->IsDiv() &&
+ user->InputAt(0) == dividend &&
+ user->InputAt(1) == divisor) {
+ return user->AsDiv();
}
}
return nullptr;
@@ -1900,7 +1907,7 @@ void InstructionSimplifierVisitor::TryToReuseDiv(HRem* rem) {
}
}
- HInstruction* quotient = FindDivWithInputsInBasicBlock(dividend, divisor, basic_block);
+ HDiv* quotient = FindDivWithInputsInBasicBlock(dividend, divisor, basic_block);
if (quotient == nullptr) {
return;
}
@@ -2458,7 +2465,7 @@ void InstructionSimplifierVisitor::SimplifySystemArrayCopy(HInvoke* instruction)
DCHECK(method != nullptr);
DCHECK(method->IsStatic());
DCHECK(method->GetDeclaringClass() == system);
- invoke->SetResolvedMethod(method);
+ invoke->SetResolvedMethod(method, !codegen_->GetGraph()->IsDebuggable());
// Sharpen the new invoke. Note that we do not update the dex method index of
// the invoke, as we would need to look it up in the current dex file, and it
// is unlikely that it exists. The most usual situation for such typed
@@ -2647,15 +2654,13 @@ static bool TryReplaceStringBuilderAppend(HInvoke* invoke) {
// Collect args and check for unexpected uses.
// We expect one call to a constructor with no arguments, one constructor fence (unless
// eliminated), some number of append calls and one call to StringBuilder.toString().
- bool constructor_inlined = false;
bool seen_constructor = false;
bool seen_constructor_fence = false;
bool seen_to_string = false;
uint32_t format = 0u;
uint32_t num_args = 0u;
+ bool has_fp_args = false;
HInstruction* args[StringBuilderAppend::kMaxArgs]; // Added in reverse order.
- // When inlining, `maybe_new_array` tracks an environment use that we want to allow.
- HInstruction* maybe_new_array = nullptr;
for (HBackwardInstructionIterator iter(block->GetInstructions()); !iter.Done(); iter.Advance()) {
HInstruction* user = iter.Current();
// Instructions of interest apply to `sb`, skip those that do not involve `sb`.
@@ -2700,6 +2705,14 @@ static bool TryReplaceStringBuilderAppend(HInvoke* invoke) {
case Intrinsics::kStringBuilderAppendLong:
arg = StringBuilderAppend::Argument::kLong;
break;
+ case Intrinsics::kStringBuilderAppendFloat:
+ arg = StringBuilderAppend::Argument::kFloat;
+ has_fp_args = true;
+ break;
+ case Intrinsics::kStringBuilderAppendDouble:
+ arg = StringBuilderAppend::Argument::kDouble;
+ has_fp_args = true;
+ break;
case Intrinsics::kStringBuilderAppendCharSequence: {
ReferenceTypeInfo rti = user->AsInvokeVirtual()->InputAt(1)->GetReferenceTypeInfo();
if (!rti.IsValid()) {
@@ -2719,10 +2732,6 @@ static bool TryReplaceStringBuilderAppend(HInvoke* invoke) {
}
break;
}
- case Intrinsics::kStringBuilderAppendFloat:
- case Intrinsics::kStringBuilderAppendDouble:
- // TODO: Unimplemented, needs to call FloatingDecimal.getBinaryToASCIIConverter().
- return false;
default: {
return false;
}
@@ -2736,25 +2745,13 @@ static bool TryReplaceStringBuilderAppend(HInvoke* invoke) {
format = (format << StringBuilderAppend::kBitsPerArg) | static_cast<uint32_t>(arg);
args[num_args] = as_invoke_virtual->InputAt(1u);
++num_args;
- } else if (!seen_constructor) {
- // At this point, we should see the constructor. However, we might have inlined it so we have
- // to take care of both cases. We accept only the constructor with no extra arguments. This
- // means that if we inline it, we have to check it is setting its field to a new array.
- if (user->IsInvokeStaticOrDirect() &&
- user->AsInvokeStaticOrDirect()->GetResolvedMethod() != nullptr &&
- user->AsInvokeStaticOrDirect()->GetResolvedMethod()->IsConstructor() &&
- user->AsInvokeStaticOrDirect()->GetNumberOfArguments() == 1u) {
- constructor_inlined = false;
- } else if (user->IsInstanceFieldSet() &&
- user->AsInstanceFieldSet()->GetFieldType() == DataType::Type::kReference &&
- user->AsInstanceFieldSet()->InputAt(0) == sb &&
- user->AsInstanceFieldSet()->GetValue()->IsNewArray()) {
- maybe_new_array = user->AsInstanceFieldSet()->GetValue();
- constructor_inlined = true;
- } else {
- // We were expecting a constructor but we haven't seen it. Abort optimization.
- return false;
- }
+ } else if (user->IsInvokeStaticOrDirect() &&
+ user->AsInvokeStaticOrDirect()->GetResolvedMethod() != nullptr &&
+ user->AsInvokeStaticOrDirect()->GetResolvedMethod()->IsConstructor() &&
+ user->AsInvokeStaticOrDirect()->GetNumberOfArguments() == 1u) {
+ // After arguments, we should see the constructor.
+ // We accept only the constructor with no extra arguments.
+ DCHECK(!seen_constructor);
DCHECK(!seen_constructor_fence);
seen_constructor = true;
} else if (user->IsConstructorFence()) {
@@ -2780,17 +2777,6 @@ static bool TryReplaceStringBuilderAppend(HInvoke* invoke) {
// Accept only calls on the StringBuilder (which shall all be removed).
// TODO: Carve-out for const-string? Or rely on environment pruning (to be implemented)?
if (holder->InputCount() == 0 || holder->InputAt(0) != sb) {
- // When inlining the constructor, we have a NewArray and may have a LoadClass as an
- // environment use.
- if (constructor_inlined) {
- if (holder == maybe_new_array) {
- continue;
- }
- if (holder == maybe_new_array->InputAt(0)) {
- DCHECK(holder->IsLoadClass());
- continue;
- }
- }
return false;
}
}
@@ -2798,9 +2784,9 @@ static bool TryReplaceStringBuilderAppend(HInvoke* invoke) {
// Create replacement instruction.
HIntConstant* fmt = block->GetGraph()->GetIntConstant(static_cast<int32_t>(format));
ArenaAllocator* allocator = block->GetGraph()->GetAllocator();
- HStringBuilderAppend* append =
- new (allocator) HStringBuilderAppend(fmt, num_args, allocator, invoke->GetDexPc());
- append->SetReferenceTypeInfo(invoke->GetReferenceTypeInfo());
+ HStringBuilderAppend* append = new (allocator) HStringBuilderAppend(
+ fmt, num_args, has_fp_args, allocator, invoke->GetDexPc());
+ append->SetReferenceTypeInfoIfValid(invoke->GetReferenceTypeInfo());
for (size_t i = 0; i != num_args; ++i) {
append->SetArgumentAt(i, args[num_args - 1u - i]);
}
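
Note on the `format` word built above: each append argument kind occupies StringBuilderAppend::kBitsPerArg bits, shifted in as the block is walked backwards, so the low bits end up describing the arguments in call order. A small sketch of that packing under assumed, generic names (not ART's actual helper):

#include <cstdint>
#include <vector>

// Hypothetical illustration of the shift-and-or packing used for `format`.
uint32_t PackAppendFormat(const std::vector<uint32_t>& kinds_in_reverse, uint32_t bits_per_arg) {
  uint32_t format = 0u;
  for (uint32_t kind : kinds_in_reverse) {
    format = (format << bits_per_arg) | kind;
  }
  return format;
}
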
@@ -2824,33 +2810,6 @@ static bool TryReplaceStringBuilderAppend(HInvoke* invoke) {
while (sb->HasNonEnvironmentUses()) {
block->RemoveInstruction(sb->GetUses().front().GetUser());
}
- if (constructor_inlined) {
- // We need to remove the inlined constructor instructions,
- // and all remaining environment uses (if any).
- DCHECK(sb->HasEnvironmentUses());
- DCHECK(maybe_new_array != nullptr);
- DCHECK(maybe_new_array->IsNewArray());
- DCHECK(maybe_new_array->HasNonEnvironmentUses());
- HInstruction* fence = maybe_new_array->GetUses().front().GetUser();
- DCHECK(fence->IsConstructorFence());
- block->RemoveInstruction(fence);
- block->RemoveInstruction(maybe_new_array);
- if (sb->HasEnvironmentUses()) {
- // We know the only remaining uses are from the LoadClass.
- HInstruction* load_class = maybe_new_array->InputAt(0);
- DCHECK(load_class->IsLoadClass());
- for (HEnvironment* env = load_class->GetEnvironment();
- env != nullptr;
- env = env->GetParent()) {
- for (size_t i = 0, size = env->Size(); i != size; ++i) {
- if (env->GetInstructionAt(i) == sb) {
- env->RemoveAsUserOfInput(i);
- env->SetRawEnvAt(i, /*instruction=*/ nullptr);
- }
- }
- }
- }
- }
DCHECK(!sb->HasEnvironmentUses());
block->RemoveInstruction(sb);
return true;
diff --git a/compiler/optimizing/instruction_simplifier.h b/compiler/optimizing/instruction_simplifier.h
index feea771096..98ebaafebc 100644
--- a/compiler/optimizing/instruction_simplifier.h
+++ b/compiler/optimizing/instruction_simplifier.h
@@ -17,11 +17,12 @@
#ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_H_
#define ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_H_
+#include "base/macros.h"
#include "nodes.h"
#include "optimization.h"
#include "optimizing_compiler_stats.h"
-namespace art {
+namespace art HIDDEN {
class CodeGenerator;
diff --git a/compiler/optimizing/instruction_simplifier_arm.cc b/compiler/optimizing/instruction_simplifier_arm.cc
index 1371ea7781..05a518d544 100644
--- a/compiler/optimizing/instruction_simplifier_arm.cc
+++ b/compiler/optimizing/instruction_simplifier_arm.cc
@@ -23,7 +23,7 @@
#include "mirror/string.h"
#include "nodes.h"
-namespace art {
+namespace art HIDDEN {
using helpers::CanFitInShifterOperand;
using helpers::HasShifterOperand;
@@ -31,7 +31,7 @@ using helpers::IsSubRightSubLeftShl;
namespace arm {
-class InstructionSimplifierArmVisitor : public HGraphVisitor {
+class InstructionSimplifierArmVisitor final : public HGraphVisitor {
public:
InstructionSimplifierArmVisitor(HGraph* graph, OptimizingCompilerStats* stats)
: HGraphVisitor(graph), stats_(stats) {}
diff --git a/compiler/optimizing/instruction_simplifier_arm.h b/compiler/optimizing/instruction_simplifier_arm.h
index fca9341d59..0517e4f49e 100644
--- a/compiler/optimizing/instruction_simplifier_arm.h
+++ b/compiler/optimizing/instruction_simplifier_arm.h
@@ -17,10 +17,11 @@
#ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_ARM_H_
#define ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_ARM_H_
+#include "base/macros.h"
#include "nodes.h"
#include "optimization.h"
-namespace art {
+namespace art HIDDEN {
namespace arm {
class InstructionSimplifierArm : public HOptimization {
diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc
index a6ec02012c..671900bd9d 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.cc
+++ b/compiler/optimizing/instruction_simplifier_arm64.cc
@@ -21,7 +21,7 @@
#include "mirror/array-inl.h"
#include "mirror/string.h"
-namespace art {
+namespace art HIDDEN {
using helpers::CanFitInShifterOperand;
using helpers::HasShifterOperand;
@@ -31,7 +31,7 @@ namespace arm64 {
using helpers::ShifterOperandSupportsExtension;
-class InstructionSimplifierArm64Visitor : public HGraphVisitor {
+class InstructionSimplifierArm64Visitor final : public HGraphVisitor {
public:
InstructionSimplifierArm64Visitor(HGraph* graph, OptimizingCompilerStats* stats)
: HGraphVisitor(graph), stats_(stats) {}
diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h
index 8d93c01ebf..374638ab9e 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.h
+++ b/compiler/optimizing/instruction_simplifier_arm64.h
@@ -17,10 +17,11 @@
#ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_ARM64_H_
#define ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_ARM64_H_
+#include "base/macros.h"
#include "nodes.h"
#include "optimization.h"
-namespace art {
+namespace art HIDDEN {
namespace arm64 {
class InstructionSimplifierArm64 : public HOptimization {
diff --git a/compiler/optimizing/instruction_simplifier_shared.cc b/compiler/optimizing/instruction_simplifier_shared.cc
index dc60ba62bb..34daae21ee 100644
--- a/compiler/optimizing/instruction_simplifier_shared.cc
+++ b/compiler/optimizing/instruction_simplifier_shared.cc
@@ -18,7 +18,7 @@
#include "mirror/array-inl.h"
-namespace art {
+namespace art HIDDEN {
namespace {
@@ -244,7 +244,7 @@ bool TryExtractArrayAccessAddress(HInstruction* access,
// The access may require a runtime call or the original array pointer.
return false;
}
- if (kEmitCompilerReadBarrier &&
+ if (gUseReadBarrier &&
!kUseBakerReadBarrier &&
access->IsArrayGet() &&
access->GetType() == DataType::Type::kReference) {
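
Note: the kEmitCompilerReadBarrier → gUseReadBarrier renames throughout this series replace a compile-time constant with a runtime-set global, so a single compiler build can serve configurations with and without the read barrier. A hedged sketch of the difference in gating style (definitions assumed, not ART's actual ones):

// Old style: a constexpr flag lets the compiler fold one branch away entirely.
// constexpr bool kEmitCompilerReadBarrier = true;

// New style: chosen when the runtime is configured; both paths stay compiled in.
bool gUseReadBarrier = false;               // would be set during runtime init
constexpr bool kUseBakerReadBarrier = true;

// Same shape as the check in TryExtractArrayAccessAddress above.
bool NeedsOriginalArrayPointer(bool is_reference_array_get) {
  return gUseReadBarrier && !kUseBakerReadBarrier && is_reference_array_get;
}
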
diff --git a/compiler/optimizing/instruction_simplifier_shared.h b/compiler/optimizing/instruction_simplifier_shared.h
index 876ed21a22..ddc3a867b8 100644
--- a/compiler/optimizing/instruction_simplifier_shared.h
+++ b/compiler/optimizing/instruction_simplifier_shared.h
@@ -17,9 +17,10 @@
#ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_SHARED_H_
#define ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_SHARED_H_
+#include "base/macros.h"
#include "nodes.h"
-namespace art {
+namespace art HIDDEN {
namespace helpers {
diff --git a/compiler/optimizing/instruction_simplifier_test.cc b/compiler/optimizing/instruction_simplifier_test.cc
index c7c5b12e25..966f5b91cf 100644
--- a/compiler/optimizing/instruction_simplifier_test.cc
+++ b/compiler/optimizing/instruction_simplifier_test.cc
@@ -26,13 +26,15 @@
#include "optimizing/data_type.h"
#include "optimizing_unit_test.h"
-namespace art {
+namespace art HIDDEN {
namespace mirror {
class ClassExt;
class Throwable;
} // namespace mirror
+static constexpr bool kDebugSimplifierTests = false;
+
template<typename SuperClass>
class InstructionSimplifierTestBase : public SuperClass, public OptimizingUnitTestHelper {
public:
@@ -49,6 +51,19 @@ class InstructionSimplifierTestBase : public SuperClass, public OptimizingUnitTe
SuperClass::TearDown();
gLogVerbosity.compiler = false;
}
+
+ void PerformSimplification(const AdjacencyListGraph& blks) {
+ if (kDebugSimplifierTests) {
+ LOG(INFO) << "Pre simplification " << blks;
+ }
+ graph_->ClearDominanceInformation();
+ graph_->BuildDominatorTree();
+ InstructionSimplifier simp(graph_, /*codegen=*/nullptr);
+ simp.Run();
+ if (kDebugSimplifierTests) {
+ LOG(INFO) << "Post simplify " << blks;
+ }
+ }
};
class InstructionSimplifierTest : public InstructionSimplifierTestBase<CommonCompilerTest> {};
@@ -197,13 +212,7 @@ TEST_F(InstructionSimplifierTest, SimplifyPredicatedFieldGetNoMerge) {
SetupExit(exit);
- LOG(INFO) << "Pre simplification " << blks;
- graph_->ClearDominanceInformation();
- graph_->BuildDominatorTree();
- InstructionSimplifier simp(graph_, /*codegen=*/nullptr);
- simp.Run();
-
- LOG(INFO) << "Post simplify " << blks;
+ PerformSimplification(blks);
EXPECT_INS_RETAINED(read_end);
@@ -289,13 +298,7 @@ TEST_F(InstructionSimplifierTest, SimplifyPredicatedFieldGetMerge) {
SetupExit(exit);
- LOG(INFO) << "Pre simplification " << blks;
- graph_->ClearDominanceInformation();
- graph_->BuildDominatorTree();
- InstructionSimplifier simp(graph_, /*codegen=*/nullptr);
- simp.Run();
-
- LOG(INFO) << "Post simplify " << blks;
+ PerformSimplification(blks);
EXPECT_FALSE(obj3->CanBeNull());
EXPECT_INS_RETAINED(read_end);
@@ -373,13 +376,7 @@ TEST_F(InstructionSimplifierTest, SimplifyPredicatedFieldGetNoNull) {
SetupExit(exit);
- LOG(INFO) << "Pre simplification " << blks;
- graph_->ClearDominanceInformation();
- graph_->BuildDominatorTree();
- InstructionSimplifier simp(graph_, /*codegen=*/nullptr);
- simp.Run();
-
- LOG(INFO) << "Post simplify " << blks;
+ PerformSimplification(blks);
EXPECT_FALSE(obj1->CanBeNull());
EXPECT_FALSE(obj2->CanBeNull());
@@ -464,16 +461,7 @@ TEST_P(InstanceOfInstructionSimplifierTestGroup, ExactClassInstanceOfOther) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
-
- LOG(INFO) << "Pre simplification " << blks;
- graph_->ClearDominanceInformation();
- graph_->BuildDominatorTree();
- InstructionSimplifier simp(graph_, /*codegen=*/nullptr);
- simp.Run();
-
- LOG(INFO) << "Post simplify " << blks;
+ PerformSimplification(blks);
if (!GetConstantResult() || GetParam() == InstanceOfKind::kSelf) {
EXPECT_INS_RETAINED(target_klass);
@@ -532,16 +520,7 @@ TEST_P(InstanceOfInstructionSimplifierTestGroup, ExactClassCheckCastOther) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
-
- LOG(INFO) << "Pre simplification " << blks;
- graph_->ClearDominanceInformation();
- graph_->BuildDominatorTree();
- InstructionSimplifier simp(graph_, /*codegen=*/nullptr);
- simp.Run();
-
- LOG(INFO) << "Post simplify " << blks;
+ PerformSimplification(blks);
if (!GetConstantResult() || GetParam() == InstanceOfKind::kSelf) {
EXPECT_INS_RETAINED(target_klass);
diff --git a/compiler/optimizing/instruction_simplifier_x86.cc b/compiler/optimizing/instruction_simplifier_x86.cc
index 2d8f94a85b..5a4345d589 100644
--- a/compiler/optimizing/instruction_simplifier_x86.cc
+++ b/compiler/optimizing/instruction_simplifier_x86.cc
@@ -17,11 +17,11 @@
#include "instruction_simplifier_x86_shared.h"
#include "code_generator_x86.h"
-namespace art {
+namespace art HIDDEN {
namespace x86 {
-class InstructionSimplifierX86Visitor : public HGraphVisitor {
+class InstructionSimplifierX86Visitor final : public HGraphVisitor {
public:
InstructionSimplifierX86Visitor(HGraph* graph,
CodeGenerator* codegen,
diff --git a/compiler/optimizing/instruction_simplifier_x86.h b/compiler/optimizing/instruction_simplifier_x86.h
index 6f10006db2..25ebe203b8 100644
--- a/compiler/optimizing/instruction_simplifier_x86.h
+++ b/compiler/optimizing/instruction_simplifier_x86.h
@@ -16,10 +16,11 @@
#ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_H_
#define ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_H_
+#include "base/macros.h"
#include "nodes.h"
#include "optimization.h"
-namespace art {
+namespace art HIDDEN {
class CodeGenerator;
namespace x86 {
diff --git a/compiler/optimizing/instruction_simplifier_x86_64.cc b/compiler/optimizing/instruction_simplifier_x86_64.cc
index 56c6b414d7..9ba1a8a960 100644
--- a/compiler/optimizing/instruction_simplifier_x86_64.cc
+++ b/compiler/optimizing/instruction_simplifier_x86_64.cc
@@ -17,11 +17,11 @@
#include "instruction_simplifier_x86_shared.h"
#include "code_generator_x86_64.h"
-namespace art {
+namespace art HIDDEN {
namespace x86_64 {
-class InstructionSimplifierX86_64Visitor : public HGraphVisitor {
+class InstructionSimplifierX86_64Visitor final : public HGraphVisitor {
public:
InstructionSimplifierX86_64Visitor(HGraph* graph,
CodeGenerator* codegen,
diff --git a/compiler/optimizing/instruction_simplifier_x86_64.h b/compiler/optimizing/instruction_simplifier_x86_64.h
index 6cae24d11a..1654dc4774 100644
--- a/compiler/optimizing/instruction_simplifier_x86_64.h
+++ b/compiler/optimizing/instruction_simplifier_x86_64.h
@@ -16,10 +16,11 @@
#ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_64_H_
#define ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_64_H_
+#include "base/macros.h"
#include "nodes.h"
#include "optimization.h"
-namespace art {
+namespace art HIDDEN {
class CodeGenerator;
diff --git a/compiler/optimizing/instruction_simplifier_x86_shared.cc b/compiler/optimizing/instruction_simplifier_x86_shared.cc
index 2805abb2bb..74c5ca2466 100644
--- a/compiler/optimizing/instruction_simplifier_x86_shared.cc
+++ b/compiler/optimizing/instruction_simplifier_x86_shared.cc
@@ -14,9 +14,10 @@
*/
#include "instruction_simplifier_x86_shared.h"
+
#include "nodes_x86.h"
-namespace art {
+namespace art HIDDEN {
bool TryCombineAndNot(HAnd* instruction) {
DataType::Type type = instruction->GetType();
diff --git a/compiler/optimizing/instruction_simplifier_x86_shared.h b/compiler/optimizing/instruction_simplifier_x86_shared.h
index 7f94d7ea4c..1a44d0fdb5 100644
--- a/compiler/optimizing/instruction_simplifier_x86_shared.h
+++ b/compiler/optimizing/instruction_simplifier_x86_shared.h
@@ -16,13 +16,16 @@
#ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_SHARED_H_
#define ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_SHARED_H_
+#include "base/macros.h"
#include "nodes.h"
-namespace art {
+namespace art HIDDEN {
+
bool TryCombineAndNot(HAnd* instruction);
bool TryGenerateResetLeastSetBit(HAnd* instruction);
bool TryGenerateMaskUptoLeastSetBit(HXor* instruction);
bool AreLeastSetBitInputs(HInstruction* to_test, HInstruction* other);
+
} // namespace art
#endif // ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_SHARED_H_
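
Note: the helpers declared in this header recognize HIR shapes that map onto x86 BMI instructions. The scalar identities they look for, sketched for reference (the passes rewrite HAnd/HXor nodes, not integers):

#include <cstdint>

uint32_t AndNot(uint32_t x, uint32_t y)  { return x & ~y; }       // TryCombineAndNot -> ANDN
uint32_t ResetLeastSetBit(uint32_t x)    { return x & (x - 1u); } // TryGenerateResetLeastSetBit -> BLSR
uint32_t MaskUpToLeastSetBit(uint32_t x) { return x ^ (x - 1u); } // TryGenerateMaskUptoLeastSetBit -> BLSMSK
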
diff --git a/compiler/optimizing/intrinsic_objects.cc b/compiler/optimizing/intrinsic_objects.cc
index 5f6f562161..7e542117a9 100644
--- a/compiler/optimizing/intrinsic_objects.cc
+++ b/compiler/optimizing/intrinsic_objects.cc
@@ -22,7 +22,7 @@
#include "image.h"
#include "obj_ptr-inl.h"
-namespace art {
+namespace art HIDDEN {
static constexpr size_t kIntrinsicObjectsOffset =
enum_cast<size_t>(ImageHeader::kIntrinsicObjectsStart);
diff --git a/compiler/optimizing/intrinsic_objects.h b/compiler/optimizing/intrinsic_objects.h
index ed764bd4b2..d750f2934b 100644
--- a/compiler/optimizing/intrinsic_objects.h
+++ b/compiler/optimizing/intrinsic_objects.h
@@ -19,9 +19,10 @@
#include "base/bit_field.h"
#include "base/bit_utils.h"
+#include "base/macros.h"
#include "base/mutex.h"
-namespace art {
+namespace art HIDDEN {
class ClassLinker;
template <class MirrorType> class ObjPtr;
@@ -56,15 +57,15 @@ class IntrinsicObjects {
}
// Functions for retrieving data for Integer.valueOf().
- static ObjPtr<mirror::ObjectArray<mirror::Object>> LookupIntegerCache(
+ EXPORT static ObjPtr<mirror::ObjectArray<mirror::Object>> LookupIntegerCache(
Thread* self, ClassLinker* class_linker) REQUIRES_SHARED(Locks::mutator_lock_);
- static ObjPtr<mirror::ObjectArray<mirror::Object>> GetIntegerValueOfCache(
+ EXPORT static ObjPtr<mirror::ObjectArray<mirror::Object>> GetIntegerValueOfCache(
ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects)
REQUIRES_SHARED(Locks::mutator_lock_);
- static ObjPtr<mirror::Object> GetIntegerValueOfObject(
+ EXPORT static ObjPtr<mirror::Object> GetIntegerValueOfObject(
ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects,
uint32_t index) REQUIRES_SHARED(Locks::mutator_lock_);
- static MemberOffset GetIntegerValueOfArrayDataOffset(
+ EXPORT static MemberOffset GetIntegerValueOfArrayDataOffset(
ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects)
REQUIRES_SHARED(Locks::mutator_lock_);
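
Note: the HIDDEN attribute on the namespace and the per-symbol EXPORT annotations added across these files control ELF symbol visibility, so only the annotated entry points stay visible outside the compiler library. A rough sketch of what such macros typically expand to (assumed here; the real definitions live in base/macros.h):

#define HIDDEN __attribute__((visibility("hidden")))
#define EXPORT __attribute__((visibility("default")))

namespace example HIDDEN {      // symbols in the namespace default to hidden
EXPORT void VisibleToTests();   // selectively re-exported, e.g. for tests
void InternalOnly();            // stays hidden
}  // namespace example
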
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
index f2d2b45da9..774deec438 100644
--- a/compiler/optimizing/intrinsics.cc
+++ b/compiler/optimizing/intrinsics.cc
@@ -32,7 +32,7 @@
#include "scoped_thread_state_change-inl.h"
#include "thread-current-inl.h"
-namespace art {
+namespace art HIDDEN {
std::ostream& operator<<(std::ostream& os, const Intrinsics& intrinsic) {
switch (intrinsic) {
@@ -171,6 +171,7 @@ void IntrinsicVisitor::ComputeIntegerValueOfLocations(HInvoke* invoke,
if (!CanReferenceBootImageObjects(invoke, compiler_options)) {
return;
}
+ HInstruction* const input = invoke->InputAt(0);
if (compiler_options.IsBootImage()) {
if (!compiler_options.IsImageClass(kIntegerCacheDescriptor) ||
!compiler_options.IsImageClass(kIntegerDescriptor)) {
@@ -207,8 +208,8 @@ void IntrinsicVisitor::ComputeIntegerValueOfLocations(HInvoke* invoke,
CHECK_EQ(value_field->GetInt(current_object), low + i);
}
}
- if (invoke->InputAt(0)->IsIntConstant()) {
- int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
+ if (input->IsIntConstant()) {
+ int32_t value = input->AsIntConstant()->GetValue();
if (static_cast<uint32_t>(value) - static_cast<uint32_t>(low) <
static_cast<uint32_t>(high - low + 1)) {
// No call, we shall use direct pointer to the Integer object.
@@ -232,8 +233,8 @@ void IntrinsicVisitor::ComputeIntegerValueOfLocations(HInvoke* invoke,
} else {
DCHECK(compiler_options.IsAotCompiler());
DCHECK(CheckIntegerCache(self, runtime->GetClassLinker(), boot_image_live_objects, cache));
- if (invoke->InputAt(0)->IsIntConstant()) {
- int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
+ if (input->IsIntConstant()) {
+ int32_t value = input->AsIntConstant()->GetValue();
// Retrieve the `value` from the lowest cached Integer.
ObjPtr<mirror::Object> low_integer =
IntrinsicObjects::GetIntegerValueOfObject(boot_image_live_objects, 0u);
@@ -255,11 +256,11 @@ void IntrinsicVisitor::ComputeIntegerValueOfLocations(HInvoke* invoke,
ArenaAllocator* allocator = codegen->GetGraph()->GetAllocator();
LocationSummary* locations = new (allocator) LocationSummary(invoke, call_kind, kIntrinsified);
if (call_kind == LocationSummary::kCallOnMainOnly) {
- locations->SetInAt(0, Location::RegisterOrConstant(invoke->InputAt(0)));
+ locations->SetInAt(0, Location::RegisterOrConstant(input));
locations->AddTemp(first_argument_location);
locations->SetOut(return_location);
} else {
- locations->SetInAt(0, Location::ConstantLocation(invoke->InputAt(0)->AsConstant()));
+ locations->SetInAt(0, Location::ConstantLocation(input));
locations->SetOut(Location::RequiresRegister());
}
}
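
Note: the Integer cache hit test above folds both bounds checks into a single unsigned comparison. A standalone illustration of that idiom:

#include <cstdint>

// True iff low <= value <= high. If value < low, the subtraction wraps to a
// large unsigned number, so one comparison covers both bounds.
bool InInclusiveRange(int32_t value, int32_t low, int32_t high) {
  return static_cast<uint32_t>(value) - static_cast<uint32_t>(low) <
         static_cast<uint32_t>(high - low + 1);
}
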
@@ -392,7 +393,7 @@ void IntrinsicVisitor::CreateReferenceGetReferentLocations(HInvoke* invoke,
}
void IntrinsicVisitor::CreateReferenceRefersToLocations(HInvoke* invoke) {
- if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
+ if (gUseReadBarrier && !kUseBakerReadBarrier) {
// Unimplemented for non-Baker read barrier.
return;
}
diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h
index 5109882295..893cd04411 100644
--- a/compiler/optimizing/intrinsics.h
+++ b/compiler/optimizing/intrinsics.h
@@ -17,12 +17,13 @@
#ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_H_
#define ART_COMPILER_OPTIMIZING_INTRINSICS_H_
+#include "base/macros.h"
#include "code_generator.h"
#include "nodes.h"
#include "optimization.h"
#include "parallel_move_resolver.h"
-namespace art {
+namespace art HIDDEN {
class DexFile;
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 646f4f2ea7..d2dbaa32e3 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -46,7 +46,7 @@ using namespace vixl::aarch64; // NOLINT(build/namespaces)
#include "aarch64/macro-assembler-aarch64.h"
#pragma GCC diagnostic pop
-namespace art {
+namespace art HIDDEN {
namespace arm64 {
@@ -55,7 +55,6 @@ using helpers::DRegisterFrom;
using helpers::HeapOperand;
using helpers::LocationFrom;
using helpers::InputCPURegisterOrZeroRegAt;
-using helpers::IsConstantZeroBitPattern;
using helpers::OperandFrom;
using helpers::RegisterFrom;
using helpers::SRegisterFrom;
@@ -92,7 +91,7 @@ class ReadBarrierSystemArrayCopySlowPathARM64 : public SlowPathCodeARM64 {
public:
ReadBarrierSystemArrayCopySlowPathARM64(HInstruction* instruction, Location tmp)
: SlowPathCodeARM64(instruction), tmp_(tmp) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
DCHECK(kUseBakerReadBarrier);
}
@@ -711,7 +710,7 @@ static void GenUnsafeGet(HInvoke* invoke,
Location trg_loc = locations->Out();
Register trg = RegisterFrom(trg_loc, type);
- if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (type == DataType::Type::kReference && gUseReadBarrier && kUseBakerReadBarrier) {
// UnsafeGetObject/UnsafeGetObjectVolatile with Baker's read barrier case.
Register temp = WRegisterFrom(locations->GetTemp(0));
MacroAssembler* masm = codegen->GetVIXLAssembler();
@@ -754,7 +753,7 @@ static bool UnsafeGetIntrinsicOnCallList(Intrinsics intrinsic) {
}
static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
- bool can_call = kEmitCompilerReadBarrier && UnsafeGetIntrinsicOnCallList(invoke->GetIntrinsic());
+ bool can_call = gUseReadBarrier && UnsafeGetIntrinsicOnCallList(invoke->GetIntrinsic());
LocationSummary* locations =
new (allocator) LocationSummary(invoke,
can_call
@@ -1096,7 +1095,7 @@ void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutLongRelease(HInvoke* invoke)
}
static void CreateUnsafeCASLocations(ArenaAllocator* allocator, HInvoke* invoke) {
- const bool can_call = kEmitCompilerReadBarrier && IsUnsafeCASObject(invoke);
+ const bool can_call = gUseReadBarrier && IsUnsafeCASObject(invoke);
LocationSummary* locations =
new (allocator) LocationSummary(invoke,
can_call
@@ -1448,7 +1447,7 @@ static void GenUnsafeCas(HInvoke* invoke, DataType::Type type, CodeGeneratorARM6
vixl::aarch64::Label* exit_loop = &exit_loop_label;
vixl::aarch64::Label* cmp_failure = &exit_loop_label;
- if (kEmitCompilerReadBarrier && type == DataType::Type::kReference) {
+ if (gUseReadBarrier && type == DataType::Type::kReference) {
// We need to store the `old_value` in a non-scratch register to make sure
// the read barrier in the slow path does not clobber it.
old_value = WRegisterFrom(locations->GetTemp(0)); // The old value from main path.
@@ -1523,12 +1522,12 @@ void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeCompareAndSetLong(HInvoke* in
}
void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeCompareAndSetObject(HInvoke* invoke) {
// The only supported read barrier implementation is the Baker-style read barriers.
- if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
+ if (gUseReadBarrier && !kUseBakerReadBarrier) {
return;
}
CreateUnsafeCASLocations(allocator_, invoke);
- if (kEmitCompilerReadBarrier) {
+ if (gUseReadBarrier) {
// We need two non-scratch temporary registers for read barrier.
LocationSummary* locations = invoke->GetLocations();
if (kUseBakerReadBarrier) {
@@ -1578,7 +1577,7 @@ void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeCompareAndSetLong(HInvoke* invok
}
void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeCompareAndSetObject(HInvoke* invoke) {
// The only supported read barrier implementation is the Baker-style read barriers.
- DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier);
+ DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
GenUnsafeCas(invoke, DataType::Type::kReference, codegen_);
}
@@ -2576,9 +2575,9 @@ void IntrinsicCodeGeneratorARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
__ Bind(&done);
}
-// Mirrors ARRAYCOPY_SHORT_CHAR_ARRAY_THRESHOLD in libcore, so we can choose to use the native
-// implementation there for longer copy lengths.
-static constexpr int32_t kSystemArrayCopyCharThreshold = 32;
+// This value is greater than ARRAYCOPY_SHORT_CHAR_ARRAY_THRESHOLD in libcore,
+// so if we choose to jump to the slow path we will end up in the native implementation.
+static constexpr int32_t kSystemArrayCopyCharThreshold = 192;
static void SetSystemArrayCopyLocationRequires(LocationSummary* locations,
uint32_t at,
@@ -2710,11 +2709,13 @@ static void GenSystemArrayCopyAddresses(MacroAssembler* masm,
__ Add(dst_base, dst_base, Operand(XRegisterFrom(dst_pos), LSL, element_size_shift));
}
- if (copy_length.IsConstant()) {
- int32_t constant = copy_length.GetConstant()->AsIntConstant()->GetValue();
- __ Add(src_end, src_base, element_size * constant);
- } else {
- __ Add(src_end, src_base, Operand(XRegisterFrom(copy_length), LSL, element_size_shift));
+ if (src_end.IsValid()) {
+ if (copy_length.IsConstant()) {
+ int32_t constant = copy_length.GetConstant()->AsIntConstant()->GetValue();
+ __ Add(src_end, src_base, element_size * constant);
+ } else {
+ __ Add(src_end, src_base, Operand(XRegisterFrom(copy_length), LSL, element_size_shift));
+ }
}
}
@@ -2745,13 +2746,14 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopyChar(HInvoke* invoke) {
if (!length.IsConstant()) {
// Merge the following two comparisons into one:
// If the length is negative, bail out (delegate to libcore's native implementation).
- // If the length > 32 then (currently) prefer libcore's native implementation.
+ // If the length > kSystemArrayCopyCharThreshold then (currently) prefer libcore's
+ // native implementation.
__ Cmp(WRegisterFrom(length), kSystemArrayCopyCharThreshold);
__ B(slow_path->GetEntryLabel(), hi);
} else {
// We have already checked in the LocationsBuilder for the constant case.
DCHECK_GE(length.GetConstant()->AsIntConstant()->GetValue(), 0);
- DCHECK_LE(length.GetConstant()->AsIntConstant()->GetValue(), 32);
+ DCHECK_LE(length.GetConstant()->AsIntConstant()->GetValue(), kSystemArrayCopyCharThreshold);
}
Register src_curr_addr = WRegisterFrom(locations->GetTemp(0));
@@ -2787,21 +2789,102 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopyChar(HInvoke* invoke) {
length,
src_curr_addr,
dst_curr_addr,
- src_stop_addr);
+ Register());
// Iterate over the arrays and do a raw copy of the chars.
const int32_t char_size = DataType::Size(DataType::Type::kUint16);
UseScratchRegisterScope temps(masm);
- Register tmp = temps.AcquireW();
- vixl::aarch64::Label loop, done;
- __ Bind(&loop);
- __ Cmp(src_curr_addr, src_stop_addr);
- __ B(&done, eq);
- __ Ldrh(tmp, MemOperand(src_curr_addr, char_size, PostIndex));
- __ Strh(tmp, MemOperand(dst_curr_addr, char_size, PostIndex));
- __ B(&loop);
- __ Bind(&done);
+ // We split processing of the array in two parts: head and tail.
+ // A first loop handles the head by copying a block of characters per
+ // iteration (see: chars_per_block).
+ // A second loop handles the tail by copying the remaining characters.
+ // If the copy length is not constant, we copy them one-by-one.
+ // If the copy length is constant, we optimize by always unrolling the tail
+ // loop, and also unrolling the head loop when the copy length is small (see:
+ // unroll_threshold).
+ //
+ // Both loops are inverted for better performance, meaning they are
+ // implemented as conditional do-while loops.
+ // Here, the loop condition is first checked to determine if there are
+ // sufficient chars to run an iteration, then we enter the do-while: an
+ // iteration is performed followed by a conditional branch only if another
+ // iteration is necessary. As opposed to a standard while-loop, this inversion
+ // can save some branching (e.g. we don't branch back to the initial condition
+ // at the end of every iteration only to potentially immediately branch
+ // again).
+ //
+ // A full block of chars is subtracted before the head loop and added back
+ // after it, so a non-negative remaining length inside the head loop always
+ // means at least one full block is still available, reducing the number of
+ // conditional checks required on every iteration.
+ constexpr int32_t chars_per_block = 4;
+ constexpr int32_t unroll_threshold = 2 * chars_per_block;
+ vixl::aarch64::Label loop1, loop2, pre_loop2, done;
+
+ Register length_tmp = src_stop_addr.W();
+ Register tmp = temps.AcquireRegisterOfSize(char_size * chars_per_block * kBitsPerByte);
+
+ auto emitHeadLoop = [&]() {
+ __ Bind(&loop1);
+ __ Ldr(tmp, MemOperand(src_curr_addr, char_size * chars_per_block, PostIndex));
+ __ Subs(length_tmp, length_tmp, chars_per_block);
+ __ Str(tmp, MemOperand(dst_curr_addr, char_size * chars_per_block, PostIndex));
+ __ B(&loop1, ge);
+ };
+
+ auto emitTailLoop = [&]() {
+ __ Bind(&loop2);
+ __ Ldrh(tmp, MemOperand(src_curr_addr, char_size, PostIndex));
+ __ Subs(length_tmp, length_tmp, 1);
+ __ Strh(tmp, MemOperand(dst_curr_addr, char_size, PostIndex));
+ __ B(&loop2, gt);
+ };
+
+ auto emitUnrolledTailLoop = [&](const int32_t tail_length) {
+ DCHECK_LT(tail_length, 4);
+
+ // Don't use post-index addressing, and instead add a constant offset later.
+ if ((tail_length & 2) != 0) {
+ __ Ldr(tmp.W(), MemOperand(src_curr_addr));
+ __ Str(tmp.W(), MemOperand(dst_curr_addr));
+ }
+ if ((tail_length & 1) != 0) {
+ const int32_t offset = (tail_length & ~1) * char_size;
+ __ Ldrh(tmp, MemOperand(src_curr_addr, offset));
+ __ Strh(tmp, MemOperand(dst_curr_addr, offset));
+ }
+ };
+
+ if (length.IsConstant()) {
+ const int32_t constant_length = length.GetConstant()->AsIntConstant()->GetValue();
+ if (constant_length >= unroll_threshold) {
+ __ Mov(length_tmp, constant_length - chars_per_block);
+ emitHeadLoop();
+ } else {
+ static_assert(unroll_threshold == 8, "The unroll_threshold must be 8.");
+ // Fully unroll both the head and tail loops.
+ if ((constant_length & 4) != 0) {
+ __ Ldr(tmp, MemOperand(src_curr_addr, 4 * char_size, PostIndex));
+ __ Str(tmp, MemOperand(dst_curr_addr, 4 * char_size, PostIndex));
+ }
+ }
+ emitUnrolledTailLoop(constant_length % chars_per_block);
+ } else {
+ Register length_reg = WRegisterFrom(length);
+ __ Subs(length_tmp, length_reg, chars_per_block);
+ __ B(&pre_loop2, lt);
+
+ emitHeadLoop();
+
+ __ Bind(&pre_loop2);
+ __ Adds(length_tmp, length_tmp, chars_per_block);
+ __ B(&done, eq);
+
+ emitTailLoop();
+ }
+
+ __ Bind(&done);
__ Bind(slow_path->GetExitLabel());
}
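
Note: a plain C++ analogue of the copy strategy emitted above, to make the head/tail split and the inverted (do-while) loops concrete. Illustration only: the intrinsic emits AArch64 code with post-indexed loads/stores and unrolls the tail (and small heads) for constant lengths; non-overlapping ranges are assumed, as in the forward copy generated here.

#include <cstdint>
#include <cstring>

void CopyChars(const uint16_t* src, uint16_t* dst, int32_t length) {
  constexpr int32_t kCharsPerBlock = 4;
  int32_t remaining = length - kCharsPerBlock;  // subtract one full block up front
  if (remaining >= 0) {
    do {  // head: one 4-char block per iteration, inverted loop
      std::memcpy(dst, src, kCharsPerBlock * sizeof(uint16_t));
      src += kCharsPerBlock;
      dst += kCharsPerBlock;
      remaining -= kCharsPerBlock;
    } while (remaining >= 0);
  }
  remaining += kCharsPerBlock;  // add the block back; 0..3 chars left
  while (remaining-- > 0) {     // tail: copy the leftovers one-by-one
    *dst++ = *src++;
  }
}
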
@@ -2814,7 +2897,7 @@ static constexpr int32_t kSystemArrayCopyThreshold = 128;
void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) {
// The only read barrier implementation supporting the
// SystemArrayCopy intrinsic is the Baker-style read barriers.
- if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
+ if (gUseReadBarrier && !kUseBakerReadBarrier) {
return;
}
@@ -2866,7 +2949,7 @@ void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) {
locations->AddTemp(Location::RequiresRegister());
locations->AddTemp(Location::RequiresRegister());
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
// Temporary register IP0, obtained from the VIXL scratch register
// pool, cannot be used in ReadBarrierSystemArrayCopySlowPathARM64
// (because that register is clobbered by ReadBarrierMarkRegX
@@ -2884,7 +2967,7 @@ void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) {
void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
// The only read barrier implementation supporting the
// SystemArrayCopy intrinsic is the Baker-style read barriers.
- DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier);
+ DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
MacroAssembler* masm = GetVIXLAssembler();
LocationSummary* locations = invoke->GetLocations();
@@ -2991,7 +3074,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
UseScratchRegisterScope temps(masm);
Location temp3_loc; // Used only for Baker read barrier.
Register temp3;
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
temp3_loc = locations->GetTemp(2);
temp3 = WRegisterFrom(temp3_loc);
} else {
@@ -3004,7 +3087,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
// or the destination is Object[]. If none of these checks succeed, we go to the
// slow path.
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
if (!optimizations.GetSourceIsNonPrimitiveArray()) {
// /* HeapReference<Class> */ temp1 = src->klass_
codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
@@ -3165,7 +3248,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
} else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
// Bail out if the source is not a non primitive array.
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
// /* HeapReference<Class> */ temp1 = src->klass_
codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
temp1_loc,
@@ -3215,7 +3298,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
__ Cbz(WRegisterFrom(length), &done);
}
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
// TODO: Also convert this intrinsic to the IsGcMarking strategy?
// SystemArrayCopy implementation for Baker read barriers (see
@@ -3335,7 +3418,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
}
// We only need one card marking on the destination array.
- codegen_->MarkGCCard(dest.W(), Register(), /* value_can_be_null= */ false);
+ codegen_->MarkGCCard(dest.W(), Register(), /* emit_null_check= */ false);
__ Bind(intrinsic_slow_path->GetExitLabel());
}
@@ -3451,7 +3534,7 @@ void IntrinsicCodeGeneratorARM64::VisitIntegerValueOf(HInvoke* invoke) {
void IntrinsicLocationsBuilderARM64::VisitReferenceGetReferent(HInvoke* invoke) {
IntrinsicVisitor::CreateReferenceGetReferentLocations(invoke, codegen_);
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier && invoke->GetLocations() != nullptr) {
+ if (gUseReadBarrier && kUseBakerReadBarrier && invoke->GetLocations() != nullptr) {
invoke->GetLocations()->AddTemp(Location::RequiresRegister());
}
}
@@ -3466,7 +3549,7 @@ void IntrinsicCodeGeneratorARM64::VisitReferenceGetReferent(HInvoke* invoke) {
SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke);
codegen_->AddSlowPath(slow_path);
- if (kEmitCompilerReadBarrier) {
+ if (gUseReadBarrier) {
// Check self->GetWeakRefAccessEnabled().
UseScratchRegisterScope temps(masm);
Register temp = temps.AcquireW();
@@ -3493,7 +3576,7 @@ void IntrinsicCodeGeneratorARM64::VisitReferenceGetReferent(HInvoke* invoke) {
// Load the value from the field.
uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
out,
WRegisterFrom(obj),
@@ -3533,7 +3616,7 @@ void IntrinsicCodeGeneratorARM64::VisitReferenceRefersTo(HInvoke* invoke) {
__ Cmp(tmp, other);
- if (kEmitCompilerReadBarrier) {
+ if (gUseReadBarrier) {
DCHECK(kUseBakerReadBarrier);
vixl::aarch64::Label calculate_result;
@@ -4629,7 +4712,7 @@ static void GenerateVarHandleTarget(HInvoke* invoke,
method.X(),
ArtField::DeclaringClassOffset().Int32Value(),
/*fixup_label=*/ nullptr,
- kCompilerReadBarrierOption);
+ gCompilerReadBarrierOption);
}
}
} else {
@@ -4673,8 +4756,8 @@ static LocationSummary* CreateVarHandleCommonLocations(HInvoke* invoke) {
uint32_t number_of_arguments = invoke->GetNumberOfArguments();
for (size_t arg_index = arguments_start; arg_index != number_of_arguments; ++arg_index) {
HInstruction* arg = invoke->InputAt(arg_index);
- if (IsConstantZeroBitPattern(arg)) {
- locations->SetInAt(arg_index, Location::ConstantLocation(arg->AsConstant()));
+ if (IsZeroBitPattern(arg)) {
+ locations->SetInAt(arg_index, Location::ConstantLocation(arg));
} else if (DataType::IsFloatingPointType(arg->GetType())) {
locations->SetInAt(arg_index, Location::RequiresFpuRegister());
} else {
@@ -4683,7 +4766,7 @@ static LocationSummary* CreateVarHandleCommonLocations(HInvoke* invoke) {
}
// Add a temporary for offset.
- if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) &&
+ if ((gUseReadBarrier && !kUseBakerReadBarrier) &&
GetExpectedVarHandleCoordinatesCount(invoke) == 0u) { // For static fields.
// To preserve the offset value across the non-Baker read barrier slow path
// for loading the declaring class, use a fixed callee-save register.
@@ -4706,7 +4789,7 @@ static void CreateVarHandleGetLocations(HInvoke* invoke) {
return;
}
- if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) &&
+ if ((gUseReadBarrier && !kUseBakerReadBarrier) &&
invoke->GetType() == DataType::Type::kReference &&
invoke->GetIntrinsic() != Intrinsics::kVarHandleGet &&
invoke->GetIntrinsic() != Intrinsics::kVarHandleGetOpaque) {
@@ -4746,7 +4829,7 @@ static void GenerateVarHandleGet(HInvoke* invoke,
DCHECK(use_load_acquire || order == std::memory_order_relaxed);
// Load the value from the target location.
- if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (type == DataType::Type::kReference && gUseReadBarrier && kUseBakerReadBarrier) {
// Piggy-back on the field load path using introspection for the Baker read barrier.
// The `target.offset` is a temporary, use it for field address.
Register tmp_ptr = target.offset.X();
@@ -4898,7 +4981,7 @@ static void GenerateVarHandleSet(HInvoke* invoke,
}
if (CodeGenerator::StoreNeedsWriteBarrier(value_type, invoke->InputAt(value_index))) {
- codegen->MarkGCCard(target.object, Register(value), /*value_can_be_null=*/ true);
+ codegen->MarkGCCard(target.object, Register(value), /* emit_null_check= */ true);
}
if (slow_path != nullptr) {
@@ -4947,7 +5030,7 @@ static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke, boo
uint32_t number_of_arguments = invoke->GetNumberOfArguments();
DataType::Type value_type = GetDataTypeFromShorty(invoke, number_of_arguments - 1u);
- if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) &&
+ if ((gUseReadBarrier && !kUseBakerReadBarrier) &&
value_type == DataType::Type::kReference) {
// Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
// the passed reference and reloads it from the field. This breaks the read barriers
@@ -4961,7 +5044,7 @@ static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke, boo
LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
- if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
+ if (gUseReadBarrier && !kUseBakerReadBarrier) {
// We need callee-save registers for both the class object and offset instead of
// the temporaries reserved in CreateVarHandleCommonLocations().
static_assert(POPCOUNT(kArm64CalleeSaveRefSpills) >= 2u);
@@ -4985,16 +5068,16 @@ static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke, boo
// Add a temporary for old value and exclusive store result if floating point
// `expected` and/or `new_value` take scratch registers.
size_t available_scratch_registers =
- (IsConstantZeroBitPattern(invoke->InputAt(number_of_arguments - 1u)) ? 1u : 0u) +
- (IsConstantZeroBitPattern(invoke->InputAt(number_of_arguments - 2u)) ? 1u : 0u);
+ (IsZeroBitPattern(invoke->InputAt(number_of_arguments - 1u)) ? 1u : 0u) +
+ (IsZeroBitPattern(invoke->InputAt(number_of_arguments - 2u)) ? 1u : 0u);
size_t temps_needed = /* pointer, old value, store result */ 3u - available_scratch_registers;
// We can reuse the declaring class (if present) and offset temporary.
if (temps_needed > old_temp_count) {
locations->AddRegisterTemps(temps_needed - old_temp_count);
}
} else if ((value_type != DataType::Type::kReference && DataType::Size(value_type) != 1u) &&
- !IsConstantZeroBitPattern(invoke->InputAt(number_of_arguments - 2u)) &&
- !IsConstantZeroBitPattern(invoke->InputAt(number_of_arguments - 1u)) &&
+ !IsZeroBitPattern(invoke->InputAt(number_of_arguments - 2u)) &&
+ !IsZeroBitPattern(invoke->InputAt(number_of_arguments - 1u)) &&
GetExpectedVarHandleCoordinatesCount(invoke) == 2u) {
// Allocate a normal temporary for store result in the non-native byte order path
// because scratch registers are used by the byte-swapped `expected` and `new_value`.
@@ -5002,7 +5085,7 @@ static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke, boo
locations->AddTemp(Location::RequiresRegister());
}
}
- if (kEmitCompilerReadBarrier && value_type == DataType::Type::kReference) {
+ if (gUseReadBarrier && value_type == DataType::Type::kReference) {
// Add a temporary for the `old_value_temp` in slow path.
locations->AddTemp(Location::RequiresRegister());
}
@@ -5068,7 +5151,7 @@ static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke,
// except for references that need the offset for the read barrier.
UseScratchRegisterScope temps(masm);
Register tmp_ptr = target.offset.X();
- if (kEmitCompilerReadBarrier && value_type == DataType::Type::kReference) {
+ if (gUseReadBarrier && value_type == DataType::Type::kReference) {
tmp_ptr = temps.AcquireX();
}
__ Add(tmp_ptr, target.object.X(), target.offset.X());
@@ -5151,7 +5234,7 @@ static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke,
vixl::aarch64::Label* exit_loop = &exit_loop_label;
vixl::aarch64::Label* cmp_failure = &exit_loop_label;
- if (kEmitCompilerReadBarrier && value_type == DataType::Type::kReference) {
+ if (gUseReadBarrier && value_type == DataType::Type::kReference) {
// The `old_value_temp` is used first for the marked `old_value` and then for the unmarked
// reloaded old value for subsequent CAS in the slow path. It cannot be a scratch register.
size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
@@ -5296,7 +5379,7 @@ static void CreateVarHandleGetAndUpdateLocations(HInvoke* invoke,
return;
}
- if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) &&
+ if ((gUseReadBarrier && !kUseBakerReadBarrier) &&
invoke->GetType() == DataType::Type::kReference) {
// Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
// the passed reference and reloads it from the field, thus seeing the new value
@@ -5316,7 +5399,7 @@ static void CreateVarHandleGetAndUpdateLocations(HInvoke* invoke,
DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
// We can reuse the declaring class temporary if present.
if (old_temp_count == 1u &&
- !IsConstantZeroBitPattern(invoke->InputAt(invoke->GetNumberOfArguments() - 1u))) {
+ !IsZeroBitPattern(invoke->InputAt(invoke->GetNumberOfArguments() - 1u))) {
// Add a temporary for `old_value` if floating point `new_value` takes a scratch register.
locations->AddTemp(Location::RequiresRegister());
}
@@ -5327,7 +5410,7 @@ static void CreateVarHandleGetAndUpdateLocations(HInvoke* invoke,
if (old_temp_count == 1u &&
(get_and_update_op != GetAndUpdateOp::kSet && get_and_update_op != GetAndUpdateOp::kAdd) &&
GetExpectedVarHandleCoordinatesCount(invoke) == 2u &&
- !IsConstantZeroBitPattern(invoke->InputAt(invoke->GetNumberOfArguments() - 1u))) {
+ !IsZeroBitPattern(invoke->InputAt(invoke->GetNumberOfArguments() - 1u))) {
DataType::Type value_type =
GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
if (value_type != DataType::Type::kReference && DataType::Size(value_type) != 1u) {
@@ -5372,7 +5455,7 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
// except for references that need the offset for the non-Baker read barrier.
UseScratchRegisterScope temps(masm);
Register tmp_ptr = target.offset.X();
- if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) &&
+ if ((gUseReadBarrier && !kUseBakerReadBarrier) &&
value_type == DataType::Type::kReference) {
tmp_ptr = temps.AcquireX();
}
@@ -5402,7 +5485,7 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
// the new value unless it is zero bit pattern (+0.0f or +0.0) and need another one
// in GenerateGetAndUpdate(). We have allocated a normal temporary to handle that.
old_value = CPURegisterFrom(locations->GetTemp(1u), load_store_type);
- } else if ((kEmitCompilerReadBarrier && kUseBakerReadBarrier) &&
+ } else if ((gUseReadBarrier && kUseBakerReadBarrier) &&
value_type == DataType::Type::kReference) {
// Load the old value initially to a scratch register.
// We shall move it to `out` later with a read barrier.
@@ -5450,7 +5533,7 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
__ Sxtb(out.W(), old_value.W());
} else if (value_type == DataType::Type::kInt16) {
__ Sxth(out.W(), old_value.W());
- } else if (kEmitCompilerReadBarrier && value_type == DataType::Type::kReference) {
+ } else if (gUseReadBarrier && value_type == DataType::Type::kReference) {
if (kUseBakerReadBarrier) {
codegen->GenerateIntrinsicCasMoveWithBakerReadBarrier(out.W(), old_value.W());
} else {
@@ -5647,7 +5730,7 @@ void VarHandleSlowPathARM64::EmitByteArrayViewCode(CodeGenerator* codegen_in) {
// Byte order check. For native byte order return to the main path.
if (access_mode_template == mirror::VarHandle::AccessModeTemplate::kSet &&
- IsConstantZeroBitPattern(invoke->InputAt(invoke->GetNumberOfArguments() - 1u))) {
+ IsZeroBitPattern(invoke->InputAt(invoke->GetNumberOfArguments() - 1u))) {
// There is no reason to differentiate between native byte order and byte-swap
// for setting a zero bit pattern. Just return to the main path.
__ B(GetNativeByteOrderLabel());
@@ -5677,42 +5760,9 @@ void VarHandleSlowPathARM64::EmitByteArrayViewCode(CodeGenerator* codegen_in) {
__ B(GetExitLabel());
}
-UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOf);
-UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOfAfter);
-UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferAppend);
-UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferLength);
-UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferToString);
-UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendObject);
-UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendString);
-UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendCharSequence);
-UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendCharArray);
-UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendBoolean);
-UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendChar);
-UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendInt);
-UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendLong);
-UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendFloat);
-UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendDouble);
-UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderLength);
-UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderToString);
-UNIMPLEMENTED_INTRINSIC(ARM64, SystemArrayCopyByte);
-UNIMPLEMENTED_INTRINSIC(ARM64, SystemArrayCopyInt);
-
-// 1.8.
-UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndAddInt)
-UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndAddLong)
-UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetInt)
-UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetLong)
-UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetObject)
-
-UNIMPLEMENTED_INTRINSIC(ARM64, MethodHandleInvokeExact)
-UNIMPLEMENTED_INTRINSIC(ARM64, MethodHandleInvoke)
-
-// OpenJDK 11
-UNIMPLEMENTED_INTRINSIC(ARM64, JdkUnsafeGetAndAddInt)
-UNIMPLEMENTED_INTRINSIC(ARM64, JdkUnsafeGetAndAddLong)
-UNIMPLEMENTED_INTRINSIC(ARM64, JdkUnsafeGetAndSetInt)
-UNIMPLEMENTED_INTRINSIC(ARM64, JdkUnsafeGetAndSetLong)
-UNIMPLEMENTED_INTRINSIC(ARM64, JdkUnsafeGetAndSetObject)
+#define MARK_UNIMPLEMENTED(Name) UNIMPLEMENTED_INTRINSIC(ARM64, Name)
+UNIMPLEMENTED_INTRINSIC_LIST_ARM64(MARK_UNIMPLEMENTED);
+#undef MARK_UNIMPLEMENTED
UNREACHABLE_INTRINSICS(ARM64)
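
Note: the hand-written UNIMPLEMENTED_INTRINSIC lines are replaced by a single per-architecture list macro. A miniature of that X-macro pattern with hypothetical names (the real list is UNIMPLEMENTED_INTRINSIC_LIST_ARM64 in the intrinsics headers):

// V is applied to every intrinsic name in the list.
#define EXAMPLE_UNIMPLEMENTED_LIST(V) \
  V(StringStringIndexOf)              \
  V(MethodHandleInvoke)

#define MARK_UNIMPLEMENTED(Name) void Visit##Name() { /* fall back to the runtime call */ }
EXAMPLE_UNIMPLEMENTED_LIST(MARK_UNIMPLEMENTED)
#undef MARK_UNIMPLEMENTED
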
diff --git a/compiler/optimizing/intrinsics_arm64.h b/compiler/optimizing/intrinsics_arm64.h
index 9c46efddec..a0ccf87f7b 100644
--- a/compiler/optimizing/intrinsics_arm64.h
+++ b/compiler/optimizing/intrinsics_arm64.h
@@ -17,6 +17,7 @@
#ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_ARM64_H_
#define ART_COMPILER_OPTIMIZING_INTRINSICS_ARM64_H_
+#include "base/macros.h"
#include "intrinsics.h"
namespace vixl {
@@ -27,7 +28,7 @@ class MacroAssembler;
} // namespace aarch64
} // namespace vixl
-namespace art {
+namespace art HIDDEN {
class ArenaAllocator;
class HInvokeStaticOrDirect;
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index d850cadc2b..266b5bc799 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -34,7 +34,7 @@
#include "aarch32/constants-aarch32.h"
-namespace art {
+namespace art HIDDEN {
namespace arm {
#define __ assembler->GetVIXLAssembler()->
@@ -120,7 +120,7 @@ class ReadBarrierSystemArrayCopySlowPathARMVIXL : public SlowPathCodeARMVIXL {
public:
explicit ReadBarrierSystemArrayCopySlowPathARMVIXL(HInstruction* instruction)
: SlowPathCodeARMVIXL(instruction) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
DCHECK(kUseBakerReadBarrier);
}
@@ -1242,7 +1242,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromString(HInvoke* invo
void IntrinsicLocationsBuilderARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
// The only read barrier implementation supporting the
// SystemArrayCopy intrinsic is the Baker-style read barriers.
- if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
+ if (gUseReadBarrier && !kUseBakerReadBarrier) {
return;
}
@@ -1265,7 +1265,7 @@ void IntrinsicLocationsBuilderARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
if (length != nullptr && !assembler_->ShifterOperandCanAlwaysHold(length->GetValue())) {
locations->SetInAt(4, Location::RequiresRegister());
}
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
// Temporary register IP cannot be used in
// ReadBarrierSystemArrayCopySlowPathARM (because that register
// is clobbered by ReadBarrierMarkRegX entry points). Get an extra
@@ -1339,7 +1339,7 @@ static void CheckPosition(ArmVIXLAssembler* assembler,
void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
// The only read barrier implementation supporting the
// SystemArrayCopy intrinsic is the Baker-style read barriers.
- DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier);
+ DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
ArmVIXLAssembler* assembler = GetAssembler();
LocationSummary* locations = invoke->GetLocations();
@@ -1453,7 +1453,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
// or the destination is Object[]. If none of these checks succeed, we go to the
// slow path.
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
if (!optimizations.GetSourceIsNonPrimitiveArray()) {
// /* HeapReference<Class> */ temp1 = src->klass_
codegen_->GenerateFieldLoadWithBakerReadBarrier(
@@ -1584,7 +1584,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
} else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
// Bail out if the source is not a non primitive array.
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
// /* HeapReference<Class> */ temp1 = src->klass_
codegen_->GenerateFieldLoadWithBakerReadBarrier(
invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check= */ false);
@@ -1621,7 +1621,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
__ CompareAndBranchIfZero(RegisterFrom(length), &done, /* is_far_target= */ false);
}
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
// TODO: Also convert this intrinsic to the IsGcMarking strategy?
// SystemArrayCopy implementation for Baker read barriers (see
@@ -1723,7 +1723,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
}
// We only need one card marking on the destination array.
- codegen_->MarkGCCard(temp1, temp2, dest, NoReg, /* value_can_be_null= */ false);
+ codegen_->MarkGCCard(temp1, temp2, dest, NoReg, /* emit_null_check= */ false);
__ Bind(intrinsic_slow_path->GetExitLabel());
}
@@ -2511,7 +2511,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitReferenceGetReferent(HInvoke* invoke) {
SlowPathCodeARMVIXL* slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
codegen_->AddSlowPath(slow_path);
- if (kEmitCompilerReadBarrier) {
+ if (gUseReadBarrier) {
// Check self->GetWeakRefAccessEnabled().
UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
vixl32::Register temp = temps.Acquire();
@@ -2539,7 +2539,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitReferenceGetReferent(HInvoke* invoke) {
// Load the value from the field.
uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
out,
RegisterFrom(obj),
@@ -2587,7 +2587,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitReferenceRefersTo(HInvoke* invoke) {
assembler->MaybeUnpoisonHeapReference(tmp);
codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); // `referent` is volatile.
- if (kEmitCompilerReadBarrier) {
+ if (gUseReadBarrier) {
DCHECK(kUseBakerReadBarrier);
vixl32::Label calculate_result;
@@ -2613,7 +2613,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitReferenceRefersTo(HInvoke* invoke) {
__ Bind(&calculate_result);
} else {
- DCHECK(!kEmitCompilerReadBarrier);
+ DCHECK(!gUseReadBarrier);
__ Sub(out, tmp, other);
}
@@ -2732,7 +2732,7 @@ static void GenerateIntrinsicGet(HInvoke* invoke,
}
break;
case DataType::Type::kReference:
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
// Piggy-back on the field load path using introspection for the Baker read barrier.
vixl32::Register temp = RegisterFrom(maybe_temp);
__ Add(temp, base, offset);
@@ -2777,7 +2777,7 @@ static void GenerateIntrinsicGet(HInvoke* invoke,
codegen->GenerateMemoryBarrier(
seq_cst_barrier ? MemBarrierKind::kAnyAny : MemBarrierKind::kLoadAny);
}
- if (type == DataType::Type::kReference && !(kEmitCompilerReadBarrier && kUseBakerReadBarrier)) {
+ if (type == DataType::Type::kReference && !(gUseReadBarrier && kUseBakerReadBarrier)) {
Location base_loc = LocationFrom(base);
Location index_loc = LocationFrom(offset);
codegen->MaybeGenerateReadBarrierSlow(invoke, out, out, base_loc, /* offset=*/ 0u, index_loc);
@@ -2802,7 +2802,7 @@ static void CreateUnsafeGetLocations(HInvoke* invoke,
CodeGeneratorARMVIXL* codegen,
DataType::Type type,
bool atomic) {
- bool can_call = kEmitCompilerReadBarrier && UnsafeGetIntrinsicOnCallList(invoke->GetIntrinsic());
+ bool can_call = gUseReadBarrier && UnsafeGetIntrinsicOnCallList(invoke->GetIntrinsic());
ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
LocationSummary* locations =
new (allocator) LocationSummary(invoke,
@@ -2818,7 +2818,7 @@ static void CreateUnsafeGetLocations(HInvoke* invoke,
locations->SetInAt(2, Location::RequiresRegister());
locations->SetOut(Location::RequiresRegister(),
(can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
- if ((kEmitCompilerReadBarrier && kUseBakerReadBarrier && type == DataType::Type::kReference) ||
+ if ((gUseReadBarrier && kUseBakerReadBarrier && type == DataType::Type::kReference) ||
(type == DataType::Type::kInt64 && Use64BitExclusiveLoadStore(atomic, codegen))) {
// We need a temporary register for the read barrier marking slow
// path in CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier,
@@ -2837,7 +2837,7 @@ static void GenUnsafeGet(HInvoke* invoke,
vixl32::Register offset = LowRegisterFrom(locations->InAt(2)); // Long offset, lo part only.
Location out = locations->Out();
Location maybe_temp = Location::NoLocation();
- if ((kEmitCompilerReadBarrier && kUseBakerReadBarrier && type == DataType::Type::kReference) ||
+ if ((gUseReadBarrier && kUseBakerReadBarrier && type == DataType::Type::kReference) ||
(type == DataType::Type::kInt64 && Use64BitExclusiveLoadStore(atomic, codegen))) {
maybe_temp = locations->GetTemp(0);
}
@@ -3470,7 +3470,7 @@ static void GenerateCompareAndSet(CodeGeneratorARMVIXL* codegen,
// branch goes to the read barrier slow path that clobbers `success` anyway.
bool init_failure_for_cmp =
success.IsValid() &&
- !(kEmitCompilerReadBarrier && type == DataType::Type::kReference && expected.IsRegister());
+ !(gUseReadBarrier && type == DataType::Type::kReference && expected.IsRegister());
// Instruction scheduling: Loading a constant between LDREX* and using the loaded value
// is essentially free, so prepare the failure value here if we can.
bool init_failure_for_cmp_early =
@@ -3655,7 +3655,7 @@ class ReadBarrierCasSlowPathARMVIXL : public SlowPathCodeARMVIXL {
};
static void CreateUnsafeCASLocations(ArenaAllocator* allocator, HInvoke* invoke) {
- const bool can_call = kEmitCompilerReadBarrier && IsUnsafeCASObject(invoke);
+ const bool can_call = gUseReadBarrier && IsUnsafeCASObject(invoke);
LocationSummary* locations =
new (allocator) LocationSummary(invoke,
can_call
@@ -3706,7 +3706,7 @@ static void GenUnsafeCas(HInvoke* invoke, DataType::Type type, CodeGeneratorARMV
vixl32::Label* exit_loop = &exit_loop_label;
vixl32::Label* cmp_failure = &exit_loop_label;
- if (kEmitCompilerReadBarrier && type == DataType::Type::kReference) {
+ if (gUseReadBarrier && type == DataType::Type::kReference) {
// If marking, check if the stored reference is a from-space reference to the same
// object as the to-space reference `expected`. If so, perform a custom CAS loop.
ReadBarrierCasSlowPathARMVIXL* slow_path =
@@ -3770,7 +3770,7 @@ void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeCompareAndSetInt(HInvoke* i
}
void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeCompareAndSetObject(HInvoke* invoke) {
// The only supported read barrier implementation is the Baker-style read barriers (b/173104084).
- if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
+ if (gUseReadBarrier && !kUseBakerReadBarrier) {
return;
}
@@ -3798,7 +3798,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeCompareAndSetInt(HInvoke* invo
}
void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeCompareAndSetObject(HInvoke* invoke) {
// The only supported read barrier implementation is the Baker-style read barriers (b/173104084).
- DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier);
+ DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
GenUnsafeCas(invoke, DataType::Type::kReference, codegen_);
}
@@ -4351,7 +4351,7 @@ static void GenerateVarHandleTarget(HInvoke* invoke,
LocationFrom(target.object),
method,
ArtField::DeclaringClassOffset().Int32Value(),
- kCompilerReadBarrierOption);
+ gCompilerReadBarrierOption);
}
}
} else {
@@ -4403,7 +4403,7 @@ static LocationSummary* CreateVarHandleCommonLocations(HInvoke* invoke) {
}
// Add a temporary for offset.
- if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) &&
+ if ((gUseReadBarrier && !kUseBakerReadBarrier) &&
GetExpectedVarHandleCoordinatesCount(invoke) == 0u) { // For static fields.
// To preserve the offset value across the non-Baker read barrier slow path
// for loading the declaring class, use a fixed callee-save register.
@@ -4428,7 +4428,7 @@ static void CreateVarHandleGetLocations(HInvoke* invoke,
return;
}
- if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) &&
+ if ((gUseReadBarrier && !kUseBakerReadBarrier) &&
invoke->GetType() == DataType::Type::kReference &&
invoke->GetIntrinsic() != Intrinsics::kVarHandleGet &&
invoke->GetIntrinsic() != Intrinsics::kVarHandleGetOpaque) {
@@ -4476,7 +4476,7 @@ static void GenerateVarHandleGet(HInvoke* invoke,
Location maybe_temp = Location::NoLocation();
Location maybe_temp2 = Location::NoLocation();
Location maybe_temp3 = Location::NoLocation();
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier && type == DataType::Type::kReference) {
+ if (gUseReadBarrier && kUseBakerReadBarrier && type == DataType::Type::kReference) {
// Reuse the offset temporary.
maybe_temp = LocationFrom(target.offset);
} else if (DataType::Is64BitType(type) && Use64BitExclusiveLoadStore(atomic, codegen)) {
@@ -4590,7 +4590,7 @@ static void CreateVarHandleSetLocations(HInvoke* invoke,
HInstruction* arg = invoke->InputAt(number_of_arguments - 1u);
bool has_reverse_bytes_slow_path =
(expected_coordinates_count == 2u) &&
- !(arg->IsConstant() && arg->AsConstant()->IsZeroBitPattern());
+ !IsZeroBitPattern(arg);
if (Use64BitExclusiveLoadStore(atomic, codegen)) {
// We need 4 temporaries in the byte array view slow path. Otherwise, we need
// 2 or 3 temporaries for GenerateIntrinsicSet() depending on the value type.
@@ -4699,7 +4699,7 @@ static void GenerateVarHandleSet(HInvoke* invoke,
vixl32::Register temp = target.offset;
vixl32::Register card = temps.Acquire();
vixl32::Register value_reg = RegisterFrom(value);
- codegen->MarkGCCard(temp, card, target.object, value_reg, /*value_can_be_null=*/ true);
+ codegen->MarkGCCard(temp, card, target.object, value_reg, /* emit_null_check= */ true);
}
if (slow_path != nullptr) {
@@ -4749,7 +4749,7 @@ static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke, boo
uint32_t number_of_arguments = invoke->GetNumberOfArguments();
DataType::Type value_type = GetDataTypeFromShorty(invoke, number_of_arguments - 1u);
- if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) &&
+ if ((gUseReadBarrier && !kUseBakerReadBarrier) &&
value_type == DataType::Type::kReference) {
// Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
// the passed reference and reloads it from the field. This breaks the read barriers
@@ -4763,7 +4763,7 @@ static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke, boo
LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
- if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
+ if (gUseReadBarrier && !kUseBakerReadBarrier) {
// We need callee-save registers for both the class object and offset instead of
// the temporaries reserved in CreateVarHandleCommonLocations().
static_assert(POPCOUNT(kArmCalleeSaveRefSpills) >= 2u);
@@ -4799,7 +4799,7 @@ static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke, boo
locations->AddRegisterTemps(2u);
}
}
- if (kEmitCompilerReadBarrier && value_type == DataType::Type::kReference) {
+ if (gUseReadBarrier && value_type == DataType::Type::kReference) {
// Add a temporary for store result, also used for the `old_value_temp` in slow path.
locations->AddTemp(Location::RequiresRegister());
}
@@ -4930,7 +4930,7 @@ static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke,
vixl32::Label* exit_loop = &exit_loop_label;
vixl32::Label* cmp_failure = &exit_loop_label;
- if (kEmitCompilerReadBarrier && value_type == DataType::Type::kReference) {
+ if (gUseReadBarrier && value_type == DataType::Type::kReference) {
// The `old_value_temp` is used first for the marked `old_value` and then for the unmarked
// reloaded old value for subsequent CAS in the slow path. This must not clobber `old_value`.
vixl32::Register old_value_temp = return_success ? RegisterFrom(out) : store_result;
@@ -5086,7 +5086,7 @@ static void CreateVarHandleGetAndUpdateLocations(HInvoke* invoke,
return;
}
- if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) &&
+ if ((gUseReadBarrier && !kUseBakerReadBarrier) &&
invoke->GetType() == DataType::Type::kReference) {
// Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
// the passed reference and reloads it from the field, thus seeing the new value
@@ -5107,7 +5107,7 @@ static void CreateVarHandleGetAndUpdateLocations(HInvoke* invoke,
// Add temps needed to do the GenerateGetAndUpdate() with core registers.
size_t temps_needed = (value_type == DataType::Type::kFloat64) ? 5u : 3u;
locations->AddRegisterTemps(temps_needed - locations->GetTempCount());
- } else if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) &&
+ } else if ((gUseReadBarrier && !kUseBakerReadBarrier) &&
value_type == DataType::Type::kReference) {
// We need to preserve the declaring class (if present) and offset for read barrier
// slow paths, so we must use a separate temporary for the exclusive store result.
@@ -5213,7 +5213,7 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
if (byte_swap) {
GenerateReverseBytes(assembler, DataType::Type::kInt32, arg, arg);
}
- } else if (kEmitCompilerReadBarrier && value_type == DataType::Type::kReference) {
+ } else if (gUseReadBarrier && value_type == DataType::Type::kReference) {
if (kUseBakerReadBarrier) {
// Load the old value initially to a temporary register.
// We shall move it to `out` later with a read barrier.
@@ -5296,7 +5296,7 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
} else {
__ Vmov(SRegisterFrom(out), RegisterFrom(old_value));
}
- } else if (kEmitCompilerReadBarrier && value_type == DataType::Type::kReference) {
+ } else if (gUseReadBarrier && value_type == DataType::Type::kReference) {
if (kUseBakerReadBarrier) {
codegen->GenerateIntrinsicCasMoveWithBakerReadBarrier(RegisterFrom(out),
RegisterFrom(old_value));
@@ -5517,7 +5517,7 @@ void VarHandleSlowPathARMVIXL::EmitByteArrayViewCode(CodeGenerator* codegen_in)
// Byte order check. For native byte order return to the main path.
if (access_mode_template == mirror::VarHandle::AccessModeTemplate::kSet) {
HInstruction* arg = invoke->InputAt(invoke->GetNumberOfArguments() - 1u);
- if (arg->IsConstant() && arg->AsConstant()->IsZeroBitPattern()) {
+ if (IsZeroBitPattern(arg)) {
// There is no reason to differentiate between native byte order and byte-swap
// for setting a zero bit pattern. Just return to the main path.
__ B(GetNativeByteOrderLabel());
@@ -5549,69 +5549,9 @@ void VarHandleSlowPathARMVIXL::EmitByteArrayViewCode(CodeGenerator* codegen_in)
__ B(GetExitLabel());
}
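
The hunks above also swap the open-coded `arg->IsConstant() && arg->AsConstant()->IsZeroBitPattern()` test for an IsZeroBitPattern(arg) helper. Judging from the replaced expressions the helper is equivalent to the old check; where it is actually defined (likely a shared codegen/intrinsics header) is not shown in this diff. Sketch:

    // Equivalent shape of the helper, reconstructed from the call sites here.
    inline bool IsZeroBitPattern(HInstruction* instruction) {
      return instruction->IsConstant() && instruction->AsConstant()->IsZeroBitPattern();
    }
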
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundDouble) // Could be done by changing rounding mode, maybe?
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeCASLong) // High register pressure.
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyChar)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, LongDivideUnsigned)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, CRC32Update)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, CRC32UpdateBytes)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, CRC32UpdateByteBuffer)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16ToFloat)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16ToHalf)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Floor)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Ceil)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Rint)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Greater)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16GreaterEquals)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Less)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16LessEquals)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Compare)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Min)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Max)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMultiplyHigh)
-
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOf);
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOfAfter);
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferAppend);
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferLength);
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferToString);
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendObject);
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendString);
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendCharSequence);
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendCharArray);
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendBoolean);
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendChar);
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendInt);
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendLong);
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendFloat);
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendDouble);
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderLength);
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderToString);
-
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyByte);
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyInt);
-
-// 1.8.
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathFmaDouble)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathFmaFloat)
-
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndAddInt)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndAddLong)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetInt)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetLong)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetObject)
-
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, MethodHandleInvokeExact)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, MethodHandleInvoke)
-
-// OpenJDK 11
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, JdkUnsafeCASLong) // High register pressure.
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, JdkUnsafeGetAndAddInt)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, JdkUnsafeGetAndAddLong)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, JdkUnsafeGetAndSetInt)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, JdkUnsafeGetAndSetLong)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, JdkUnsafeGetAndSetObject)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, JdkUnsafeCompareAndSetLong)
+#define MARK_UNIMPLEMENTED(Name) UNIMPLEMENTED_INTRINSIC(ARMVIXL, Name)
+UNIMPLEMENTED_INTRINSIC_LIST_ARM(MARK_UNIMPLEMENTED);
+#undef MARK_UNIMPLEMENTED
UNREACHABLE_INTRINSICS(ARMVIXL)
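
The per-architecture UNIMPLEMENTED_INTRINSIC block above is folded into a single X-macro expansion. The list macro itself is not part of this hunk; the sketch below shows the pattern with two names taken from the deleted list, purely for illustration:

    // Illustrative only: the real UNIMPLEMENTED_INTRINSIC_LIST_ARM lives in a
    // shared header and enumerates every intrinsic that was listed above.
    #define UNIMPLEMENTED_INTRINSIC_LIST_ARM(V) \
      V(MathRoundDouble)                        \
      V(SystemArrayCopyChar)

    #define MARK_UNIMPLEMENTED(Name) UNIMPLEMENTED_INTRINSIC(ARMVIXL, Name)
    UNIMPLEMENTED_INTRINSIC_LIST_ARM(MARK_UNIMPLEMENTED);
    #undef MARK_UNIMPLEMENTED
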
diff --git a/compiler/optimizing/intrinsics_arm_vixl.h b/compiler/optimizing/intrinsics_arm_vixl.h
index 3103cec8f0..54475bcc7e 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.h
+++ b/compiler/optimizing/intrinsics_arm_vixl.h
@@ -17,10 +17,11 @@
#ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_ARM_VIXL_H_
#define ART_COMPILER_OPTIMIZING_INTRINSICS_ARM_VIXL_H_
+#include "base/macros.h"
#include "intrinsics.h"
#include "utils/arm/assembler_arm_vixl.h"
-namespace art {
+namespace art HIDDEN {
namespace arm {
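
The headers in this series also pull in base/macros.h and open the namespace as `namespace art HIDDEN`. HIDDEN is presumably a symbol-visibility attribute so that compiler internals default to hidden ELF visibility; the definition below is an assumption, not copied from ART:

    // Assumed shape of the macro from base/macros.h.
    #define HIDDEN __attribute__((visibility("hidden")))

    namespace art HIDDEN {
    // ... compiler-internal declarations ...
    }  // namespace art
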
diff --git a/compiler/optimizing/intrinsics_utils.h b/compiler/optimizing/intrinsics_utils.h
index 19f5e332a8..13cabdafed 100644
--- a/compiler/optimizing/intrinsics_utils.h
+++ b/compiler/optimizing/intrinsics_utils.h
@@ -29,7 +29,7 @@
#include "utils/assembler.h"
#include "utils/label.h"
-namespace art {
+namespace art HIDDEN {
// Default slow-path for fallback (calling the managed code to handle the intrinsic) in an
// intrinsified call. This will copy the arguments into the positions for a regular call.
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 7d90aae984..d2072201f8 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -38,7 +38,7 @@
#include "utils/x86/assembler_x86.h"
#include "utils/x86/constants_x86.h"
-namespace art {
+namespace art HIDDEN {
namespace x86 {
@@ -75,7 +75,7 @@ class ReadBarrierSystemArrayCopySlowPathX86 : public SlowPathCode {
public:
explicit ReadBarrierSystemArrayCopySlowPathX86(HInstruction* instruction)
: SlowPathCode(instruction) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
DCHECK(kUseBakerReadBarrier);
}
@@ -1699,7 +1699,7 @@ static void GenUnsafeGet(HInvoke* invoke,
case DataType::Type::kReference: {
Register output = output_loc.AsRegister<Register>();
- if (kEmitCompilerReadBarrier) {
+ if (gUseReadBarrier) {
if (kUseBakerReadBarrier) {
Address src(base, offset, ScaleFactor::TIMES_1, 0);
codegen->GenerateReferenceLoadWithBakerReadBarrier(
@@ -1757,7 +1757,7 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator,
HInvoke* invoke,
DataType::Type type,
bool is_volatile) {
- bool can_call = kEmitCompilerReadBarrier && UnsafeGetIntrinsicOnCallList(invoke->GetIntrinsic());
+ bool can_call = gUseReadBarrier && UnsafeGetIntrinsicOnCallList(invoke->GetIntrinsic());
LocationSummary* locations =
new (allocator) LocationSummary(invoke,
can_call
@@ -2103,7 +2103,7 @@ void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutLongRelease(HInvoke* invoke) {
static void CreateIntIntIntIntIntToInt(ArenaAllocator* allocator,
DataType::Type type,
HInvoke* invoke) {
- const bool can_call = kEmitCompilerReadBarrier &&
+ const bool can_call = gUseReadBarrier &&
kUseBakerReadBarrier &&
IsUnsafeCASObject(invoke);
LocationSummary* locations =
@@ -2175,7 +2175,7 @@ void IntrinsicLocationsBuilderX86::VisitJdkUnsafeCompareAndSetLong(HInvoke* invo
void IntrinsicLocationsBuilderX86::VisitJdkUnsafeCompareAndSetObject(HInvoke* invoke) {
// The only supported read barrier implementation is the Baker-style read barriers.
- if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
+ if (gUseReadBarrier && !kUseBakerReadBarrier) {
return;
}
@@ -2304,7 +2304,7 @@ static void GenReferenceCAS(HInvoke* invoke,
DCHECK_EQ(expected, EAX);
DCHECK_NE(temp, temp2);
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
// Need to make sure the reference stored in the field is a to-space
// one before attempting the CAS or the CAS could fail incorrectly.
codegen->GenerateReferenceLoadWithBakerReadBarrier(
@@ -2391,7 +2391,7 @@ static void GenCAS(DataType::Type type, HInvoke* invoke, CodeGeneratorX86* codeg
if (type == DataType::Type::kReference) {
// The only read barrier implementation supporting the
// UnsafeCASObject intrinsic is the Baker-style read barriers.
- DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier);
+ DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
Register temp = locations->GetTemp(0).AsRegister<Register>();
Register temp2 = locations->GetTemp(1).AsRegister<Register>();
@@ -2413,7 +2413,7 @@ void IntrinsicCodeGeneratorX86::VisitUnsafeCASLong(HInvoke* invoke) {
void IntrinsicCodeGeneratorX86::VisitUnsafeCASObject(HInvoke* invoke) {
// The only read barrier implementation supporting the
// UnsafeCASObject intrinsic is the Baker-style read barriers.
- DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier);
+ DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
GenCAS(DataType::Type::kReference, invoke, codegen_);
}
@@ -2443,7 +2443,7 @@ void IntrinsicCodeGeneratorX86::VisitJdkUnsafeCompareAndSetLong(HInvoke* invoke)
void IntrinsicCodeGeneratorX86::VisitJdkUnsafeCompareAndSetObject(HInvoke* invoke) {
// The only supported read barrier implementation is the Baker-style read barriers.
- DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier);
+ DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
GenCAS(DataType::Type::kReference, invoke, codegen_);
}
@@ -2843,7 +2843,7 @@ static void GenSystemArrayCopyEndAddress(X86Assembler* assembler,
void IntrinsicLocationsBuilderX86::VisitSystemArrayCopy(HInvoke* invoke) {
// The only read barrier implementation supporting the
// SystemArrayCopy intrinsic is the Baker-style read barriers.
- if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
+ if (gUseReadBarrier && !kUseBakerReadBarrier) {
return;
}
@@ -2875,7 +2875,7 @@ void IntrinsicLocationsBuilderX86::VisitSystemArrayCopy(HInvoke* invoke) {
void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
// The only read barrier implementation supporting the
// SystemArrayCopy intrinsic is the Baker-style read barriers.
- DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier);
+ DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
X86Assembler* assembler = GetAssembler();
LocationSummary* locations = invoke->GetLocations();
@@ -2995,7 +2995,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
// slow path.
if (!optimizations.GetSourceIsNonPrimitiveArray()) {
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
// /* HeapReference<Class> */ temp1 = src->klass_
codegen_->GenerateFieldLoadWithBakerReadBarrier(
invoke, temp1_loc, src, class_offset, /* needs_null_check= */ false);
@@ -3022,7 +3022,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
__ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
}
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
if (length.Equals(Location::RegisterLocation(temp3))) {
// When Baker read barriers are enabled, register `temp3`,
// which in the present case contains the `length` parameter,
@@ -3120,7 +3120,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
} else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
// Bail out if the source is not a non primitive array.
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
// /* HeapReference<Class> */ temp1 = src->klass_
codegen_->GenerateFieldLoadWithBakerReadBarrier(
invoke, temp1_loc, src, class_offset, /* needs_null_check= */ false);
@@ -3151,7 +3151,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
// Compute the base source address in `temp1`.
GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1);
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
// If it is needed (in the case of the fast-path loop), the base
// destination address is computed later, as `temp2` is used for
// intermediate computations.
@@ -3259,7 +3259,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
}
// We only need one card marking on the destination array.
- codegen_->MarkGCCard(temp1, temp2, dest, Register(kNoRegister), /* value_can_be_null= */ false);
+ codegen_->MarkGCCard(temp1, temp2, dest, Register(kNoRegister), /* emit_null_check= */ false);
__ Bind(intrinsic_slow_path->GetExitLabel());
}
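
As in the ARM hunks earlier, the last MarkGCCard argument is renamed from value_can_be_null to emit_null_check, which names what the helper does with the flag: guard the card mark with a null check on the stored value. The sketch below only illustrates that semantics as inferred from the call sites (false where the value is known non-null); it is not ART's implementation:

    void MarkGCCardSketch(uintptr_t object, uintptr_t value, bool emit_null_check) {
      if (emit_null_check && value == 0) {
        return;  // Nothing non-null was stored, so the card need not be dirtied.
      }
      // ... compute the card table entry for `object` and mark it dirty ...
      (void)object;
    }
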
@@ -3377,7 +3377,7 @@ void IntrinsicCodeGeneratorX86::VisitReferenceGetReferent(HInvoke* invoke) {
SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
codegen_->AddSlowPath(slow_path);
- if (kEmitCompilerReadBarrier) {
+ if (gUseReadBarrier) {
// Check self->GetWeakRefAccessEnabled().
ThreadOffset32 offset = Thread::WeakRefAccessEnabledOffset<kX86PointerSize>();
__ fs()->cmpl(Address::Absolute(offset),
@@ -3400,7 +3400,7 @@ void IntrinsicCodeGeneratorX86::VisitReferenceGetReferent(HInvoke* invoke) {
// Load the value from the field.
uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
out,
obj.AsRegister<Register>(),
@@ -3442,7 +3442,7 @@ void IntrinsicCodeGeneratorX86::VisitReferenceRefersTo(HInvoke* invoke) {
NearLabel end, return_true, return_false;
__ cmpl(out, other);
- if (kEmitCompilerReadBarrier) {
+ if (gUseReadBarrier) {
DCHECK(kUseBakerReadBarrier);
__ j(kEqual, &return_true);
@@ -3781,7 +3781,7 @@ static Register GenerateVarHandleFieldReference(HInvoke* invoke,
Location::RegisterLocation(temp),
Address(temp, declaring_class_offset),
/* fixup_label= */ nullptr,
- kCompilerReadBarrierOption);
+ gCompilerReadBarrierOption);
return temp;
}
@@ -3794,7 +3794,7 @@ static Register GenerateVarHandleFieldReference(HInvoke* invoke,
static void CreateVarHandleGetLocations(HInvoke* invoke) {
// The only read barrier implementation supporting the
// VarHandleGet intrinsic is the Baker-style read barriers.
- if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
+ if (gUseReadBarrier && !kUseBakerReadBarrier) {
return;
}
@@ -3836,7 +3836,7 @@ static void CreateVarHandleGetLocations(HInvoke* invoke) {
static void GenerateVarHandleGet(HInvoke* invoke, CodeGeneratorX86* codegen) {
// The only read barrier implementation supporting the
// VarHandleGet intrinsic is the Baker-style read barriers.
- DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier);
+ DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
X86Assembler* assembler = codegen->GetAssembler();
LocationSummary* locations = invoke->GetLocations();
@@ -3860,7 +3860,7 @@ static void GenerateVarHandleGet(HInvoke* invoke, CodeGeneratorX86* codegen) {
Address field_addr(ref, offset, TIMES_1, 0);
// Load the value from the field
- if (type == DataType::Type::kReference && kCompilerReadBarrierOption == kWithReadBarrier) {
+ if (type == DataType::Type::kReference && gCompilerReadBarrierOption == kWithReadBarrier) {
codegen->GenerateReferenceLoadWithBakerReadBarrier(
invoke, out, ref, field_addr, /* needs_null_check= */ false);
} else if (type == DataType::Type::kInt64 &&
@@ -3917,7 +3917,7 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleGetOpaque(HInvoke* invoke) {
static void CreateVarHandleSetLocations(HInvoke* invoke) {
// The only read barrier implementation supporting the
// VarHandleGet intrinsic is the Baker-style read barriers.
- if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
+ if (gUseReadBarrier && !kUseBakerReadBarrier) {
return;
}
@@ -3963,7 +3963,7 @@ static void CreateVarHandleSetLocations(HInvoke* invoke) {
case DataType::Type::kInt64:
// We only handle constant non-atomic int64 values.
DCHECK(value->IsConstant());
- locations->SetInAt(value_index, Location::ConstantLocation(value->AsConstant()));
+ locations->SetInAt(value_index, Location::ConstantLocation(value));
break;
case DataType::Type::kReference:
locations->SetInAt(value_index, Location::RequiresRegister());
@@ -3990,7 +3990,7 @@ static void CreateVarHandleSetLocations(HInvoke* invoke) {
static void GenerateVarHandleSet(HInvoke* invoke, CodeGeneratorX86* codegen) {
// The only read barrier implementation supporting the
// VarHandleGet intrinsic is the Baker-style read barriers.
- DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier);
+ DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
X86Assembler* assembler = codegen->GetAssembler();
LocationSummary* locations = invoke->GetLocations();
@@ -4041,13 +4041,16 @@ static void GenerateVarHandleSet(HInvoke* invoke, CodeGeneratorX86* codegen) {
InstructionCodeGeneratorX86* instr_codegen =
down_cast<InstructionCodeGeneratorX86*>(codegen->GetInstructionVisitor());
// Store the value to the field
- instr_codegen->HandleFieldSet(invoke,
- value_index,
- value_type,
- Address(reference, offset, TIMES_1, 0),
- reference,
- is_volatile,
- /* value_can_be_null */ true);
+ instr_codegen->HandleFieldSet(
+ invoke,
+ value_index,
+ value_type,
+ Address(reference, offset, TIMES_1, 0),
+ reference,
+ is_volatile,
+ /* value_can_be_null */ true,
+ // Value can be null, and this write barrier is not being relied on for other sets.
+ WriteBarrierKind::kEmitWithNullCheck);
__ Bind(slow_path->GetExitLabel());
}
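
HandleFieldSet on x86 gains a trailing WriteBarrierKind argument; this intrinsic passes kEmitWithNullCheck because, as the new comment says, the value can be null and no other store's barrier is being relied on. Only that one enumerator is visible in this diff; the sketch below marks the rest as assumptions tied to the write-barrier handling:

    enum class WriteBarrierKind {
      kEmitWithNullCheck,  // Seen at these call sites: mark the card, guarded by a value null check.
      // Other kinds (e.g. emit without the null check, or skip the barrier because a
      // later set is relied on) are assumed but not shown in this hunk.
    };
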
@@ -4087,7 +4090,7 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleSetOpaque(HInvoke* invoke) {
static void CreateVarHandleGetAndSetLocations(HInvoke* invoke) {
// The only read barrier implementation supporting the
// VarHandleGet intrinsic is the Baker-style read barriers.
- if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
+ if (gUseReadBarrier && !kUseBakerReadBarrier) {
return;
}
@@ -4135,7 +4138,7 @@ static void CreateVarHandleGetAndSetLocations(HInvoke* invoke) {
static void GenerateVarHandleGetAndSet(HInvoke* invoke, CodeGeneratorX86* codegen) {
// The only read barrier implementation supporting the
// VarHandleGet intrinsic is the Baker-style read barriers.
- DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier);
+ DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
X86Assembler* assembler = codegen->GetAssembler();
LocationSummary* locations = invoke->GetLocations();
@@ -4194,7 +4197,7 @@ static void GenerateVarHandleGetAndSet(HInvoke* invoke, CodeGeneratorX86* codege
__ movd(locations->Out().AsFpuRegister<XmmRegister>(), EAX);
break;
case DataType::Type::kReference: {
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
// Need to make sure the reference stored in the field is a to-space
// one before attempting the CAS or the CAS could fail incorrectly.
codegen->GenerateReferenceLoadWithBakerReadBarrier(
@@ -4208,7 +4211,7 @@ static void GenerateVarHandleGetAndSet(HInvoke* invoke, CodeGeneratorX86* codege
&temp2);
}
codegen->MarkGCCard(
- temp, temp2, reference, value.AsRegister<Register>(), /* value_can_be_null= */ false);
+ temp, temp2, reference, value.AsRegister<Register>(), /* emit_null_check= */ false);
if (kPoisonHeapReferences) {
__ movl(temp, value.AsRegister<Register>());
__ PoisonHeapReference(temp);
@@ -4258,7 +4261,7 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndSetRelease(HInvoke* invoke)
static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke) {
// The only read barrier implementation supporting the
// VarHandleGet intrinsic is the Baker-style read barriers.
- if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
+ if (gUseReadBarrier && !kUseBakerReadBarrier) {
return;
}
@@ -4322,7 +4325,7 @@ static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke) {
static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke, CodeGeneratorX86* codegen) {
// The only read barrier implementation supporting the
// VarHandleGet intrinsic is the Baker-style read barriers.
- DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier);
+ DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
X86Assembler* assembler = codegen->GetAssembler();
LocationSummary* locations = invoke->GetLocations();
@@ -4441,7 +4444,7 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleCompareAndExchangeRelease(HInvoke*
static void CreateVarHandleGetAndAddLocations(HInvoke* invoke) {
// The only read barrier implementation supporting the
// VarHandleGet intrinsic is the Baker-style read barriers.
- if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
+ if (gUseReadBarrier && !kUseBakerReadBarrier) {
return;
}
@@ -4490,7 +4493,7 @@ static void CreateVarHandleGetAndAddLocations(HInvoke* invoke) {
static void GenerateVarHandleGetAndAdd(HInvoke* invoke, CodeGeneratorX86* codegen) {
// The only read barrier implementation supporting the
// VarHandleGet intrinsic is the Baker-style read barriers.
- DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier);
+ DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
X86Assembler* assembler = codegen->GetAssembler();
LocationSummary* locations = invoke->GetLocations();
@@ -4591,7 +4594,7 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndAddRelease(HInvoke* invoke)
static void CreateVarHandleGetAndBitwiseOpLocations(HInvoke* invoke) {
// The only read barrier implementation supporting the
// VarHandleGet intrinsic is the Baker-style read barriers.
- if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
+ if (gUseReadBarrier && !kUseBakerReadBarrier) {
return;
}
@@ -4659,7 +4662,7 @@ static void GenerateBitwiseOp(HInvoke* invoke,
static void GenerateVarHandleGetAndBitwiseOp(HInvoke* invoke, CodeGeneratorX86* codegen) {
// The only read barrier implementation supporting the
// VarHandleGet intrinsic is the Baker-style read barriers.
- DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier);
+ DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
X86Assembler* assembler = codegen->GetAssembler();
LocationSummary* locations = invoke->GetLocations();
@@ -4829,64 +4832,9 @@ void IntrinsicLocationsBuilderX86::VisitMathFmaFloat(HInvoke* invoke) {
}
}
-UNIMPLEMENTED_INTRINSIC(X86, MathRoundDouble)
-UNIMPLEMENTED_INTRINSIC(X86, FloatIsInfinite)
-UNIMPLEMENTED_INTRINSIC(X86, DoubleIsInfinite)
-UNIMPLEMENTED_INTRINSIC(X86, IntegerHighestOneBit)
-UNIMPLEMENTED_INTRINSIC(X86, LongHighestOneBit)
-UNIMPLEMENTED_INTRINSIC(X86, LongDivideUnsigned)
-UNIMPLEMENTED_INTRINSIC(X86, CRC32Update)
-UNIMPLEMENTED_INTRINSIC(X86, CRC32UpdateBytes)
-UNIMPLEMENTED_INTRINSIC(X86, CRC32UpdateByteBuffer)
-UNIMPLEMENTED_INTRINSIC(X86, FP16ToFloat)
-UNIMPLEMENTED_INTRINSIC(X86, FP16ToHalf)
-UNIMPLEMENTED_INTRINSIC(X86, FP16Floor)
-UNIMPLEMENTED_INTRINSIC(X86, FP16Ceil)
-UNIMPLEMENTED_INTRINSIC(X86, FP16Rint)
-UNIMPLEMENTED_INTRINSIC(X86, FP16Greater)
-UNIMPLEMENTED_INTRINSIC(X86, FP16GreaterEquals)
-UNIMPLEMENTED_INTRINSIC(X86, FP16Less)
-UNIMPLEMENTED_INTRINSIC(X86, FP16LessEquals)
-UNIMPLEMENTED_INTRINSIC(X86, FP16Compare)
-UNIMPLEMENTED_INTRINSIC(X86, FP16Min)
-UNIMPLEMENTED_INTRINSIC(X86, FP16Max)
-UNIMPLEMENTED_INTRINSIC(X86, MathMultiplyHigh)
-
-UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOf);
-UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOfAfter);
-UNIMPLEMENTED_INTRINSIC(X86, StringBufferAppend);
-UNIMPLEMENTED_INTRINSIC(X86, StringBufferLength);
-UNIMPLEMENTED_INTRINSIC(X86, StringBufferToString);
-UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendObject);
-UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendString);
-UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendCharSequence);
-UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendCharArray);
-UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendBoolean);
-UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendChar);
-UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendInt);
-UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendLong);
-UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendFloat);
-UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendDouble);
-UNIMPLEMENTED_INTRINSIC(X86, StringBuilderLength);
-UNIMPLEMENTED_INTRINSIC(X86, StringBuilderToString);
-
-// 1.8.
-
-UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndAddInt)
-UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndAddLong)
-UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetInt)
-UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetLong)
-UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetObject)
-
-UNIMPLEMENTED_INTRINSIC(X86, MethodHandleInvokeExact)
-UNIMPLEMENTED_INTRINSIC(X86, MethodHandleInvoke)
-
-// OpenJDK 11
-UNIMPLEMENTED_INTRINSIC(X86, JdkUnsafeGetAndAddInt)
-UNIMPLEMENTED_INTRINSIC(X86, JdkUnsafeGetAndAddLong)
-UNIMPLEMENTED_INTRINSIC(X86, JdkUnsafeGetAndSetInt)
-UNIMPLEMENTED_INTRINSIC(X86, JdkUnsafeGetAndSetLong)
-UNIMPLEMENTED_INTRINSIC(X86, JdkUnsafeGetAndSetObject)
+#define MARK_UNIMPLEMENTED(Name) UNIMPLEMENTED_INTRINSIC(X86, Name)
+UNIMPLEMENTED_INTRINSIC_LIST_X86(MARK_UNIMPLEMENTED);
+#undef MARK_UNIMPLEMENTED
UNREACHABLE_INTRINSICS(X86)
diff --git a/compiler/optimizing/intrinsics_x86.h b/compiler/optimizing/intrinsics_x86.h
index ae150dad43..77c236d244 100644
--- a/compiler/optimizing/intrinsics_x86.h
+++ b/compiler/optimizing/intrinsics_x86.h
@@ -17,9 +17,10 @@
#ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_X86_H_
#define ART_COMPILER_OPTIMIZING_INTRINSICS_X86_H_
+#include "base/macros.h"
#include "intrinsics.h"
-namespace art {
+namespace art HIDDEN {
class ArenaAllocator;
class HInvokeStaticOrDirect;
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 3c31374f67..9d0d5f155e 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -36,7 +36,7 @@
#include "utils/x86_64/assembler_x86_64.h"
#include "utils/x86_64/constants_x86_64.h"
-namespace art {
+namespace art HIDDEN {
namespace x86_64 {
@@ -71,7 +71,7 @@ class ReadBarrierSystemArrayCopySlowPathX86_64 : public SlowPathCode {
public:
explicit ReadBarrierSystemArrayCopySlowPathX86_64(HInstruction* instruction)
: SlowPathCode(instruction) {
- DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(gUseReadBarrier);
DCHECK(kUseBakerReadBarrier);
}
@@ -836,7 +836,7 @@ void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopyInt(HInvoke* invoke) {
void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
// The only read barrier implementation supporting the
// SystemArrayCopy intrinsic is the Baker-style read barriers.
- if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
+ if (gUseReadBarrier && !kUseBakerReadBarrier) {
return;
}
@@ -887,7 +887,7 @@ static void GenSystemArrayCopyAddresses(X86_64Assembler* assembler,
void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
// The only read barrier implementation supporting the
// SystemArrayCopy intrinsic is the Baker-style read barriers.
- DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier);
+ DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
X86_64Assembler* assembler = GetAssembler();
LocationSummary* locations = invoke->GetLocations();
@@ -1002,7 +1002,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
// slow path.
bool did_unpoison = false;
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
// /* HeapReference<Class> */ temp1 = dest->klass_
codegen_->GenerateFieldLoadWithBakerReadBarrier(
invoke, temp1_loc, dest, class_offset, /* needs_null_check= */ false);
@@ -1034,7 +1034,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
// Bail out if the destination is not a non primitive array.
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
// /* HeapReference<Class> */ TMP = temp1->component_type_
codegen_->GenerateFieldLoadWithBakerReadBarrier(
invoke, TMP_loc, temp1, component_offset, /* needs_null_check= */ false);
@@ -1055,7 +1055,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
if (!optimizations.GetSourceIsNonPrimitiveArray()) {
// Bail out if the source is not a non primitive array.
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
// For the same reason given earlier, `temp1` is not trashed by the
// read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
// /* HeapReference<Class> */ TMP = temp2->component_type_
@@ -1081,7 +1081,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
if (optimizations.GetDestinationIsTypedObjectArray()) {
NearLabel do_copy;
__ j(kEqual, &do_copy);
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
// /* HeapReference<Class> */ temp1 = temp1->component_type_
codegen_->GenerateFieldLoadWithBakerReadBarrier(
invoke, temp1_loc, temp1, component_offset, /* needs_null_check= */ false);
@@ -1109,7 +1109,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
} else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
// Bail out if the source is not a non primitive array.
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
// /* HeapReference<Class> */ temp1 = src->klass_
codegen_->GenerateFieldLoadWithBakerReadBarrier(
invoke, temp1_loc, src, class_offset, /* needs_null_check= */ false);
@@ -1141,7 +1141,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
GenSystemArrayCopyAddresses(
GetAssembler(), type, src, src_pos, dest, dest_pos, length, temp1, temp2, temp3);
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
// SystemArrayCopy implementation for Baker read barriers (see
// also CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier):
//
@@ -1224,7 +1224,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
}
// We only need one card marking on the destination array.
- codegen_->MarkGCCard(temp1, temp2, dest, CpuRegister(kNoRegister), /* value_can_be_null= */ false);
+ codegen_->MarkGCCard(temp1, temp2, dest, CpuRegister(kNoRegister), /* emit_null_check= */ false);
__ Bind(intrinsic_slow_path->GetExitLabel());
}
@@ -1888,7 +1888,7 @@ static void GenUnsafeGet(HInvoke* invoke,
break;
case DataType::Type::kReference: {
- if (kEmitCompilerReadBarrier) {
+ if (gUseReadBarrier) {
if (kUseBakerReadBarrier) {
Address src(base, offset, ScaleFactor::TIMES_1, 0);
codegen->GenerateReferenceLoadWithBakerReadBarrier(
@@ -1930,7 +1930,7 @@ static bool UnsafeGetIntrinsicOnCallList(Intrinsics intrinsic) {
}
static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
- bool can_call = kEmitCompilerReadBarrier && UnsafeGetIntrinsicOnCallList(invoke->GetIntrinsic());
+ bool can_call = gUseReadBarrier && UnsafeGetIntrinsicOnCallList(invoke->GetIntrinsic());
LocationSummary* locations =
new (allocator) LocationSummary(invoke,
can_call
@@ -2230,7 +2230,7 @@ void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutLongRelease(HInvoke* invoke)
static void CreateUnsafeCASLocations(ArenaAllocator* allocator,
DataType::Type type,
HInvoke* invoke) {
- const bool can_call = kEmitCompilerReadBarrier &&
+ const bool can_call = gUseReadBarrier &&
kUseBakerReadBarrier &&
IsUnsafeCASObject(invoke);
LocationSummary* locations =
@@ -2253,7 +2253,7 @@ static void CreateUnsafeCASLocations(ArenaAllocator* allocator,
// Need two temporaries for MarkGCCard.
locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too.
locations->AddTemp(Location::RequiresRegister());
- if (kEmitCompilerReadBarrier) {
+ if (gUseReadBarrier) {
// Need three temporaries for GenerateReferenceLoadWithBakerReadBarrier.
DCHECK(kUseBakerReadBarrier);
locations->AddTemp(Location::RequiresRegister());
@@ -2298,7 +2298,7 @@ void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeCompareAndSetLong(HInvoke* i
void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeCompareAndSetObject(HInvoke* invoke) {
// The only supported read barrier implementation is the Baker-style read barriers.
- if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
+ if (gUseReadBarrier && !kUseBakerReadBarrier) {
return;
}
@@ -2438,7 +2438,7 @@ static void GenCompareAndSetOrExchangeRef(CodeGeneratorX86_64* codegen,
CpuRegister temp3,
bool is_cmpxchg) {
// The only supported read barrier implementation is the Baker-style read barriers.
- DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier);
+ DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
@@ -2447,7 +2447,7 @@ static void GenCompareAndSetOrExchangeRef(CodeGeneratorX86_64* codegen,
codegen->MarkGCCard(temp1, temp2, base, value, value_can_be_null);
Address field_addr(base, offset, TIMES_1, 0);
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
// Need to make sure the reference stored in the field is a to-space
// one before attempting the CAS or the CAS could fail incorrectly.
codegen->GenerateReferenceLoadWithBakerReadBarrier(
@@ -2556,7 +2556,7 @@ static void GenCompareAndSetOrExchange(CodeGeneratorX86_64* codegen,
CpuRegister new_value_reg = new_value.AsRegister<CpuRegister>();
CpuRegister temp1 = locations->GetTemp(temp1_index).AsRegister<CpuRegister>();
CpuRegister temp2 = locations->GetTemp(temp2_index).AsRegister<CpuRegister>();
- CpuRegister temp3 = kEmitCompilerReadBarrier
+ CpuRegister temp3 = gUseReadBarrier
? locations->GetTemp(temp3_index).AsRegister<CpuRegister>()
: CpuRegister(kNoRegister);
DCHECK(RegsAreAllDifferent({base, offset, temp1, temp2, temp3}));
@@ -2624,7 +2624,7 @@ void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeCompareAndSetLong(HInvoke* invo
void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeCompareAndSetObject(HInvoke* invoke) {
// The only supported read barrier implementation is the Baker-style read barriers.
- DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier);
+ DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
GenCAS(DataType::Type::kReference, invoke, codegen_);
}
@@ -3128,7 +3128,7 @@ void IntrinsicCodeGeneratorX86_64::VisitReferenceGetReferent(HInvoke* invoke) {
SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
codegen_->AddSlowPath(slow_path);
- if (kEmitCompilerReadBarrier) {
+ if (gUseReadBarrier) {
// Check self->GetWeakRefAccessEnabled().
ThreadOffset64 offset = Thread::WeakRefAccessEnabledOffset<kX86_64PointerSize>();
__ gs()->cmpl(Address::Absolute(offset, /* no_rip= */ true),
@@ -3150,7 +3150,7 @@ void IntrinsicCodeGeneratorX86_64::VisitReferenceGetReferent(HInvoke* invoke) {
// Load the value from the field.
uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
out,
obj.AsRegister<CpuRegister>(),
@@ -3191,7 +3191,7 @@ void IntrinsicCodeGeneratorX86_64::VisitReferenceRefersTo(HInvoke* invoke) {
__ cmpl(out, other);
- if (kEmitCompilerReadBarrier) {
+ if (gUseReadBarrier) {
DCHECK(kUseBakerReadBarrier);
NearLabel calculate_result;
@@ -3771,7 +3771,7 @@ static void GenerateVarHandleTarget(HInvoke* invoke,
Location::RegisterLocation(target.object),
Address(method, ArtField::DeclaringClassOffset()),
/*fixup_label=*/ nullptr,
- kCompilerReadBarrierOption);
+ gCompilerReadBarrierOption);
}
}
} else {
@@ -3790,7 +3790,7 @@ static void GenerateVarHandleTarget(HInvoke* invoke,
static bool HasVarHandleIntrinsicImplementation(HInvoke* invoke) {
// The only supported read barrier implementation is the Baker-style read barriers.
- if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
+ if (gUseReadBarrier && !kUseBakerReadBarrier) {
return false;
}
@@ -3876,7 +3876,7 @@ static void GenerateVarHandleGet(HInvoke* invoke,
Location out = locations->Out();
if (type == DataType::Type::kReference) {
- if (kEmitCompilerReadBarrier) {
+ if (gUseReadBarrier) {
DCHECK(kUseBakerReadBarrier);
codegen->GenerateReferenceLoadWithBakerReadBarrier(
invoke, out, CpuRegister(target.object), src, /* needs_null_check= */ false);
@@ -3985,16 +3985,19 @@ static void GenerateVarHandleSet(HInvoke* invoke,
Address dst(CpuRegister(target.object), CpuRegister(target.offset), TIMES_1, 0);
// Store the value to the field.
- codegen->GetInstructionCodegen()->HandleFieldSet(invoke,
- value_index,
- last_temp_index,
- value_type,
- dst,
- CpuRegister(target.object),
- is_volatile,
- is_atomic,
- /*value_can_be_null=*/ true,
- byte_swap);
+ codegen->GetInstructionCodegen()->HandleFieldSet(
+ invoke,
+ value_index,
+ last_temp_index,
+ value_type,
+ dst,
+ CpuRegister(target.object),
+ is_volatile,
+ is_atomic,
+ /*value_can_be_null=*/true,
+ byte_swap,
+ // Value can be null, and this write barrier is not being relied on for other sets.
+ WriteBarrierKind::kEmitWithNullCheck);
// setVolatile needs kAnyAny barrier, but HandleFieldSet takes care of that.
@@ -4070,7 +4073,7 @@ static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke) {
// Need two temporaries for MarkGCCard.
locations->AddTemp(Location::RequiresRegister());
locations->AddTemp(Location::RequiresRegister());
- if (kEmitCompilerReadBarrier) {
+ if (gUseReadBarrier) {
// Need three temporaries for GenerateReferenceLoadWithBakerReadBarrier.
DCHECK(kUseBakerReadBarrier);
locations->AddTemp(Location::RequiresRegister());
@@ -4085,7 +4088,7 @@ static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke,
CodeGeneratorX86_64* codegen,
bool is_cmpxchg,
bool byte_swap = false) {
- DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier);
+ DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
X86_64Assembler* assembler = codegen->GetAssembler();
LocationSummary* locations = invoke->GetLocations();
@@ -4218,7 +4221,7 @@ static void CreateVarHandleGetAndSetLocations(HInvoke* invoke) {
// Need two temporaries for MarkGCCard.
locations->AddTemp(Location::RequiresRegister());
locations->AddTemp(Location::RequiresRegister());
- if (kEmitCompilerReadBarrier) {
+ if (gUseReadBarrier) {
// Need a third temporary for GenerateReferenceLoadWithBakerReadBarrier.
DCHECK(kUseBakerReadBarrier);
locations->AddTemp(Location::RequiresRegister());
@@ -4267,7 +4270,7 @@ static void GenerateVarHandleGetAndSet(HInvoke* invoke,
CpuRegister temp2 = locations->GetTemp(temp_count - 2).AsRegister<CpuRegister>();
CpuRegister valreg = value.AsRegister<CpuRegister>();
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
codegen->GenerateReferenceLoadWithBakerReadBarrier(
invoke,
locations->GetTemp(temp_count - 3),
@@ -4278,7 +4281,7 @@ static void GenerateVarHandleGetAndSet(HInvoke* invoke,
&temp1,
&temp2);
}
- codegen->MarkGCCard(temp1, temp2, ref, valreg, /*value_can_be_null=*/ false);
+ codegen->MarkGCCard(temp1, temp2, ref, valreg, /* emit_null_check= */ false);
DCHECK_EQ(valreg, out.AsRegister<CpuRegister>());
if (kPoisonHeapReferences) {
@@ -4647,7 +4650,7 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
bool need_any_store_barrier,
bool need_any_any_barrier,
bool byte_swap = false) {
- DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier);
+ DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
X86_64Assembler* assembler = codegen->GetAssembler();
LocationSummary* locations = invoke->GetLocations();
@@ -4987,57 +4990,9 @@ void VarHandleSlowPathX86_64::EmitByteArrayViewCode(CodeGeneratorX86_64* codegen
__ jmp(GetExitLabel());
}
-UNIMPLEMENTED_INTRINSIC(X86_64, CRC32Update)
-UNIMPLEMENTED_INTRINSIC(X86_64, CRC32UpdateBytes)
-UNIMPLEMENTED_INTRINSIC(X86_64, CRC32UpdateByteBuffer)
-UNIMPLEMENTED_INTRINSIC(X86_64, FP16ToFloat)
-UNIMPLEMENTED_INTRINSIC(X86_64, FP16ToHalf)
-UNIMPLEMENTED_INTRINSIC(X86_64, FP16Floor)
-UNIMPLEMENTED_INTRINSIC(X86_64, FP16Ceil)
-UNIMPLEMENTED_INTRINSIC(X86_64, FP16Rint)
-UNIMPLEMENTED_INTRINSIC(X86_64, FP16Greater)
-UNIMPLEMENTED_INTRINSIC(X86_64, FP16GreaterEquals)
-UNIMPLEMENTED_INTRINSIC(X86_64, FP16Less)
-UNIMPLEMENTED_INTRINSIC(X86_64, FP16LessEquals)
-UNIMPLEMENTED_INTRINSIC(X86_64, FP16Compare)
-UNIMPLEMENTED_INTRINSIC(X86_64, FP16Min)
-UNIMPLEMENTED_INTRINSIC(X86_64, FP16Max)
-
-UNIMPLEMENTED_INTRINSIC(X86_64, StringStringIndexOf);
-UNIMPLEMENTED_INTRINSIC(X86_64, StringStringIndexOfAfter);
-UNIMPLEMENTED_INTRINSIC(X86_64, StringBufferAppend);
-UNIMPLEMENTED_INTRINSIC(X86_64, StringBufferLength);
-UNIMPLEMENTED_INTRINSIC(X86_64, StringBufferToString);
-UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendObject);
-UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendString);
-UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendCharSequence);
-UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendCharArray);
-UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendBoolean);
-UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendChar);
-UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendInt);
-UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendLong);
-UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendFloat);
-UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendDouble);
-UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderLength);
-UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderToString);
-
-// 1.8.
-
-UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndAddInt)
-UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndAddLong)
-UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndSetInt)
-UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndSetLong)
-UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndSetObject)
-
-UNIMPLEMENTED_INTRINSIC(X86_64, MethodHandleInvokeExact)
-UNIMPLEMENTED_INTRINSIC(X86_64, MethodHandleInvoke)
-
-// OpenJDK 11
-UNIMPLEMENTED_INTRINSIC(X86_64, JdkUnsafeGetAndAddInt)
-UNIMPLEMENTED_INTRINSIC(X86_64, JdkUnsafeGetAndAddLong)
-UNIMPLEMENTED_INTRINSIC(X86_64, JdkUnsafeGetAndSetInt)
-UNIMPLEMENTED_INTRINSIC(X86_64, JdkUnsafeGetAndSetLong)
-UNIMPLEMENTED_INTRINSIC(X86_64, JdkUnsafeGetAndSetObject)
+#define MARK_UNIMPLEMENTED(Name) UNIMPLEMENTED_INTRINSIC(X86_64, Name)
+UNIMPLEMENTED_INTRINSIC_LIST_X86_64(MARK_UNIMPLEMENTED);
+#undef MARK_UNIMPLEMENTED
UNREACHABLE_INTRINSICS(X86_64)
diff --git a/compiler/optimizing/intrinsics_x86_64.h b/compiler/optimizing/intrinsics_x86_64.h
index 199cfede1a..59fe815a94 100644
--- a/compiler/optimizing/intrinsics_x86_64.h
+++ b/compiler/optimizing/intrinsics_x86_64.h
@@ -17,9 +17,10 @@
#ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_X86_64_H_
#define ART_COMPILER_OPTIMIZING_INTRINSICS_X86_64_H_
+#include "base/macros.h"
#include "intrinsics.h"
-namespace art {
+namespace art HIDDEN {
class ArenaAllocator;
class HInvokeStaticOrDirect;
diff --git a/compiler/optimizing/licm.cc b/compiler/optimizing/licm.cc
index 0edb23b857..0c791b640d 100644
--- a/compiler/optimizing/licm.cc
+++ b/compiler/optimizing/licm.cc
@@ -18,7 +18,7 @@
#include "side_effects_analysis.h"
-namespace art {
+namespace art HIDDEN {
static bool IsPhiOf(HInstruction* instruction, HBasicBlock* block) {
return instruction->IsPhi() && instruction->GetBlock() == block;
diff --git a/compiler/optimizing/licm.h b/compiler/optimizing/licm.h
index 9cafddb05a..1a86b6eb9f 100644
--- a/compiler/optimizing/licm.h
+++ b/compiler/optimizing/licm.h
@@ -17,10 +17,11 @@
#ifndef ART_COMPILER_OPTIMIZING_LICM_H_
#define ART_COMPILER_OPTIMIZING_LICM_H_
+#include "base/macros.h"
#include "nodes.h"
#include "optimization.h"
-namespace art {
+namespace art HIDDEN {
class SideEffectsAnalysis;
diff --git a/compiler/optimizing/licm_test.cc b/compiler/optimizing/licm_test.cc
index adc3cabe87..f8481099f4 100644
--- a/compiler/optimizing/licm_test.cc
+++ b/compiler/optimizing/licm_test.cc
@@ -17,12 +17,13 @@
#include "licm.h"
#include "base/arena_allocator.h"
+#include "base/macros.h"
#include "builder.h"
#include "nodes.h"
#include "optimizing_unit_test.h"
#include "side_effects_analysis.h"
-namespace art {
+namespace art HIDDEN {
/**
* Fixture class for the LICM tests.
diff --git a/compiler/optimizing/linear_order.cc b/compiler/optimizing/linear_order.cc
index 58e00a810d..25ca866b2c 100644
--- a/compiler/optimizing/linear_order.cc
+++ b/compiler/optimizing/linear_order.cc
@@ -19,7 +19,7 @@
#include "base/scoped_arena_allocator.h"
#include "base/scoped_arena_containers.h"
-namespace art {
+namespace art HIDDEN {
static bool InSameLoop(HLoopInformation* first_loop, HLoopInformation* second_loop) {
return first_loop == second_loop;
diff --git a/compiler/optimizing/linear_order.h b/compiler/optimizing/linear_order.h
index 151db001e1..75e75048a3 100644
--- a/compiler/optimizing/linear_order.h
+++ b/compiler/optimizing/linear_order.h
@@ -19,9 +19,10 @@
#include <type_traits>
+#include "base/macros.h"
#include "nodes.h"
-namespace art {
+namespace art HIDDEN {
void LinearizeGraphInternal(const HGraph* graph, ArrayRef<HBasicBlock*> linear_order);
diff --git a/compiler/optimizing/linearize_test.cc b/compiler/optimizing/linearize_test.cc
index d56ae11ca9..01daa23511 100644
--- a/compiler/optimizing/linearize_test.cc
+++ b/compiler/optimizing/linearize_test.cc
@@ -17,6 +17,7 @@
#include <fstream>
#include "base/arena_allocator.h"
+#include "base/macros.h"
#include "builder.h"
#include "code_generator.h"
#include "dex/dex_file.h"
@@ -28,9 +29,9 @@
#include "pretty_printer.h"
#include "ssa_liveness_analysis.h"
-namespace art {
+namespace art HIDDEN {
-class LinearizeTest : public OptimizingUnitTest {
+class LinearizeTest : public CommonCompilerTest, public OptimizingUnitTestHelper {
protected:
template <size_t number_of_blocks>
void TestCode(const std::vector<uint16_t>& data,
diff --git a/compiler/optimizing/live_interval_test.cc b/compiler/optimizing/live_interval_test.cc
index c60386d7b7..b5d1336d4a 100644
--- a/compiler/optimizing/live_interval_test.cc
+++ b/compiler/optimizing/live_interval_test.cc
@@ -15,12 +15,13 @@
*/
#include "base/arena_allocator.h"
+#include "base/macros.h"
#include "optimizing_unit_test.h"
#include "ssa_liveness_analysis.h"
#include "gtest/gtest.h"
-namespace art {
+namespace art HIDDEN {
TEST(LiveInterval, GetStart) {
ArenaPoolAndAllocator pool;
diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc
index bb8a4dc08e..fb1a23eef4 100644
--- a/compiler/optimizing/live_ranges_test.cc
+++ b/compiler/optimizing/live_ranges_test.cc
@@ -15,6 +15,7 @@
*/
#include "base/arena_allocator.h"
+#include "base/macros.h"
#include "builder.h"
#include "code_generator.h"
#include "dex/dex_file.h"
@@ -25,9 +26,9 @@
#include "prepare_for_register_allocation.h"
#include "ssa_liveness_analysis.h"
-namespace art {
+namespace art HIDDEN {
-class LiveRangesTest : public OptimizingUnitTest {
+class LiveRangesTest : public CommonCompilerTest, public OptimizingUnitTestHelper {
protected:
HGraph* BuildGraph(const std::vector<uint16_t>& data);
diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc
index ba3787e9be..0b421cf9e6 100644
--- a/compiler/optimizing/liveness_test.cc
+++ b/compiler/optimizing/liveness_test.cc
@@ -15,6 +15,7 @@
*/
#include "base/arena_allocator.h"
+#include "base/macros.h"
#include "builder.h"
#include "code_generator.h"
#include "dex/dex_file.h"
@@ -25,9 +26,9 @@
#include "prepare_for_register_allocation.h"
#include "ssa_liveness_analysis.h"
-namespace art {
+namespace art HIDDEN {
-class LivenessTest : public OptimizingUnitTest {
+class LivenessTest : public CommonCompilerTest, public OptimizingUnitTestHelper {
protected:
void TestCode(const std::vector<uint16_t>& data, const char* expected);
};
diff --git a/compiler/optimizing/load_store_analysis.cc b/compiler/optimizing/load_store_analysis.cc
index 3fe42aff2e..f1c50ac03c 100644
--- a/compiler/optimizing/load_store_analysis.cc
+++ b/compiler/optimizing/load_store_analysis.cc
@@ -19,7 +19,7 @@
#include "base/scoped_arena_allocator.h"
#include "optimizing/escape.h"
-namespace art {
+namespace art HIDDEN {
// A cap for the number of heap locations to prevent pathological time/space consumption.
// The number of heap locations for most of the methods stays below this threshold.
@@ -283,14 +283,6 @@ bool LoadStoreAnalysis::Run() {
heap_location_collector_.CleanUp();
return false;
}
- if (heap_location_collector_.HasVolatile() || heap_location_collector_.HasMonitorOps()) {
- // Don't do load/store elimination if the method has volatile field accesses or
- // monitor operations, for now.
- // TODO: do it right.
- heap_location_collector_.CleanUp();
- return false;
- }
-
heap_location_collector_.BuildAliasingMatrix();
heap_location_collector_.DumpReferenceStats(stats_);
return true;
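
With this bail-out removed, methods containing volatile field accesses or monitor operations are no longer excluded from load/store elimination; the load_store_elimination.cc hunks further down classify those accesses as acquire or release points instead. A minimal, self-contained sketch of that routing (illustrative names only, not ART's types):

  #include <cassert>

  // Toy classification mirroring how the LSE visitor now routes these operations.
  enum class MemoryOrderEffect { kAcquire, kRelease, kNone };

  MemoryOrderEffect Classify(bool volatile_load, bool volatile_store,
                             bool monitor_enter, bool monitor_exit) {
    if (volatile_load || monitor_enter) return MemoryOrderEffect::kAcquire;
    if (volatile_store || monitor_exit) return MemoryOrderEffect::kRelease;
    return MemoryOrderEffect::kNone;
  }

  int main() {
    // Volatile gets and monitor-enter act as acquires; volatile sets and monitor-exit as releases.
    assert(Classify(true, false, false, false) == MemoryOrderEffect::kAcquire);
    assert(Classify(false, true, false, false) == MemoryOrderEffect::kRelease);
    assert(Classify(false, false, true, false) == MemoryOrderEffect::kAcquire);
    assert(Classify(false, false, false, true) == MemoryOrderEffect::kRelease);
    return 0;
  }
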
diff --git a/compiler/optimizing/load_store_analysis.h b/compiler/optimizing/load_store_analysis.h
index 4975bae2a2..c46a5b9cc1 100644
--- a/compiler/optimizing/load_store_analysis.h
+++ b/compiler/optimizing/load_store_analysis.h
@@ -20,6 +20,7 @@
#include "base/arena_allocator.h"
#include "base/arena_bit_vector.h"
#include "base/bit_vector-inl.h"
+#include "base/macros.h"
#include "base/scoped_arena_allocator.h"
#include "base/scoped_arena_containers.h"
#include "base/stl_util.h"
@@ -28,7 +29,7 @@
#include "nodes.h"
#include "optimizing/optimizing_compiler_stats.h"
-namespace art {
+namespace art HIDDEN {
enum class LoadStoreAnalysisType {
kBasic,
@@ -170,14 +171,16 @@ class HeapLocation : public ArenaObject<kArenaAllocLSA> {
size_t offset,
HInstruction* index,
size_t vector_length,
- int16_t declaring_class_def_index)
+ int16_t declaring_class_def_index,
+ bool is_vec_op)
: ref_info_(ref_info),
type_(DataType::ToSigned(type)),
offset_(offset),
index_(index),
vector_length_(vector_length),
declaring_class_def_index_(declaring_class_def_index),
- has_aliased_locations_(false) {
+ has_aliased_locations_(false),
+ is_vec_op_(is_vec_op) {
DCHECK(ref_info != nullptr);
DCHECK((offset == kInvalidFieldOffset && index != nullptr) ||
(offset != kInvalidFieldOffset && index == nullptr));
@@ -188,6 +191,7 @@ class HeapLocation : public ArenaObject<kArenaAllocLSA> {
size_t GetOffset() const { return offset_; }
HInstruction* GetIndex() const { return index_; }
size_t GetVectorLength() const { return vector_length_; }
+ bool IsVecOp() const { return is_vec_op_; }
// Returns the definition of declaring class' dex index.
// It's kDeclaringClassDefIndexForArrays for an array element.
@@ -226,11 +230,12 @@ class HeapLocation : public ArenaObject<kArenaAllocLSA> {
// Declaring class's def's dex index.
// Invalid when this HeapLocation is not field access.
const int16_t declaring_class_def_index_;
-
// Has aliased heap locations in the method, due to either the
// reference is aliased or the array element is aliased via different
// index names.
bool has_aliased_locations_;
+ // Whether this HeapLocation represents a vector operation.
+ bool is_vec_op_;
DISALLOW_COPY_AND_ASSIGN(HeapLocation);
};
@@ -253,8 +258,6 @@ class HeapLocationCollector : public HGraphVisitor {
heap_locations_(allocator->Adapter(kArenaAllocLSA)),
aliasing_matrix_(allocator, kInitialAliasingMatrixBitVectorSize, true, kArenaAllocLSA),
has_heap_stores_(false),
- has_volatile_(false),
- has_monitor_operations_(false),
lse_type_(lse_type) {
aliasing_matrix_.ClearAllBits();
}
@@ -319,7 +322,8 @@ class HeapLocationCollector : public HGraphVisitor {
field->GetFieldOffset().SizeValue(),
nullptr,
HeapLocation::kScalar,
- field->GetDeclaringClassDefIndex());
+ field->GetDeclaringClassDefIndex(),
+ /*is_vec_op=*/false);
}
size_t GetArrayHeapLocation(HInstruction* instruction) const {
@@ -328,10 +332,10 @@ class HeapLocationCollector : public HGraphVisitor {
HInstruction* index = instruction->InputAt(1);
DataType::Type type = instruction->GetType();
size_t vector_length = HeapLocation::kScalar;
+ const bool is_vec_op = instruction->IsVecStore() || instruction->IsVecLoad();
if (instruction->IsArraySet()) {
type = instruction->AsArraySet()->GetComponentType();
- } else if (instruction->IsVecStore() ||
- instruction->IsVecLoad()) {
+ } else if (is_vec_op) {
HVecOperation* vec_op = instruction->AsVecOperation();
type = vec_op->GetPackedType();
vector_length = vec_op->GetVectorLength();
@@ -343,21 +347,14 @@ class HeapLocationCollector : public HGraphVisitor {
HeapLocation::kInvalidFieldOffset,
index,
vector_length,
- HeapLocation::kDeclaringClassDefIndexForArrays);
+ HeapLocation::kDeclaringClassDefIndexForArrays,
+ is_vec_op);
}
bool HasHeapStores() const {
return has_heap_stores_;
}
- bool HasVolatile() const {
- return has_volatile_;
- }
-
- bool HasMonitorOps() const {
- return has_monitor_operations_;
- }
-
// Find and return the heap location index in heap_locations_.
// NOTE: When heap locations are created, potentially aliasing/overlapping
// accesses are given different indexes. This find function also
@@ -373,7 +370,8 @@ class HeapLocationCollector : public HGraphVisitor {
size_t offset,
HInstruction* index,
size_t vector_length,
- int16_t declaring_class_def_index) const {
+ int16_t declaring_class_def_index,
+ bool is_vec_op) const {
DataType::Type lookup_type = DataType::ToSigned(type);
for (size_t i = 0; i < heap_locations_.size(); i++) {
HeapLocation* loc = heap_locations_[i];
@@ -382,7 +380,8 @@ class HeapLocationCollector : public HGraphVisitor {
loc->GetOffset() == offset &&
loc->GetIndex() == index &&
loc->GetVectorLength() == vector_length &&
- loc->GetDeclaringClassDefIndex() == declaring_class_def_index) {
+ loc->GetDeclaringClassDefIndex() == declaring_class_def_index &&
+ loc->IsVecOp() == is_vec_op) {
return i;
}
}
@@ -527,22 +526,20 @@ class HeapLocationCollector : public HGraphVisitor {
size_t offset,
HInstruction* index,
size_t vector_length,
- int16_t declaring_class_def_index) {
+ int16_t declaring_class_def_index,
+ bool is_vec_op) {
HInstruction* original_ref = HuntForOriginalReference(ref);
ReferenceInfo* ref_info = GetOrCreateReferenceInfo(original_ref);
size_t heap_location_idx = FindHeapLocationIndex(
- ref_info, type, offset, index, vector_length, declaring_class_def_index);
+ ref_info, type, offset, index, vector_length, declaring_class_def_index, is_vec_op);
if (heap_location_idx == kHeapLocationNotFound) {
- HeapLocation* heap_loc = new (allocator_)
- HeapLocation(ref_info, type, offset, index, vector_length, declaring_class_def_index);
+ HeapLocation* heap_loc = new (allocator_) HeapLocation(
+ ref_info, type, offset, index, vector_length, declaring_class_def_index, is_vec_op);
heap_locations_.push_back(heap_loc);
}
}
void VisitFieldAccess(HInstruction* ref, const FieldInfo& field_info) {
- if (field_info.IsVolatile()) {
- has_volatile_ = true;
- }
DataType::Type type = field_info.GetFieldType();
const uint16_t declaring_class_def_index = field_info.GetDeclaringClassDefIndex();
const size_t offset = field_info.GetFieldOffset().SizeValue();
@@ -551,19 +548,22 @@ class HeapLocationCollector : public HGraphVisitor {
offset,
nullptr,
HeapLocation::kScalar,
- declaring_class_def_index);
+ declaring_class_def_index,
+ /*is_vec_op=*/false);
}
void VisitArrayAccess(HInstruction* array,
HInstruction* index,
DataType::Type type,
- size_t vector_length) {
+ size_t vector_length,
+ bool is_vec_op) {
MaybeCreateHeapLocation(array,
type,
HeapLocation::kInvalidFieldOffset,
index,
vector_length,
- HeapLocation::kDeclaringClassDefIndexForArrays);
+ HeapLocation::kDeclaringClassDefIndexForArrays,
+ is_vec_op);
}
void VisitPredicatedInstanceFieldGet(HPredicatedInstanceFieldGet* instruction) override {
@@ -597,7 +597,7 @@ class HeapLocationCollector : public HGraphVisitor {
HInstruction* array = instruction->InputAt(0);
HInstruction* index = instruction->InputAt(1);
DataType::Type type = instruction->GetType();
- VisitArrayAccess(array, index, type, HeapLocation::kScalar);
+ VisitArrayAccess(array, index, type, HeapLocation::kScalar, /*is_vec_op=*/false);
CreateReferenceInfoForReferenceType(instruction);
}
@@ -605,7 +605,7 @@ class HeapLocationCollector : public HGraphVisitor {
HInstruction* array = instruction->InputAt(0);
HInstruction* index = instruction->InputAt(1);
DataType::Type type = instruction->GetComponentType();
- VisitArrayAccess(array, index, type, HeapLocation::kScalar);
+ VisitArrayAccess(array, index, type, HeapLocation::kScalar, /*is_vec_op=*/false);
has_heap_stores_ = true;
}
@@ -613,7 +613,7 @@ class HeapLocationCollector : public HGraphVisitor {
HInstruction* array = instruction->InputAt(0);
HInstruction* index = instruction->InputAt(1);
DataType::Type type = instruction->GetPackedType();
- VisitArrayAccess(array, index, type, instruction->GetVectorLength());
+ VisitArrayAccess(array, index, type, instruction->GetVectorLength(), /*is_vec_op=*/true);
CreateReferenceInfoForReferenceType(instruction);
}
@@ -621,7 +621,7 @@ class HeapLocationCollector : public HGraphVisitor {
HInstruction* array = instruction->InputAt(0);
HInstruction* index = instruction->InputAt(1);
DataType::Type type = instruction->GetPackedType();
- VisitArrayAccess(array, index, type, instruction->GetVectorLength());
+ VisitArrayAccess(array, index, type, instruction->GetVectorLength(), /*is_vec_op=*/true);
has_heap_stores_ = true;
}
@@ -637,18 +637,12 @@ class HeapLocationCollector : public HGraphVisitor {
CreateReferenceInfoForReferenceType(instruction);
}
- void VisitMonitorOperation(HMonitorOperation* monitor ATTRIBUTE_UNUSED) override {
- has_monitor_operations_ = true;
- }
-
ScopedArenaAllocator* allocator_;
ScopedArenaVector<ReferenceInfo*> ref_info_array_; // All references used for heap accesses.
ScopedArenaVector<HeapLocation*> heap_locations_; // All heap locations.
ArenaBitVector aliasing_matrix_; // aliasing info between each pair of locations.
bool has_heap_stores_; // If there is no heap stores, LSE acts as GVN with better
// alias analysis and won't be as effective.
- bool has_volatile_; // If there are volatile field accesses.
- bool has_monitor_operations_; // If there are monitor operations.
LoadStoreAnalysisType lse_type_;
DISALLOW_COPY_AND_ASSIGN(HeapLocationCollector);
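
Since is_vec_op now participates both in the HeapLocation state (IsVecOp()) and in the FindHeapLocationIndex comparison, vector and scalar accesses are tracked as distinct locations and can later be filtered apart (see the load_store_elimination.cc changes below). A small self-contained model of the extended lookup key, with simplified types standing in for ReferenceInfo/HeapLocation and a placeholder value for the declaring-class index:

  #include <cstddef>
  #include <vector>

  // Simplified lookup key; the real code also normalizes the type via DataType::ToSigned.
  struct LocationKey {
    const void* ref;
    size_t offset;
    const void* index;
    size_t vector_length;
    int declaring_class_def_index;
    bool is_vec_op;  // new component of the identity check
    bool operator==(const LocationKey& other) const {
      return ref == other.ref && offset == other.offset && index == other.index &&
             vector_length == other.vector_length &&
             declaring_class_def_index == other.declaring_class_def_index &&
             is_vec_op == other.is_vec_op;
    }
  };

  // Mirrors FindHeapLocationIndex + MaybeCreateHeapLocation: reuse a matching slot, else append.
  size_t FindOrCreate(std::vector<LocationKey>& locations, const LocationKey& key) {
    for (size_t i = 0; i < locations.size(); ++i) {
      if (locations[i] == key) {
        return i;
      }
    }
    locations.push_back(key);
    return locations.size() - 1;
  }

  int main() {
    std::vector<LocationKey> locations;
    int array = 0, index = 0;  // placeholders for the reference and index instructions
    LocationKey scalar{&array, 0, &index, /*vector_length=*/1, -1, /*is_vec_op=*/false};
    LocationKey vec{&array, 0, &index, /*vector_length=*/4, -1, /*is_vec_op=*/true};
    size_t scalar_idx = FindOrCreate(locations, scalar);
    size_t vec_idx = FindOrCreate(locations, vec);
    // Repeated lookups with an identical key reuse the slot; the vector access gets its own.
    return (FindOrCreate(locations, scalar) == scalar_idx && scalar_idx != vec_idx) ? 0 : 1;
  }
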
diff --git a/compiler/optimizing/load_store_analysis_test.cc b/compiler/optimizing/load_store_analysis_test.cc
index 3c26c8d6ce..865febbd31 100644
--- a/compiler/optimizing/load_store_analysis_test.cc
+++ b/compiler/optimizing/load_store_analysis_test.cc
@@ -14,6 +14,7 @@
* limitations under the License.
*/
+#include "base/macros.h"
#include "load_store_analysis.h"
#include <array>
@@ -36,7 +37,7 @@
#include "optimizing_unit_test.h"
#include "scoped_thread_state_change.h"
-namespace art {
+namespace art HIDDEN {
class LoadStoreAnalysisTest : public CommonCompilerTest, public OptimizingUnitTestHelper {
public:
@@ -117,12 +118,13 @@ TEST_F(LoadStoreAnalysisTest, ArrayHeapLocations) {
size_t field = HeapLocation::kInvalidFieldOffset;
size_t vec = HeapLocation::kScalar;
size_t class_def = HeapLocation::kDeclaringClassDefIndexForArrays;
+ const bool is_vec_op = false;
size_t loc1 = heap_location_collector.FindHeapLocationIndex(
- ref, type, field, c1, vec, class_def);
+ ref, type, field, c1, vec, class_def, is_vec_op);
size_t loc2 = heap_location_collector.FindHeapLocationIndex(
- ref, type, field, c2, vec, class_def);
+ ref, type, field, c2, vec, class_def, is_vec_op);
size_t loc3 = heap_location_collector.FindHeapLocationIndex(
- ref, type, field, index, vec, class_def);
+ ref, type, field, index, vec, class_def, is_vec_op);
// must find this reference info for array in HeapLocationCollector.
ASSERT_TRUE(ref != nullptr);
// must find these heap locations;
@@ -142,7 +144,7 @@ TEST_F(LoadStoreAnalysisTest, ArrayHeapLocations) {
ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc3));
ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc3));
- EXPECT_TRUE(CheckGraph(graph_));
+ EXPECT_TRUE(CheckGraph());
}
TEST_F(LoadStoreAnalysisTest, FieldHeapLocations) {
@@ -223,15 +225,14 @@ TEST_F(LoadStoreAnalysisTest, FieldHeapLocations) {
// accesses to different fields of the same object should not alias.
ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2));
- EXPECT_TRUE(CheckGraph(graph_));
+ EXPECT_TRUE(CheckGraph());
}
TEST_F(LoadStoreAnalysisTest, ArrayIndexAliasingTest) {
CreateGraph();
- HBasicBlock* entry = new (GetAllocator()) HBasicBlock(graph_);
- graph_->AddBlock(entry);
- graph_->SetEntryBlock(entry);
- graph_->BuildDominatorTree();
+ AdjacencyListGraph blks(
+ SetupFromAdjacencyList("entry", "exit", {{"entry", "body"}, {"body", "exit"}}));
+ HBasicBlock* body = blks.Get("body");
HInstruction* array = new (GetAllocator()) HParameterValue(
graph_->GetDexFile(), dex::TypeIndex(0), 0, DataType::Type::kReference);
@@ -261,23 +262,25 @@ TEST_F(LoadStoreAnalysisTest, ArrayIndexAliasingTest) {
HInstruction* arr_set8 =
new (GetAllocator()) HArraySet(array, sub_neg1, c0, DataType::Type::kInt32, 0);
- entry->AddInstruction(array);
- entry->AddInstruction(index);
- entry->AddInstruction(add0);
- entry->AddInstruction(add1);
- entry->AddInstruction(sub0);
- entry->AddInstruction(sub1);
- entry->AddInstruction(sub_neg1);
- entry->AddInstruction(rev_sub1);
-
- entry->AddInstruction(arr_set1); // array[0] = c0
- entry->AddInstruction(arr_set2); // array[1] = c0
- entry->AddInstruction(arr_set3); // array[i+0] = c0
- entry->AddInstruction(arr_set4); // array[i+1] = c0
- entry->AddInstruction(arr_set5); // array[i-0] = c0
- entry->AddInstruction(arr_set6); // array[i-1] = c0
- entry->AddInstruction(arr_set7); // array[1-i] = c0
- entry->AddInstruction(arr_set8); // array[i-(-1)] = c0
+ body->AddInstruction(array);
+ body->AddInstruction(index);
+ body->AddInstruction(add0);
+ body->AddInstruction(add1);
+ body->AddInstruction(sub0);
+ body->AddInstruction(sub1);
+ body->AddInstruction(sub_neg1);
+ body->AddInstruction(rev_sub1);
+
+ body->AddInstruction(arr_set1); // array[0] = c0
+ body->AddInstruction(arr_set2); // array[1] = c0
+ body->AddInstruction(arr_set3); // array[i+0] = c0
+ body->AddInstruction(arr_set4); // array[i+1] = c0
+ body->AddInstruction(arr_set5); // array[i-0] = c0
+ body->AddInstruction(arr_set6); // array[i-1] = c0
+ body->AddInstruction(arr_set7); // array[1-i] = c0
+ body->AddInstruction(arr_set8); // array[i-(-1)] = c0
+
+ body->AddInstruction(new (GetAllocator()) HReturnVoid());
ScopedArenaAllocator allocator(graph_->GetArenaStack());
LoadStoreAnalysis lsa(graph_, nullptr, &allocator, LoadStoreAnalysisType::kBasic);
@@ -317,7 +320,7 @@ TEST_F(LoadStoreAnalysisTest, ArrayIndexAliasingTest) {
loc2 = heap_location_collector.GetArrayHeapLocation(arr_set8);
ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2));
- EXPECT_TRUE(CheckGraphSkipRefTypeInfoChecks(graph_));
+ EXPECT_TRUE(CheckGraph());
}
TEST_F(LoadStoreAnalysisTest, ArrayAliasingTest) {
@@ -891,7 +894,8 @@ TEST_F(LoadStoreAnalysisTest, PartialEscape) {
{},
InvokeType::kStatic,
{ nullptr, 0 },
- HInvokeStaticOrDirect::ClinitCheckRequirement::kNone);
+ HInvokeStaticOrDirect::ClinitCheckRequirement::kNone,
+ !graph_->IsDebuggable());
HInstruction* goto_left = new (GetAllocator()) HGoto();
call_left->AsInvoke()->SetRawInputAt(0, new_inst);
left->AddInstruction(call_left);
@@ -1000,7 +1004,8 @@ TEST_F(LoadStoreAnalysisTest, PartialEscape2) {
{},
InvokeType::kStatic,
{ nullptr, 0 },
- HInvokeStaticOrDirect::ClinitCheckRequirement::kNone);
+ HInvokeStaticOrDirect::ClinitCheckRequirement::kNone,
+ !graph_->IsDebuggable());
HInstruction* goto_left = new (GetAllocator()) HGoto();
call_left->AsInvoke()->SetRawInputAt(0, new_inst);
left->AddInstruction(call_left);
@@ -1123,7 +1128,8 @@ TEST_F(LoadStoreAnalysisTest, PartialEscape3) {
{},
InvokeType::kStatic,
{ nullptr, 0 },
- HInvokeStaticOrDirect::ClinitCheckRequirement::kNone);
+ HInvokeStaticOrDirect::ClinitCheckRequirement::kNone,
+ !graph_->IsDebuggable());
HInstruction* goto_left = new (GetAllocator()) HGoto();
call_left->AsInvoke()->SetRawInputAt(0, new_inst);
left->AddInstruction(call_left);
@@ -1403,7 +1409,8 @@ TEST_F(LoadStoreAnalysisTest, TotalEscapeAdjacentNoPredicated) {
{},
InvokeType::kStatic,
{nullptr, 0},
- HInvokeStaticOrDirect::ClinitCheckRequirement::kNone);
+ HInvokeStaticOrDirect::ClinitCheckRequirement::kNone,
+ !graph_->IsDebuggable());
HInstruction* goto_left = new (GetAllocator()) HGoto();
call_left->AsInvoke()->SetRawInputAt(0, new_inst);
left->AddInstruction(call_left);
@@ -1504,7 +1511,8 @@ TEST_F(LoadStoreAnalysisTest, TotalEscapeAdjacent) {
{},
InvokeType::kStatic,
{ nullptr, 0 },
- HInvokeStaticOrDirect::ClinitCheckRequirement::kNone);
+ HInvokeStaticOrDirect::ClinitCheckRequirement::kNone,
+ !graph_->IsDebuggable());
HInstruction* goto_left = new (GetAllocator()) HGoto();
call_left->AsInvoke()->SetRawInputAt(0, new_inst);
left->AddInstruction(call_left);
@@ -1615,7 +1623,8 @@ TEST_F(LoadStoreAnalysisTest, TotalEscape) {
{},
InvokeType::kStatic,
{ nullptr, 0 },
- HInvokeStaticOrDirect::ClinitCheckRequirement::kNone);
+ HInvokeStaticOrDirect::ClinitCheckRequirement::kNone,
+ !graph_->IsDebuggable());
HInstruction* goto_left = new (GetAllocator()) HGoto();
call_left->AsInvoke()->SetRawInputAt(0, new_inst);
left->AddInstruction(call_left);
@@ -1631,7 +1640,8 @@ TEST_F(LoadStoreAnalysisTest, TotalEscape) {
{},
InvokeType::kStatic,
{ nullptr, 0 },
- HInvokeStaticOrDirect::ClinitCheckRequirement::kNone);
+ HInvokeStaticOrDirect::ClinitCheckRequirement::kNone,
+ !graph_->IsDebuggable());
HInstruction* write_right = new (GetAllocator()) HInstanceFieldSet(new_inst,
c0,
nullptr,
@@ -1800,7 +1810,8 @@ TEST_F(LoadStoreAnalysisTest, DoubleDiamondEscape) {
{},
InvokeType::kStatic,
{ nullptr, 0 },
- HInvokeStaticOrDirect::ClinitCheckRequirement::kNone);
+ HInvokeStaticOrDirect::ClinitCheckRequirement::kNone,
+ !graph_->IsDebuggable());
HInstruction* goto_left = new (GetAllocator()) HGoto();
call_left->AsInvoke()->SetRawInputAt(0, new_inst);
high_left->AddInstruction(call_left);
@@ -1856,7 +1867,8 @@ TEST_F(LoadStoreAnalysisTest, DoubleDiamondEscape) {
{},
InvokeType::kStatic,
{ nullptr, 0 },
- HInvokeStaticOrDirect::ClinitCheckRequirement::kNone);
+ HInvokeStaticOrDirect::ClinitCheckRequirement::kNone,
+ !graph_->IsDebuggable());
HInstruction* goto_low_left = new (GetAllocator()) HGoto();
call_low_left->AsInvoke()->SetRawInputAt(0, new_inst);
low_left->AddInstruction(call_low_left);
@@ -2013,7 +2025,8 @@ TEST_F(LoadStoreAnalysisTest, PartialPhiPropagation1) {
{},
InvokeType::kStatic,
{nullptr, 0},
- HInvokeStaticOrDirect::ClinitCheckRequirement::kNone);
+ HInvokeStaticOrDirect::ClinitCheckRequirement::kNone,
+ !graph_->IsDebuggable());
HInstruction* goto_left_merge = new (GetAllocator()) HGoto();
left_phi->SetRawInputAt(0, obj_param);
left_phi->SetRawInputAt(1, new_inst);
diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc
index 9b8f07e969..9cabb12a9f 100644
--- a/compiler/optimizing/load_store_elimination.cc
+++ b/compiler/optimizing/load_store_elimination.cc
@@ -319,7 +319,7 @@
* a hash map to the HeapLocationCollector.
*/
-namespace art {
+namespace art HIDDEN {
#define LSE_VLOG \
if (::art::LoadStoreElimination::kVerboseLoggingMode && VLOG_IS_ON(compiler)) LOG(INFO)
@@ -855,25 +855,6 @@ class LSEVisitor final : private HGraphDelegateVisitor {
}
}
- // `instruction` is being removed. Try to see if the null check on it
- // can be removed. This can happen if the same value is set in two branches
- // but not in dominators. Such as:
- // int[] a = foo();
- // if () {
- // a[0] = 2;
- // } else {
- // a[0] = 2;
- // }
- // // a[0] can now be replaced with constant 2, and the null check on it can be removed.
- void TryRemovingNullCheck(HInstruction* instruction) {
- HInstruction* prev = instruction->GetPrevious();
- if ((prev != nullptr) && prev->IsNullCheck() && (prev == instruction->InputAt(0))) {
- // Previous instruction is a null check for this instruction. Remove the null check.
- prev->ReplaceWith(prev->InputAt(0));
- prev->GetBlock()->RemoveInstruction(prev);
- }
- }
-
HInstruction* GetDefaultValue(DataType::Type type) {
switch (type) {
case DataType::Type::kReference:
@@ -993,13 +974,63 @@ class LSEVisitor final : private HGraphDelegateVisitor {
<< " but LSE should be the only source of predicated-ifield-gets!";
}
+ void HandleAcquireLoad(HInstruction* instruction) {
+ DCHECK((instruction->IsInstanceFieldGet() && instruction->AsInstanceFieldGet()->IsVolatile()) ||
+ (instruction->IsStaticFieldGet() && instruction->AsStaticFieldGet()->IsVolatile()) ||
+ (instruction->IsMonitorOperation() && instruction->AsMonitorOperation()->IsEnter()))
+ << "Unexpected instruction " << instruction->GetId() << ": " << instruction->DebugName();
+
+    // Acquire operations, e.g. MONITOR_ENTER, change the thread's view of memory, so we must
+    // invalidate all current values.
+ ScopedArenaVector<ValueRecord>& heap_values =
+ heap_values_for_[instruction->GetBlock()->GetBlockId()];
+ for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
+ KeepStores(heap_values[i].stored_by);
+ heap_values[i].stored_by = Value::PureUnknown();
+ heap_values[i].value = Value::PartialUnknown(heap_values[i].value);
+ }
+
+    // Note that there's no need to record the load, as subsequent acquire loads shouldn't be
+ // eliminated either.
+ }
+
+ void HandleReleaseStore(HInstruction* instruction) {
+ DCHECK((instruction->IsInstanceFieldSet() && instruction->AsInstanceFieldSet()->IsVolatile()) ||
+ (instruction->IsStaticFieldSet() && instruction->AsStaticFieldSet()->IsVolatile()) ||
+ (instruction->IsMonitorOperation() && !instruction->AsMonitorOperation()->IsEnter()))
+ << "Unexpected instruction " << instruction->GetId() << ": " << instruction->DebugName();
+
+    // Release operations, e.g. MONITOR_EXIT, do not affect this thread's view of memory, but
+    // they make this thread's writes visible to other threads. Therefore, we must keep the
+    // stores, but there's no need to clobber the values.
+ ScopedArenaVector<ValueRecord>& heap_values =
+ heap_values_for_[instruction->GetBlock()->GetBlockId()];
+ for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
+ KeepStores(heap_values[i].stored_by);
+ heap_values[i].stored_by = Value::PureUnknown();
+ }
+
+    // Note that there's no need to record the store, as subsequent release stores shouldn't be
+ // eliminated either.
+ }
+
void VisitInstanceFieldGet(HInstanceFieldGet* instruction) override {
+ if (instruction->IsVolatile()) {
+ HandleAcquireLoad(instruction);
+ return;
+ }
+
HInstruction* object = instruction->InputAt(0);
const FieldInfo& field = instruction->GetFieldInfo();
VisitGetLocation(instruction, heap_location_collector_.GetFieldHeapLocation(object, &field));
}
void VisitInstanceFieldSet(HInstanceFieldSet* instruction) override {
+ if (instruction->IsVolatile()) {
+ HandleReleaseStore(instruction);
+ return;
+ }
+
HInstruction* object = instruction->InputAt(0);
const FieldInfo& field = instruction->GetFieldInfo();
HInstruction* value = instruction->InputAt(1);
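
What distinguishes the two handlers above is how much of the per-block table they touch: an acquire point invalidates the cached values as well as the stored_by entries, while a release point only pins the stores that produced them. A self-contained toy model of that asymmetry (plain flags instead of ART's Value/ValueRecord machinery, so a sketch of the bookkeeping rather than the real data flow):

  #include <cassert>
  #include <vector>

  // Simplified stand-in for LSE's per-heap-location record.
  struct ToyValueRecord {
    bool value_known = true;      // may a later load still be replaced by the cached value?
    bool store_removable = true;  // may the store that produced the value still be eliminated?
  };

  // Acquire (volatile load, monitor-enter): other threads' writes may now be visible,
  // so cached values become unknown and pending stores must be kept.
  void HandleAcquire(std::vector<ToyValueRecord>& heap_values) {
    for (ToyValueRecord& record : heap_values) {
      record.store_removable = false;  // KeepStores(record.stored_by)
      record.value_known = false;      // value becomes (partially) unknown
    }
  }

  // Release (volatile store, monitor-exit): this thread's view is unchanged, but its writes
  // become visible to other threads, so only the stores need to be kept.
  void HandleRelease(std::vector<ToyValueRecord>& heap_values) {
    for (ToyValueRecord& record : heap_values) {
      record.store_removable = false;  // KeepStores(record.stored_by)
    }
  }

  int main() {
    std::vector<ToyValueRecord> heap_values(2);
    HandleRelease(heap_values);
    assert(heap_values[0].value_known && !heap_values[0].store_removable);
    HandleAcquire(heap_values);
    assert(!heap_values[1].value_known && !heap_values[1].store_removable);
    return 0;
  }
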
@@ -1008,12 +1039,22 @@ class LSEVisitor final : private HGraphDelegateVisitor {
}
void VisitStaticFieldGet(HStaticFieldGet* instruction) override {
+ if (instruction->IsVolatile()) {
+ HandleAcquireLoad(instruction);
+ return;
+ }
+
HInstruction* cls = instruction->InputAt(0);
const FieldInfo& field = instruction->GetFieldInfo();
VisitGetLocation(instruction, heap_location_collector_.GetFieldHeapLocation(cls, &field));
}
void VisitStaticFieldSet(HStaticFieldSet* instruction) override {
+ if (instruction->IsVolatile()) {
+ HandleReleaseStore(instruction);
+ return;
+ }
+
HInstruction* cls = instruction->InputAt(0);
const FieldInfo& field = instruction->GetFieldInfo();
HInstruction* value = instruction->InputAt(1);
@@ -1021,6 +1062,14 @@ class LSEVisitor final : private HGraphDelegateVisitor {
VisitSetLocation(instruction, idx, value);
}
+ void VisitMonitorOperation(HMonitorOperation* monitor_op) override {
+ if (monitor_op->IsEnter()) {
+ HandleAcquireLoad(monitor_op);
+ } else {
+ HandleReleaseStore(monitor_op);
+ }
+ }
+
void VisitArrayGet(HArrayGet* instruction) override {
VisitGetLocation(instruction, heap_location_collector_.GetArrayHeapLocation(instruction));
}
@@ -1040,8 +1089,8 @@ class LSEVisitor final : private HGraphDelegateVisitor {
}
void VisitDeoptimize(HDeoptimize* instruction) override {
- // If we are in a try catch, even singletons are observable.
- const bool in_try_catch = instruction->GetBlock()->GetTryCatchInformation() != nullptr;
+ // If we are in a try, even singletons are observable.
+ const bool inside_a_try = instruction->GetBlock()->IsTryBlock();
HBasicBlock* block = instruction->GetBlock();
ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
@@ -1053,7 +1102,7 @@ class LSEVisitor final : private HGraphDelegateVisitor {
// for singletons that don't escape in the deoptimization environment.
bool observable = true;
ReferenceInfo* info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
- if (!in_try_catch && info->IsSingleton()) {
+ if (!inside_a_try && info->IsSingleton()) {
HInstruction* reference = info->GetReference();
// Finalizable objects always escape.
const bool finalizable_object =
@@ -1099,10 +1148,8 @@ class LSEVisitor final : private HGraphDelegateVisitor {
void HandleThrowingInstruction(HInstruction* instruction) {
DCHECK(instruction->CanThrow());
- // If we are inside of a try catch, singletons can become visible since we may not exit the
- // method.
- HandleExit(instruction->GetBlock(),
- instruction->GetBlock()->GetTryCatchInformation() != nullptr);
+ // If we are inside of a try, singletons can become visible since we may not exit the method.
+ HandleExit(instruction->GetBlock(), instruction->GetBlock()->IsTryBlock());
}
void VisitMethodEntryHook(HMethodEntryHook* method_entry) override {
@@ -1137,6 +1184,14 @@ class LSEVisitor final : private HGraphDelegateVisitor {
}
}
+ void VisitLoadMethodHandle(HLoadMethodHandle* load_method_handle) override {
+ HandleThrowingInstruction(load_method_handle);
+ }
+
+ void VisitLoadMethodType(HLoadMethodType* load_method_type) override {
+ HandleThrowingInstruction(load_method_type);
+ }
+
void VisitStringBuilderAppend(HStringBuilderAppend* sb_append) override {
HandleThrowingInstruction(sb_append);
}
@@ -1149,18 +1204,11 @@ class LSEVisitor final : private HGraphDelegateVisitor {
HandleThrowingInstruction(check_cast);
}
- void VisitMonitorOperation(HMonitorOperation* monitor_op) override {
- if (monitor_op->CanThrow()) {
- HandleThrowingInstruction(monitor_op);
- }
- }
-
void HandleInvoke(HInstruction* instruction) {
// If `instruction` can throw we have to presume all stores are visible.
const bool can_throw = instruction->CanThrow();
- // If we are in a try catch, even singletons are observable.
- const bool can_throw_in_try_catch =
- can_throw && instruction->GetBlock()->GetTryCatchInformation() != nullptr;
+ // If we are in a try, even singletons are observable.
+ const bool can_throw_inside_a_try = can_throw && instruction->GetBlock()->IsTryBlock();
SideEffects side_effects = instruction->GetSideEffects();
ScopedArenaVector<ValueRecord>& heap_values =
heap_values_for_[instruction->GetBlock()->GetBlockId()];
@@ -1186,7 +1234,7 @@ class LSEVisitor final : private HGraphDelegateVisitor {
return cohort.PrecedesBlock(blk);
});
};
- if (!can_throw_in_try_catch &&
+ if (!can_throw_inside_a_try &&
(ref_info->IsSingleton() ||
// partial and we aren't currently escaping and we haven't escaped yet.
(ref_info->IsPartialSingleton() && partial_singleton_did_not_escape(ref_info, blk)))) {
@@ -1235,8 +1283,8 @@ class LSEVisitor final : private HGraphDelegateVisitor {
}
void VisitNewInstance(HNewInstance* new_instance) override {
- // If we are in a try catch, even singletons are observable.
- const bool in_try_catch = new_instance->GetBlock()->GetTryCatchInformation() != nullptr;
+ // If we are in a try, even singletons are observable.
+ const bool inside_a_try = new_instance->GetBlock()->IsTryBlock();
ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(new_instance);
if (ref_info == nullptr) {
// new_instance isn't used for field accesses. No need to process it.
@@ -1265,7 +1313,7 @@ class LSEVisitor final : private HGraphDelegateVisitor {
heap_values[i].value = Value::ForInstruction(new_instance->GetLoadClass());
heap_values[i].stored_by = Value::PureUnknown();
}
- } else if (in_try_catch || IsEscapingObject(info, block, i)) {
+ } else if (inside_a_try || IsEscapingObject(info, block, i)) {
// Since NewInstance can throw, we presume all previous stores could be visible.
KeepStores(heap_values[i].stored_by);
heap_values[i].stored_by = Value::PureUnknown();
@@ -1274,8 +1322,8 @@ class LSEVisitor final : private HGraphDelegateVisitor {
}
void VisitNewArray(HNewArray* new_array) override {
- // If we are in a try catch, even singletons are observable.
- const bool in_try_catch = new_array->GetBlock()->GetTryCatchInformation() != nullptr;
+ // If we are in a try, even singletons are observable.
+ const bool inside_a_try = new_array->GetBlock()->IsTryBlock();
ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(new_array);
if (ref_info == nullptr) {
// new_array isn't used for array accesses. No need to process it.
@@ -1300,7 +1348,7 @@ class LSEVisitor final : private HGraphDelegateVisitor {
// Array elements are set to default heap values.
heap_values[i].value = Value::Default();
heap_values[i].stored_by = Value::PureUnknown();
- } else if (in_try_catch || IsEscapingObject(info, block, i)) {
+ } else if (inside_a_try || IsEscapingObject(info, block, i)) {
// Since NewArray can throw, we presume all previous stores could be visible.
KeepStores(heap_values[i].stored_by);
heap_values[i].stored_by = Value::PureUnknown();
@@ -1704,8 +1752,7 @@ void LSEVisitor::MergePredecessorRecords(HBasicBlock* block) {
ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
DCHECK(heap_values.empty());
size_t num_heap_locations = heap_location_collector_.GetNumberOfHeapLocations();
- if (block->GetPredecessors().empty() || (block->GetTryCatchInformation() != nullptr &&
- block->GetTryCatchInformation()->IsCatchBlock())) {
+ if (block->GetPredecessors().empty() || block->IsCatchBlock()) {
DCHECK_IMPLIES(block->GetPredecessors().empty(), block->IsEntryBlock());
heap_values.resize(num_heap_locations,
{/*value=*/Value::PureUnknown(), /*stored_by=*/Value::PureUnknown()});
@@ -1764,7 +1811,6 @@ static HInstruction* FindOrConstructNonLoopPhi(
if (type == DataType::Type::kReference) {
// Update reference type information. Pass invalid handles, these are not used for Phis.
ReferenceTypePropagation rtp_fixup(block->GetGraph(),
- Handle<mirror::ClassLoader>(),
Handle<mirror::DexCache>(),
/* is_first_run= */ false);
rtp_fixup.Visit(phi);
@@ -1877,7 +1923,6 @@ void LSEVisitor::VisitGetLocation(HInstruction* instruction, size_t idx) {
}
HInstruction* heap_value = FindSubstitute(record.value.GetInstruction());
AddRemovedLoad(instruction, heap_value);
- TryRemovingNullCheck(instruction);
}
}
@@ -2068,9 +2113,15 @@ bool LSEVisitor::TryReplacingLoopPhiPlaceholderWithDefault(
HInstruction* replacement = GetDefaultValue(type);
for (uint32_t phi_placeholder_index : visited.Indexes()) {
DCHECK(phi_placeholder_replacements_[phi_placeholder_index].IsInvalid());
- phi_placeholder_replacements_[phi_placeholder_index] = Value::ForInstruction(replacement);
+ PhiPlaceholder curr = GetPhiPlaceholderAt(phi_placeholder_index);
+ HeapLocation* hl = heap_location_collector_.GetHeapLocation(curr.GetHeapLocation());
+    // We use both vector and non-vector operations to analyze the information. However, we replace
+    // only non-vector operations in this code path.
+ if (!hl->IsVecOp()) {
+ phi_placeholder_replacements_[phi_placeholder_index] = Value::ForInstruction(replacement);
+ phi_placeholders_to_materialize->ClearBit(phi_placeholder_index);
+ }
}
- phi_placeholders_to_materialize->Subtract(&visited);
return true;
}
@@ -2125,9 +2176,15 @@ bool LSEVisitor::TryReplacingLoopPhiPlaceholderWithSingleInput(
DCHECK(replacement != nullptr);
for (uint32_t phi_placeholder_index : visited.Indexes()) {
DCHECK(phi_placeholder_replacements_[phi_placeholder_index].IsInvalid());
- phi_placeholder_replacements_[phi_placeholder_index] = Value::ForInstruction(replacement);
+ PhiPlaceholder curr = GetPhiPlaceholderAt(phi_placeholder_index);
+ HeapLocation* hl = heap_location_collector_.GetHeapLocation(curr.GetHeapLocation());
+    // We use both vector and non-vector operations to analyze the information. However, we replace
+ // only vector operations in this code path.
+ if (hl->IsVecOp()) {
+ phi_placeholder_replacements_[phi_placeholder_index] = Value::ForInstruction(replacement);
+ phi_placeholders_to_materialize->ClearBit(phi_placeholder_index);
+ }
}
- phi_placeholders_to_materialize->Subtract(&visited);
return true;
}
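
Both shortcuts now decide per placeholder instead of bulk-subtracting the visited set: the default-value path above commits replacements only for non-vector heap locations, the single-input path only for vector ones, and every committed placeholder clears its own bit in phi_placeholders_to_materialize. A small self-contained sketch of that filter-while-clearing pattern (std::bitset and a bool vector standing in for ArenaBitVector and the IsVecOp() query):

  #include <bitset>
  #include <vector>

  constexpr size_t kNumPlaceholders = 8;

  // Record a replacement and clear the materialization bit only for placeholders whose
  // vector-ness matches; the others stay pending and will be materialized as Phis.
  void ReplaceMatching(const std::vector<size_t>& visited,
                       const std::vector<bool>& is_vec_op,
                       bool want_vec_op,
                       std::vector<int>& replacements,
                       int replacement,
                       std::bitset<kNumPlaceholders>& to_materialize) {
    for (size_t index : visited) {
      if (is_vec_op[index] == want_vec_op) {
        replacements[index] = replacement;  // phi_placeholder_replacements_[index] = ...
        to_materialize.reset(index);        // phi_placeholders_to_materialize->ClearBit(index)
      }
    }
  }

  int main() {
    std::vector<bool> is_vec_op = {false, true, false, true, false, false, true, false};
    std::vector<int> replacements(kNumPlaceholders, -1);
    std::bitset<kNumPlaceholders> to_materialize;
    to_materialize.set();
    // Default-value path: replace only the non-vector placeholders among the visited ones.
    ReplaceMatching({1, 2, 4}, is_vec_op, /*want_vec_op=*/false, replacements, 0, to_materialize);
    // Placeholder 1 is a vector operation, so it is still pending materialization.
    return to_materialize.test(1) ? 0 : 1;
  }
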
@@ -2352,7 +2409,6 @@ bool LSEVisitor::MaterializeLoopPhis(ArrayRef<const size_t> phi_placeholder_inde
}
// Update reference type information. Pass invalid handles, these are not used for Phis.
ReferenceTypePropagation rtp_fixup(GetGraph(),
- Handle<mirror::ClassLoader>(),
Handle<mirror::DexCache>(),
/* is_first_run= */ false);
rtp_fixup.Visit(ArrayRef<HInstruction* const>(phis));
@@ -2639,7 +2695,6 @@ void LSEVisitor::ProcessLoopPhiWithUnknownInput(PhiPlaceholder loop_phi_with_unk
record.value = local_heap_values[idx];
HInstruction* heap_value = local_heap_values[idx].GetInstruction();
AddRemovedLoad(load_or_store, heap_value);
- TryRemovingNullCheck(load_or_store);
}
}
}
@@ -2698,7 +2753,6 @@ void LSEVisitor::ProcessLoadsRequiringLoopPhis() {
record.value = Replacement(record.value);
HInstruction* heap_value = record.value.GetInstruction();
AddRemovedLoad(load, heap_value);
- TryRemovingNullCheck(load);
}
}
}
@@ -3013,7 +3067,6 @@ class PartialLoadStoreEliminationHelper {
return;
}
ReferenceTypePropagation rtp_fixup(GetGraph(),
- Handle<mirror::ClassLoader>(),
Handle<mirror::DexCache>(),
/* is_first_run= */ false);
rtp_fixup.Visit(ArrayRef<HInstruction* const>(new_ref_phis_));
@@ -3333,7 +3386,7 @@ class PartialLoadStoreEliminationHelper {
ins->GetBlock()->InsertInstructionBefore(new_fget, ins);
if (ins->GetType() == DataType::Type::kReference) {
// Reference info is the same
- new_fget->SetReferenceTypeInfo(ins->GetReferenceTypeInfo());
+ new_fget->SetReferenceTypeInfoIfValid(ins->GetReferenceTypeInfo());
}
// In this phase, substitute instructions are used only for the predicated get
// default values which are used only if the partial singleton did not escape,
diff --git a/compiler/optimizing/load_store_elimination.h b/compiler/optimizing/load_store_elimination.h
index 6ad2eb2c51..42de803ebd 100644
--- a/compiler/optimizing/load_store_elimination.h
+++ b/compiler/optimizing/load_store_elimination.h
@@ -17,9 +17,10 @@
#ifndef ART_COMPILER_OPTIMIZING_LOAD_STORE_ELIMINATION_H_
#define ART_COMPILER_OPTIMIZING_LOAD_STORE_ELIMINATION_H_
+#include "base/macros.h"
#include "optimization.h"
-namespace art {
+namespace art HIDDEN {
class SideEffectsAnalysis;
diff --git a/compiler/optimizing/load_store_elimination_test.cc b/compiler/optimizing/load_store_elimination_test.cc
index 02dc939878..1ee109980f 100644
--- a/compiler/optimizing/load_store_elimination_test.cc
+++ b/compiler/optimizing/load_store_elimination_test.cc
@@ -36,7 +36,9 @@
#include "optimizing_unit_test.h"
#include "scoped_thread_state_change.h"
-namespace art {
+namespace art HIDDEN {
+
+static constexpr bool kDebugLseTests = false;
#define CHECK_SUBROUTINE_FAILURE() \
do { \
@@ -54,12 +56,16 @@ class LoadStoreEliminationTestBase : public SuperTest, public OptimizingUnitTest
void SetUp() override {
SuperTest::SetUp();
- gLogVerbosity.compiler = true;
+ if (kDebugLseTests) {
+ gLogVerbosity.compiler = true;
+ }
}
void TearDown() override {
SuperTest::TearDown();
- gLogVerbosity.compiler = false;
+ if (kDebugLseTests) {
+ gLogVerbosity.compiler = false;
+ }
}
void PerformLSE(bool with_partial = true) {
@@ -67,15 +73,40 @@ class LoadStoreEliminationTestBase : public SuperTest, public OptimizingUnitTest
LoadStoreElimination lse(graph_, /*stats=*/nullptr);
lse.Run(with_partial);
std::ostringstream oss;
- EXPECT_TRUE(CheckGraphSkipRefTypeInfoChecks(oss)) << oss.str();
+ EXPECT_TRUE(CheckGraph(oss)) << oss.str();
}
- void PerformLSEWithPartial() {
- PerformLSE(true);
+ void PerformLSEWithPartial(const AdjacencyListGraph& blks) {
+ // PerformLSE expects this to be empty.
+ graph_->ClearDominanceInformation();
+ if (kDebugLseTests) {
+ LOG(INFO) << "Pre LSE " << blks;
+ }
+ PerformLSE(/*with_partial=*/ true);
+ if (kDebugLseTests) {
+ LOG(INFO) << "Post LSE " << blks;
+ }
}
- void PerformLSENoPartial() {
- PerformLSE(false);
+ void PerformLSENoPartial(const AdjacencyListGraph& blks) {
+ // PerformLSE expects this to be empty.
+ graph_->ClearDominanceInformation();
+ if (kDebugLseTests) {
+ LOG(INFO) << "Pre LSE " << blks;
+ }
+ PerformLSE(/*with_partial=*/ false);
+ if (kDebugLseTests) {
+ LOG(INFO) << "Post LSE " << blks;
+ }
+ }
+
+ void PerformSimplifications(const AdjacencyListGraph& blks) {
+ InstructionSimplifier simp(graph_, /*codegen=*/nullptr);
+ simp.Run();
+
+ if (kDebugLseTests) {
+ LOG(INFO) << "Post simplification " << blks;
+ }
}
// Create instructions shared among tests.
@@ -542,6 +573,7 @@ TEST_F(LoadStoreEliminationTest, SameHeapValue2) {
AddVecStore(entry_block_, array_, j_);
HInstruction* vstore = AddVecStore(entry_block_, array_, i_);
+ graph_->SetHasSIMD(true);
PerformLSE();
ASSERT_FALSE(IsRemoved(vstore));
@@ -557,6 +589,7 @@ TEST_F(LoadStoreEliminationTest, SameHeapValue3) {
AddVecStore(entry_block_, array_, i_add1_);
HInstruction* vstore = AddVecStore(entry_block_, array_, i_);
+ graph_->SetHasSIMD(true);
PerformLSE();
ASSERT_FALSE(IsRemoved(vstore));
@@ -601,6 +634,7 @@ TEST_F(LoadStoreEliminationTest, OverlappingLoadStore) {
AddArraySet(entry_block_, array_, i_, c1);
HInstruction* vload5 = AddVecLoad(entry_block_, array_, i_);
+ graph_->SetHasSIMD(true);
PerformLSE();
ASSERT_TRUE(IsRemoved(load1));
@@ -634,6 +668,7 @@ TEST_F(LoadStoreEliminationTest, StoreAfterLoopWithoutSideEffects) {
// a[j] = 1;
HInstruction* array_set = AddArraySet(return_block_, array_, j_, c1);
+ graph_->SetHasSIMD(true);
PerformLSE();
ASSERT_TRUE(IsRemoved(array_set));
@@ -671,6 +706,7 @@ TEST_F(LoadStoreEliminationTest, StoreAfterSIMDLoopWithSideEffects) {
// a[j] = 0;
HInstruction* a_set = AddArraySet(return_block_, array_, j_, c0);
+ graph_->SetHasSIMD(true);
PerformLSE();
ASSERT_TRUE(IsRemoved(vload));
@@ -709,6 +745,7 @@ TEST_F(LoadStoreEliminationTest, LoadAfterSIMDLoopWithSideEffects) {
// x = a[j];
HInstruction* load = AddArrayGet(return_block_, array_, j_);
+ graph_->SetHasSIMD(true);
PerformLSE();
ASSERT_TRUE(IsRemoved(vload));
@@ -749,6 +786,7 @@ TEST_F(LoadStoreEliminationTest, MergePredecessorVecStores) {
// down: a[i,... i + 3] = [1,...1]
HInstruction* vstore4 = AddVecStore(down, array_, i_, vdata);
+ graph_->SetHasSIMD(true);
PerformLSE();
ASSERT_TRUE(IsRemoved(vstore2));
@@ -839,6 +877,7 @@ TEST_F(LoadStoreEliminationTest, RedundantVStoreVLoadInLoop) {
HInstruction* vstore2 = AddVecStore(loop_, array_b, phi_, vload->AsVecLoad());
HInstruction* vstore3 = AddVecStore(loop_, array_a, phi_, vstore1->InputAt(2));
+ graph_->SetHasSIMD(true);
PerformLSE();
ASSERT_FALSE(IsRemoved(vstore1));
@@ -894,7 +933,7 @@ TEST_F(LoadStoreEliminationTest, StoreAfterLoopWithSideEffects2) {
// loop:
// array2[i] = array[i]
// array[0] = 2
- HInstruction* store1 = AddArraySet(entry_block_, array_, c0, c2);
+ HInstruction* store1 = AddArraySet(pre_header_, array_, c0, c2);
HInstruction* load = AddArrayGet(loop_, array_, phi_);
HInstruction* store2 = AddArraySet(loop_, array2, phi_, load);
@@ -926,6 +965,7 @@ TEST_F(LoadStoreEliminationTest, VLoadDefaultValueInLoopWithoutWriteSideEffects)
HInstruction* vload = AddVecLoad(loop_, array_a, phi_);
HInstruction* vstore = AddVecStore(return_block_, array_, c0, vload->AsVecLoad());
+ graph_->SetHasSIMD(true);
PerformLSE();
ASSERT_FALSE(IsRemoved(vload));
@@ -949,6 +989,7 @@ TEST_F(LoadStoreEliminationTest, VLoadDefaultValue) {
HInstruction* vload = AddVecLoad(pre_header_, array_a, c0);
HInstruction* vstore = AddVecStore(return_block_, array_, c0, vload->AsVecLoad());
+ graph_->SetHasSIMD(true);
PerformLSE();
ASSERT_FALSE(IsRemoved(vload));
@@ -1025,6 +1066,7 @@ TEST_F(LoadStoreEliminationTest, VLoadAndLoadDefaultValueInLoopWithoutWriteSideE
HInstruction* vstore = AddVecStore(return_block_, array_, c0, vload->AsVecLoad());
HInstruction* store = AddArraySet(return_block_, array_, c0, load);
+ graph_->SetHasSIMD(true);
PerformLSE();
ASSERT_FALSE(IsRemoved(vload));
@@ -1055,6 +1097,7 @@ TEST_F(LoadStoreEliminationTest, VLoadAndLoadDefaultValue) {
HInstruction* vstore = AddVecStore(return_block_, array_, c0, vload->AsVecLoad());
HInstruction* store = AddArraySet(return_block_, array_, c0, load);
+ graph_->SetHasSIMD(true);
PerformLSE();
ASSERT_FALSE(IsRemoved(vload));
@@ -1086,6 +1129,7 @@ TEST_F(LoadStoreEliminationTest, VLoadDefaultValueAndVLoadInLoopWithoutWriteSide
HInstruction* vstore1 = AddVecStore(return_block_, array_, c0, vload1->AsVecLoad());
HInstruction* vstore2 = AddVecStore(return_block_, array_, c128, vload2->AsVecLoad());
+ graph_->SetHasSIMD(true);
PerformLSE();
ASSERT_FALSE(IsRemoved(vload1));
@@ -1116,6 +1160,7 @@ TEST_F(LoadStoreEliminationTest, VLoadDefaultValueAndVLoad) {
HInstruction* vstore1 = AddVecStore(return_block_, array_, c0, vload1->AsVecLoad());
HInstruction* vstore2 = AddVecStore(return_block_, array_, c128, vload2->AsVecLoad());
+ graph_->SetHasSIMD(true);
PerformLSE();
ASSERT_FALSE(IsRemoved(vload1));
@@ -2024,10 +2069,7 @@ TEST_F(LoadStoreEliminationTest, PartialUnknownMerge) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSENoPartial();
+ PerformLSENoPartial(blks);
EXPECT_INS_RETAINED(read_bottom);
EXPECT_INS_RETAINED(write_c1);
@@ -2174,9 +2216,8 @@ TEST_F(LoadStoreEliminationTest, PartialLoadPreserved) {
HInstruction* return_exit = new (GetAllocator()) HReturn(read_bottom);
exit->AddInstruction(read_bottom);
exit->AddInstruction(return_exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- PerformLSENoPartial();
+
+ PerformLSENoPartial(blks);
EXPECT_INS_RETAINED(read_bottom) << *read_bottom;
EXPECT_INS_RETAINED(write_right) << *write_right;
@@ -2266,9 +2307,8 @@ TEST_F(LoadStoreEliminationTest, PartialLoadPreserved2) {
HInstruction* return_exit = new (GetAllocator()) HReturn(read_bottom);
exit->AddInstruction(read_bottom);
exit->AddInstruction(return_exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- PerformLSENoPartial();
+
+ PerformLSENoPartial(blks);
EXPECT_INS_RETAINED(read_bottom);
EXPECT_INS_RETAINED(write_right_first);
@@ -2499,11 +2539,7 @@ TEST_F(LoadStoreEliminationTest, PartialPhiPropagation) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSEWithPartial();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
HPredicatedInstanceFieldGet* pred_get =
FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_);
@@ -2656,11 +2692,7 @@ TEST_P(OrderDependentTestGroup, PredicatedUse) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSEWithPartial();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
EXPECT_INS_RETAINED(call_left_left);
EXPECT_INS_REMOVED(read1);
@@ -2814,11 +2846,7 @@ TEST_P(OrderDependentTestGroup, PredicatedEnvUse) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSEWithPartial();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
HNewInstance* moved_new_inst1;
HInstanceFieldSet* moved_set1;
@@ -2954,11 +2982,7 @@ TEST_P(OrderDependentTestGroup, FieldSetOrderEnv) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSEWithPartial();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
EXPECT_INS_REMOVED(write_entry1);
EXPECT_INS_REMOVED(write_entry2);
@@ -3115,11 +3139,7 @@ TEST_P(OrderDependentTestGroup, MaterializationMovedUse) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSEWithPartial();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
EXPECT_INS_REMOVED(new_inst1);
EXPECT_INS_REMOVED(new_inst2);
@@ -3205,11 +3225,7 @@ TEST_F(LoadStoreEliminationTest, MovePredicatedAlloc) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSEWithPartial();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
HNewInstance* moved_new_inst = nullptr;
HInstanceFieldSet* moved_set = nullptr;
@@ -3320,11 +3336,7 @@ TEST_F(LoadStoreEliminationTest, MutiPartialLoadStore) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSEWithPartial();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
HNewInstance* moved_new_inst = nullptr;
HInstanceFieldSet* moved_set = nullptr;
@@ -3497,11 +3509,7 @@ TEST_F(LoadStoreEliminationTest, MutiPartialLoadStore2) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSEWithPartial();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
HNewInstance* moved_new_inst = nullptr;
HInstanceFieldSet* moved_set = nullptr;
@@ -3639,11 +3647,7 @@ TEST_F(LoadStoreEliminationTest, MovePredicatedAlloc2) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSEWithPartial();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
HNewInstance* moved_new_inst;
HInstanceFieldSet* moved_set;
@@ -3746,11 +3750,7 @@ TEST_F(LoadStoreEliminationTest, MovePredicatedAlloc3) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSEWithPartial();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
// Each escaping switch path gets its own materialization block.
// Blocks:
@@ -3877,11 +3877,7 @@ TEST_F(LoadStoreEliminationTest, MovePredicatedAlloc4) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSEWithPartial();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
EXPECT_INS_REMOVED(read_early);
EXPECT_EQ(return_early->InputAt(0), c0);
@@ -4013,11 +4009,7 @@ TEST_F(LoadStoreEliminationTest, MovePredicatedAlloc5) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSEWithPartial();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
// Normal LSE can get rid of these two.
EXPECT_INS_REMOVED(store_one);
@@ -4504,9 +4496,7 @@ TEST_F(LoadStoreEliminationTest, PartialLoadPreserved3) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- PerformLSENoPartial();
+ PerformLSENoPartial(blks);
EXPECT_INS_RETAINED(write_left_pre) << *write_left_pre;
EXPECT_INS_RETAINED(read_return) << *read_return;
@@ -4612,9 +4602,7 @@ TEST_F(LoadStoreEliminationTest, DISABLED_PartialLoadPreserved4) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- PerformLSENoPartial();
+ PerformLSENoPartial(blks);
EXPECT_INS_RETAINED(read_return);
EXPECT_INS_RETAINED(write_right);
@@ -4700,9 +4688,7 @@ TEST_F(LoadStoreEliminationTest, PartialLoadPreserved5) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- PerformLSENoPartial();
+ PerformLSENoPartial(blks);
EXPECT_INS_RETAINED(read_bottom);
EXPECT_INS_RETAINED(write_right);
@@ -4785,12 +4771,7 @@ TEST_F(LoadStoreEliminationTest, DISABLED_PartialLoadPreserved6) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
-
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSENoPartial();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSENoPartial(blks);
EXPECT_INS_REMOVED(read_bottom);
EXPECT_INS_REMOVED(write_right);
@@ -4829,8 +4810,9 @@ TEST_P(PartialComparisonTestGroup, PartialComparisonBeforeCohort) {
CreateGraph(/*handles=*/&vshs);
AdjacencyListGraph blks(SetupFromAdjacencyList("entry",
"exit",
- {{"entry", "critical_break"},
- {"entry", "partial"},
+ {{"entry", "first_block"},
+ {"first_block", "critical_break"},
+ {"first_block", "partial"},
{"partial", "merge"},
{"critical_break", "merge"},
{"merge", "left"},
@@ -4839,7 +4821,7 @@ TEST_P(PartialComparisonTestGroup, PartialComparisonBeforeCohort) {
{"right", "breturn"},
{"breturn", "exit"}}));
#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name)
- GET_BLOCK(entry);
+ GET_BLOCK(first_block);
GET_BLOCK(merge);
GET_BLOCK(partial);
GET_BLOCK(critical_break);
@@ -4858,12 +4840,12 @@ TEST_P(PartialComparisonTestGroup, PartialComparisonBeforeCohort) {
HInstruction* write_entry = MakeIFieldSet(new_inst, c3, MemberOffset(32));
ComparisonInstructions cmp_instructions = GetComparisonInstructions(new_inst);
HInstruction* if_inst = new (GetAllocator()) HIf(cmp_instructions.cmp_);
- entry->AddInstruction(cls);
- entry->AddInstruction(new_inst);
- entry->AddInstruction(write_entry);
- cmp_instructions.AddSetup(entry);
- entry->AddInstruction(cmp_instructions.cmp_);
- entry->AddInstruction(if_inst);
+ first_block->AddInstruction(cls);
+ first_block->AddInstruction(new_inst);
+ first_block->AddInstruction(write_entry);
+ cmp_instructions.AddSetup(first_block);
+ first_block->AddInstruction(cmp_instructions.cmp_);
+ first_block->AddInstruction(if_inst);
ManuallyBuildEnvFor(cls, {});
cmp_instructions.AddEnvironment(cls->GetEnvironment());
new_inst->CopyEnvironmentFrom(cls->GetEnvironment());
@@ -4897,12 +4879,7 @@ TEST_P(PartialComparisonTestGroup, PartialComparisonBeforeCohort) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
-
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSEWithPartial();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
std::vector<HPhi*> merges;
HPredicatedInstanceFieldGet* pred_get;
@@ -5026,11 +5003,7 @@ TEST_P(PartialComparisonTestGroup, PartialComparisonInCohortBeforeEscape) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSEWithPartial();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
std::vector<HPhi*> merges;
HInstanceFieldSet* init_set =
@@ -5157,11 +5130,7 @@ TEST_P(PartialComparisonTestGroup, PartialComparisonAfterCohort) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSEWithPartial();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
std::vector<HPhi*> merges;
HInstanceFieldSet* init_set =
@@ -5290,12 +5259,7 @@ TEST_P(PartialComparisonTestGroup, PartialComparisonInCohortAfterEscape) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
-
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSEWithPartial();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
std::vector<HPhi*> merges;
std::vector<HInstanceFieldSet*> sets;
@@ -5424,12 +5388,7 @@ TEST_F(LoadStoreEliminationTest, PredicatedStore1) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
-
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSEWithPartial();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
EXPECT_INS_RETAINED(write_bottom);
EXPECT_TRUE(write_bottom->AsInstanceFieldSet()->GetIsPredicatedSet());
@@ -5539,11 +5498,7 @@ TEST_F(LoadStoreEliminationTest, PredicatedStore2) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSEWithPartial();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
EXPECT_INS_RETAINED(write_bottom);
EXPECT_TRUE(write_bottom->AsInstanceFieldSet()->GetIsPredicatedSet()) << *write_bottom;
@@ -5627,11 +5582,7 @@ TEST_F(LoadStoreEliminationTest, PredicatedLoad1) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSEWithPartial();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
EXPECT_INS_REMOVED(read_bottom);
EXPECT_INS_REMOVED(write_right);
@@ -5748,11 +5699,7 @@ TEST_F(LoadStoreEliminationTest, MultiPredicatedLoad1) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSEWithPartial();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
EXPECT_INS_REMOVED(read_bottom1);
EXPECT_INS_REMOVED(read_bottom2);
@@ -5901,11 +5848,7 @@ TEST_F(LoadStoreEliminationTest, MultiPredicatedLoad2) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSEWithPartial();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
EXPECT_INS_REMOVED(read_bottom1);
EXPECT_INS_REMOVED(read_bottom2);
@@ -6078,11 +6021,7 @@ TEST_F(LoadStoreEliminationTest, MultiPredicatedLoad3) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSEWithPartial();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
EXPECT_INS_REMOVED(early_exit_left_read);
EXPECT_INS_REMOVED(early_exit_right_read);
@@ -6212,11 +6151,7 @@ TEST_F(LoadStoreEliminationTest, PredicatedLoad4) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSEWithPartial();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
EXPECT_INS_REMOVED(read_bottom);
EXPECT_INS_REMOVED(read_right);
@@ -6334,11 +6269,7 @@ TEST_F(LoadStoreEliminationTest, MultiPredicatedLoad4) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSEWithPartial();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
EXPECT_INS_REMOVED(read_bottom);
EXPECT_INS_REMOVED(read_early_return);
@@ -6447,11 +6378,7 @@ TEST_F(LoadStoreEliminationTest, PredicatedLoad2) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSEWithPartial();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
EXPECT_INS_REMOVED(read_bottom);
EXPECT_INS_REMOVED(write_right);
@@ -6585,11 +6512,7 @@ TEST_F(LoadStoreEliminationTest, PredicatedLoad3) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSEWithPartial();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
EXPECT_INS_REMOVED(read_bottom);
EXPECT_INS_REMOVED(write_right);
@@ -6688,11 +6611,7 @@ TEST_F(LoadStoreEliminationTest, PredicatedLoadDefaultValue) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSEWithPartial();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
EXPECT_INS_REMOVED(read_bottom);
EXPECT_INS_RETAINED(write_left);
@@ -6861,11 +6780,7 @@ TEST_F(LoadStoreEliminationTest, PartialLoopPhis1) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSEWithPartial();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
HPredicatedInstanceFieldGet* pred_get =
FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn);
@@ -7045,11 +6960,7 @@ TEST_F(LoadStoreEliminationTest, PartialLoopPhis2) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSEWithPartial();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
HPredicatedInstanceFieldGet* pred_get =
FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn);
@@ -7196,11 +7107,7 @@ TEST_F(LoadStoreEliminationTest, PartialLoopPhis3) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSEWithPartial();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
HPredicatedInstanceFieldGet* pred_get =
FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn);
@@ -7344,11 +7251,7 @@ TEST_F(LoadStoreEliminationTest, PartialLoopPhis4) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSEWithPartial();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
HPredicatedInstanceFieldGet* pred_get =
FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn);
@@ -7492,11 +7395,7 @@ TEST_F(LoadStoreEliminationTest, PartialLoopPhis5) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSEWithPartial();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
HPredicatedInstanceFieldGet* pred_get =
FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn);
@@ -7657,11 +7556,7 @@ TEST_F(LoadStoreEliminationTest, PartialLoopPhis6) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSEWithPartial();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
HPredicatedInstanceFieldGet* pred_get =
FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn);
@@ -7757,17 +7652,10 @@ TEST_F(LoadStoreEliminationTest, SimplifyTest) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSE();
+ PerformLSEWithPartial(blks);
// Run the code-simplifier too
- LOG(INFO) << "Pre simplification " << blks;
- InstructionSimplifier simp(graph_, /*codegen=*/nullptr);
- simp.Run();
-
- LOG(INFO) << "Post LSE " << blks;
+ PerformSimplifications(blks);
EXPECT_INS_REMOVED(write_right);
EXPECT_INS_REMOVED(write_start);
@@ -7851,17 +7739,10 @@ TEST_F(LoadStoreEliminationTest, SimplifyTest2) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSE();
+ PerformLSEWithPartial(blks);
// Run the code-simplifier too
- LOG(INFO) << "Pre simplification " << blks;
- InstructionSimplifier simp(graph_, /*codegen=*/nullptr);
- simp.Run();
-
- LOG(INFO) << "Post LSE " << blks;
+ PerformSimplifications(blks);
EXPECT_INS_REMOVED(write_right);
EXPECT_INS_REMOVED(write_start);
@@ -7961,17 +7842,10 @@ TEST_F(LoadStoreEliminationTest, SimplifyTest3) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSE();
+ PerformLSEWithPartial(blks);
// Run the code-simplifier too
- LOG(INFO) << "Pre simplification " << blks;
- InstructionSimplifier simp(graph_, /*codegen=*/nullptr);
- simp.Run();
-
- LOG(INFO) << "Post LSE " << blks;
+ PerformSimplifications(blks);
EXPECT_INS_REMOVED(write_case2);
EXPECT_INS_REMOVED(write_case3);
@@ -8069,17 +7943,10 @@ TEST_F(LoadStoreEliminationTest, SimplifyTest4) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSE();
+ PerformLSEWithPartial(blks);
// Run the code-simplifier too
- LOG(INFO) << "Pre simplification " << blks;
- InstructionSimplifier simp(graph_, /*codegen=*/nullptr);
- simp.Run();
-
- LOG(INFO) << "Post LSE " << blks;
+ PerformSimplifications(blks);
EXPECT_INS_REMOVED(write_case2);
EXPECT_INS_REMOVED(write_case3);
@@ -8225,11 +8092,7 @@ TEST_F(LoadStoreEliminationTest, PartialIrreducibleLoop) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSE();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
EXPECT_TRUE(loop_header->IsLoopHeader());
EXPECT_TRUE(loop_header->GetLoopInformation()->IsIrreducible());
@@ -8382,11 +8245,7 @@ TEST_P(UsesOrderDependentTestGroup, RecordPredicatedReplacements1) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSE();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
EXPECT_INS_RETAINED(cls);
EXPECT_INS_REMOVED(new_inst);
@@ -8544,11 +8403,7 @@ TEST_P(UsesOrderDependentTestGroup, RecordPredicatedReplacements2) {
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSE();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
EXPECT_INS_RETAINED(cls);
EXPECT_INS_REMOVED(new_inst);
@@ -8752,11 +8607,7 @@ TEST_P(UsesOrderDependentTestGroupForThreeItems, RecordPredicatedReplacements3)
SetupExit(exit);
- // PerformLSE expects this to be empty.
- graph_->ClearDominanceInformation();
- LOG(INFO) << "Pre LSE " << blks;
- PerformLSE();
- LOG(INFO) << "Post LSE " << blks;
+ PerformLSEWithPartial(blks);
EXPECT_INS_RETAINED(cls);
EXPECT_INS_REMOVED(new_inst);
diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc
index 5879c6fa07..f40b7f4f0c 100644
--- a/compiler/optimizing/locations.cc
+++ b/compiler/optimizing/locations.cc
@@ -21,7 +21,7 @@
#include "code_generator.h"
#include "nodes.h"
-namespace art {
+namespace art HIDDEN {
// Verify that Location is trivially copyable.
static_assert(std::is_trivially_copyable<Location>::value, "Location should be trivially copyable");
@@ -57,7 +57,7 @@ LocationSummary::LocationSummary(HInstruction* instruction,
Location Location::RegisterOrConstant(HInstruction* instruction) {
return instruction->IsConstant()
- ? Location::ConstantLocation(instruction->AsConstant())
+ ? Location::ConstantLocation(instruction)
: Location::RequiresRegister();
}
@@ -85,16 +85,23 @@ Location Location::FpuRegisterOrInt32Constant(HInstruction* instruction) {
Location Location::ByteRegisterOrConstant(int reg, HInstruction* instruction) {
return instruction->IsConstant()
- ? Location::ConstantLocation(instruction->AsConstant())
+ ? Location::ConstantLocation(instruction)
: Location::RegisterLocation(reg);
}
Location Location::FpuRegisterOrConstant(HInstruction* instruction) {
return instruction->IsConstant()
- ? Location::ConstantLocation(instruction->AsConstant())
+ ? Location::ConstantLocation(instruction)
: Location::RequiresFpuRegister();
}
+void Location::DCheckInstructionIsConstant(HInstruction* instruction) {
+ DCHECK(instruction != nullptr);
+ DCHECK(instruction->IsConstant());
+ DCHECK_EQ(reinterpret_cast<uintptr_t>(instruction),
+ reinterpret_cast<uintptr_t>(instruction->AsConstant()));
+}
+
std::ostream& operator<<(std::ostream& os, const Location& location) {
os << location.DebugString();
if (location.IsRegister() || location.IsFpuRegister()) {
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index acaea71a49..7ee076f442 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -22,9 +22,10 @@
#include "base/bit_field.h"
#include "base/bit_utils.h"
#include "base/bit_vector.h"
+#include "base/macros.h"
#include "base/value_object.h"
-namespace art {
+namespace art HIDDEN {
class HConstant;
class HInstruction;
@@ -102,8 +103,12 @@ class Location : public ValueObject {
return (value_ & kLocationConstantMask) == kConstant;
}
- static Location ConstantLocation(HConstant* constant) {
+ static Location ConstantLocation(HInstruction* constant) {
DCHECK(constant != nullptr);
+ if (kIsDebugBuild) {
+ // Call out-of-line helper to avoid circular dependency with `nodes.h`.
+ DCheckInstructionIsConstant(constant);
+ }
return Location(kConstant | reinterpret_cast<uintptr_t>(constant));
}
@@ -425,6 +430,8 @@ class Location : public ValueObject {
return PayloadField::Decode(value_);
}
+ static void DCheckInstructionIsConstant(HInstruction* instruction);
+
using KindField = BitField<Kind, 0, kBitsForKind>;
using PayloadField = BitField<uintptr_t, kBitsForKind, kBitsForPayload>;
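Editor's note on the locations.cc/locations.h hunks above: the debug check is moved out of line so that the header only needs a forward declaration of the instruction class. The following standalone sketch (hypothetical names, not ART code) illustrates that pattern of an out-of-line debug helper breaking a header dependency, together with the tagged-pointer style encoding of a constant location.

#include <cassert>
#include <cstdint>

// "Header" side: only a forward declaration is visible here.
class Instruction;

class Loc {
 public:
  static Loc ConstantLoc(Instruction* constant) {
    // The check needs Instruction's full definition, so it lives out of line
    // in the "source file" part below.
    DebugCheckIsConstant(constant);
    // Tag the pointer with a low bit to mark it as a constant location.
    return Loc(reinterpret_cast<uintptr_t>(constant) | 0x1u);
  }
  bool IsConstant() const { return (value_ & 0x1u) != 0u; }

 private:
  explicit Loc(uintptr_t value) : value_(value) {}
  static void DebugCheckIsConstant(Instruction* constant);
  uintptr_t value_;
};

// "Source file" side: the full class definition is available here.
class Instruction {
 public:
  virtual ~Instruction() = default;
  virtual bool IsConstant() const { return false; }
};

class ConstantInstruction : public Instruction {
 public:
  bool IsConstant() const override { return true; }
};

void Loc::DebugCheckIsConstant(Instruction* constant) {
  assert(constant != nullptr);
  assert(constant->IsConstant());
}

int main() {
  ConstantInstruction c;  // suitably aligned, so the low bit is free for tagging
  return Loc::ConstantLoc(&c).IsConstant() ? 0 : 1;
}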
diff --git a/compiler/optimizing/loop_analysis.cc b/compiler/optimizing/loop_analysis.cc
index 76bd8493b2..95e81533da 100644
--- a/compiler/optimizing/loop_analysis.cc
+++ b/compiler/optimizing/loop_analysis.cc
@@ -20,7 +20,7 @@
#include "code_generator.h"
#include "induction_var_range.h"
-namespace art {
+namespace art HIDDEN {
void LoopAnalysis::CalculateLoopBasicProperties(HLoopInformation* loop_info,
LoopAnalysisInfo* analysis_results,
diff --git a/compiler/optimizing/loop_analysis.h b/compiler/optimizing/loop_analysis.h
index fbf1516f64..cec00fecf4 100644
--- a/compiler/optimizing/loop_analysis.h
+++ b/compiler/optimizing/loop_analysis.h
@@ -17,9 +17,10 @@
#ifndef ART_COMPILER_OPTIMIZING_LOOP_ANALYSIS_H_
#define ART_COMPILER_OPTIMIZING_LOOP_ANALYSIS_H_
+#include "base/macros.h"
#include "nodes.h"
-namespace art {
+namespace art HIDDEN {
class CodeGenerator;
class InductionVarRange;
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 2d7c20825c..7a52502562 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -27,7 +27,7 @@
#include "mirror/array-inl.h"
#include "mirror/string.h"
-namespace art {
+namespace art HIDDEN {
// Enables vectorization (SIMDization) in the loop optimizer.
static constexpr bool kEnableVectorization = true;
@@ -507,9 +507,8 @@ bool HLoopOptimization::Run() {
graph_->SetHasLoops(false); // no more loops
}
- // Detach.
+ // Detach allocator.
loop_allocator_ = nullptr;
- last_loop_ = top_loop_ = nullptr;
return did_loop_opt;
}
@@ -530,11 +529,7 @@ bool HLoopOptimization::LocalRun() {
AddLoop(block->GetLoopInformation());
}
}
-
- // TODO(solanes): How can `top_loop_` be null if `graph_->HasLoops()` is true?
- if (top_loop_ == nullptr) {
- return false;
- }
+ DCHECK(top_loop_ != nullptr);
// Traverse the loop hierarchy inner-to-outer and optimize. Traversal can use
// temporary data structures using the phase-local allocator. All new HIR
@@ -681,6 +676,50 @@ void HLoopOptimization::CalculateAndSetTryCatchKind(LoopNode* node) {
}
//
+// This optimization applies to loops with plain simple operations
+// (i.e. no calls to Java code or the runtime) with a known small
+// trip_count * instr_count value.
+//
+bool HLoopOptimization::TryToRemoveSuspendCheckFromLoopHeader(LoopAnalysisInfo* analysis_info,
+ bool generate_code) {
+ if (!graph_->SuspendChecksAreAllowedToNoOp()) {
+ return false;
+ }
+
+ int64_t trip_count = analysis_info->GetTripCount();
+
+ if (trip_count == LoopAnalysisInfo::kUnknownTripCount) {
+ return false;
+ }
+
+ int64_t instruction_count = analysis_info->GetNumberOfInstructions();
+ int64_t total_instruction_count = trip_count * instruction_count;
+
+  // The HasInstructionsPreventingScalarOpts() check prevents this
+  // optimization from being applied to loops that have calls.
+ bool can_optimize =
+ total_instruction_count <= HLoopOptimization::kMaxTotalInstRemoveSuspendCheck &&
+ !analysis_info->HasInstructionsPreventingScalarOpts();
+
+ if (!can_optimize) {
+ return false;
+ }
+
+ // If we should do the optimization, disable codegen for the SuspendCheck.
+ if (generate_code) {
+ HLoopInformation* loop_info = analysis_info->GetLoopInfo();
+ HBasicBlock* header = loop_info->GetHeader();
+ HSuspendCheck* instruction = header->GetLoopInformation()->GetSuspendCheck();
+    // As other optimizations depend on SuspendCheck
+    // (e.g. CHAGuardVisitor::HoistGuard), disable its codegen instead of
+    // removing the SuspendCheck instruction.
+ instruction->SetIsNoOp(true);
+ }
+
+ return true;
+}
+
+//
// Optimization.
//
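For readers skimming the new pass above: the gate is purely arithmetic. Below is a minimal standalone sketch of that gate with illustrative names only; the real threshold constant is kMaxTotalInstRemoveSuspendCheck (128), declared later in loop_optimization.h.

#include <cstdint>
#include <iostream>

constexpr int64_t kMaxTotalInst = 128;  // mirrors kMaxTotalInstRemoveSuspendCheck

// Returns true when the loop is small enough (trip_count * instruction_count)
// and makes no calls, so its header SuspendCheck may be turned into a no-op.
bool CanDropSuspendCheck(int64_t trip_count, int64_t instruction_count, bool has_calls) {
  if (trip_count < 0) {
    return false;  // unknown trip count
  }
  return !has_calls && trip_count * instruction_count <= kMaxTotalInst;
}

int main() {
  std::cout << CanDropSuspendCheck(16, 4, /*has_calls=*/false) << "\n";   // 1: 64 <= 128
  std::cout << CanDropSuspendCheck(100, 4, /*has_calls=*/false) << "\n";  // 0: 400 > 128
  std::cout << CanDropSuspendCheck(16, 4, /*has_calls=*/true) << "\n";    // 0: the loop calls out
  return 0;
}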
@@ -824,7 +863,7 @@ bool HLoopOptimization::TryOptimizeInnerLoopFinite(LoopNode* node) {
}
bool HLoopOptimization::OptimizeInnerLoop(LoopNode* node) {
- return TryOptimizeInnerLoopFinite(node) || TryPeelingAndUnrolling(node);
+ return TryOptimizeInnerLoopFinite(node) || TryLoopScalarOpts(node);
}
//
@@ -928,7 +967,7 @@ bool HLoopOptimization::TryFullUnrolling(LoopAnalysisInfo* analysis_info, bool g
return true;
}
-bool HLoopOptimization::TryPeelingAndUnrolling(LoopNode* node) {
+bool HLoopOptimization::TryLoopScalarOpts(LoopNode* node) {
HLoopInformation* loop_info = node->loop_info;
int64_t trip_count = LoopAnalysis::GetLoopTripCount(loop_info, &induction_range_);
LoopAnalysisInfo analysis_info(loop_info);
@@ -941,10 +980,16 @@ bool HLoopOptimization::TryPeelingAndUnrolling(LoopNode* node) {
if (!TryFullUnrolling(&analysis_info, /*generate_code*/ false) &&
!TryPeelingForLoopInvariantExitsElimination(&analysis_info, /*generate_code*/ false) &&
- !TryUnrollingForBranchPenaltyReduction(&analysis_info, /*generate_code*/ false)) {
+ !TryUnrollingForBranchPenaltyReduction(&analysis_info, /*generate_code*/ false) &&
+ !TryToRemoveSuspendCheckFromLoopHeader(&analysis_info, /*generate_code*/ false)) {
return false;
}
+  // Try the suspend check removal even for non-clonable loops. Also, this
+  // optimization doesn't interfere with other scalar loop optimizations, so it
+  // can be done prior to them.
+ bool removed_suspend_check = TryToRemoveSuspendCheckFromLoopHeader(&analysis_info);
+
// Run 'IsLoopClonable' the last as it might be time-consuming.
if (!LoopClonerHelper::IsLoopClonable(loop_info)) {
return false;
@@ -952,7 +997,7 @@ bool HLoopOptimization::TryPeelingAndUnrolling(LoopNode* node) {
return TryFullUnrolling(&analysis_info) ||
TryPeelingForLoopInvariantExitsElimination(&analysis_info) ||
- TryUnrollingForBranchPenaltyReduction(&analysis_info);
+ TryUnrollingForBranchPenaltyReduction(&analysis_info) || removed_suspend_check;
}
//
diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h
index b17861648f..6dd778ba74 100644
--- a/compiler/optimizing/loop_optimization.h
+++ b/compiler/optimizing/loop_optimization.h
@@ -17,6 +17,7 @@
#ifndef ART_COMPILER_OPTIMIZING_LOOP_OPTIMIZATION_H_
#define ART_COMPILER_OPTIMIZING_LOOP_OPTIMIZATION_H_
+#include "base/macros.h"
#include "base/scoped_arena_allocator.h"
#include "base/scoped_arena_containers.h"
#include "induction_var_range.h"
@@ -25,7 +26,7 @@
#include "optimization.h"
#include "superblock_cloner.h"
-namespace art {
+namespace art HIDDEN {
class CompilerOptions;
class ArchNoOptsLoopHelper;
@@ -47,6 +48,11 @@ class HLoopOptimization : public HOptimization {
static constexpr const char* kLoopOptimizationPassName = "loop_optimization";
+  // The maximum total number of instructions (trip_count * instruction_count)
+  // for which the optimization of removing SuspendChecks from the loop header
+  // can be performed.
+ static constexpr int64_t kMaxTotalInstRemoveSuspendCheck = 128;
+
private:
/**
* A single loop inside the loop hierarchy representation.
@@ -179,8 +185,19 @@ class HLoopOptimization : public HOptimization {
// should be actually applied.
bool TryFullUnrolling(LoopAnalysisInfo* analysis_info, bool generate_code = true);
- // Tries to apply scalar loop peeling and unrolling.
- bool TryPeelingAndUnrolling(LoopNode* node);
+  // Tries to remove the SuspendCheck for plain loops with a low trip count. The
+  // SuspendCheck in the codegen makes sure that the thread can be interrupted
+  // during execution for GC. Not being able to do so might decrease GC
+  // responsiveness when a very long loop or a long recursion is being executed.
+  // However, for plain loops with a small trip count, removing the SuspendCheck
+  // should not affect GC responsiveness by a large margin. Since the thread then
+  // won't be interrupted inside such loops, performance is expected to improve
+  // by removing the SuspendCheck.
+ bool TryToRemoveSuspendCheckFromLoopHeader(LoopAnalysisInfo* analysis_info,
+ bool generate_code = true);
+
+ // Tries to apply scalar loop optimizations.
+ bool TryLoopScalarOpts(LoopNode* node);
//
// Vectorization analysis and synthesis.
diff --git a/compiler/optimizing/loop_optimization_test.cc b/compiler/optimizing/loop_optimization_test.cc
index bda25283f5..7f694fb655 100644
--- a/compiler/optimizing/loop_optimization_test.cc
+++ b/compiler/optimizing/loop_optimization_test.cc
@@ -14,12 +14,13 @@
* limitations under the License.
*/
+#include "base/macros.h"
#include "code_generator.h"
#include "driver/compiler_options.h"
#include "loop_optimization.h"
#include "optimizing_unit_test.h"
-namespace art {
+namespace art HIDDEN {
/**
* Fixture class for the loop optimization tests. These unit tests focus
@@ -94,10 +95,7 @@ class LoopOptimizationTest : public OptimizingUnitTest {
void PerformAnalysis() {
graph_->BuildDominatorTree();
iva_->Run();
- // Do not release the loop hierarchy.
- ScopedArenaAllocator loop_allocator(GetArenaStack());
- loop_opt_->loop_allocator_ = &loop_allocator;
- loop_opt_->LocalRun();
+ loop_opt_->Run();
}
/** Constructs string representation of computed loop hierarchy. */
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index d35ed1c543..3790058879 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -40,7 +40,7 @@
#include "scoped_thread_state_change-inl.h"
#include "ssa_builder.h"
-namespace art {
+namespace art HIDDEN {
// Enable floating-point static evaluation during constant folding
// only if all floating-point operations and constants evaluate in the
@@ -150,30 +150,54 @@ static void RemoveAsUser(HInstruction* instruction) {
RemoveEnvironmentUses(instruction);
}
-void HGraph::RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visited) const {
+void HGraph::RemoveDeadBlocksInstructionsAsUsersAndDisconnect(const ArenaBitVector& visited) const {
for (size_t i = 0; i < blocks_.size(); ++i) {
if (!visited.IsBitSet(i)) {
HBasicBlock* block = blocks_[i];
if (block == nullptr) continue;
+
+ // Remove as user.
for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
RemoveAsUser(it.Current());
}
for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
RemoveAsUser(it.Current());
}
+
+ // Remove non-catch phi uses, and disconnect the block.
+ block->DisconnectFromSuccessors(&visited);
+ }
+ }
+}
+
+// This method assumes `insn` has been removed from all users with the exception of catch
+// phis because of missing exceptional edges in the graph. It removes the
+// instruction from catch phi uses, together with inputs of other catch phis in
+// the catch block at the same index, as these must be dead too.
+static void RemoveCatchPhiUsesOfDeadInstruction(HInstruction* insn) {
+ DCHECK(!insn->HasEnvironmentUses());
+ while (insn->HasNonEnvironmentUses()) {
+ const HUseListNode<HInstruction*>& use = insn->GetUses().front();
+ size_t use_index = use.GetIndex();
+ HBasicBlock* user_block = use.GetUser()->GetBlock();
+ DCHECK(use.GetUser()->IsPhi());
+ DCHECK(user_block->IsCatchBlock());
+ for (HInstructionIterator phi_it(user_block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
+ phi_it.Current()->AsPhi()->RemoveInputAt(use_index);
}
}
}
void HGraph::RemoveDeadBlocks(const ArenaBitVector& visited) {
+ DCHECK(reverse_post_order_.empty()) << "We shouldn't have dominance information.";
for (size_t i = 0; i < blocks_.size(); ++i) {
if (!visited.IsBitSet(i)) {
HBasicBlock* block = blocks_[i];
if (block == nullptr) continue;
- // We only need to update the successor, which might be live.
- for (HBasicBlock* successor : block->GetSuccessors()) {
- successor->RemovePredecessor(block);
- }
+
+ // Remove all remaining uses (which should be only catch phi uses), and the instructions.
+ block->RemoveCatchPhiUsesAndInstruction(/* building_dominator_tree = */ true);
+
// Remove the block from the list of blocks, so that further analyses
// never see it.
blocks_[i] = nullptr;
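The catch-phi handling in RemoveCatchPhiUsesOfDeadInstruction above boils down to deleting the same input index from every phi of the catch block. A toy illustration with plain vectors standing in for catch phis (not the real HPhi API):

#include <cstddef>
#include <iostream>
#include <vector>

int main() {
  // Each inner vector is one catch phi's input list. Inputs at the same index
  // come from the same throwing site, so they must all be dropped together.
  std::vector<std::vector<int>> catch_phis = {{10, 11, 12}, {20, 21, 22}};
  const size_t dead_index = 1;  // index fed by the dead instruction

  for (std::vector<int>& phi_inputs : catch_phis) {
    phi_inputs.erase(phi_inputs.begin() + dead_index);
  }

  for (const std::vector<int>& phi_inputs : catch_phis) {
    for (int input : phi_inputs) {
      std::cout << input << ' ';
    }
    std::cout << '\n';  // prints "10 12" then "20 22"
  }
  return 0;
}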
@@ -200,7 +224,8 @@ GraphAnalysisResult HGraph::BuildDominatorTree() {
// (2) Remove instructions and phis from blocks not visited during
// the initial DFS as users from other instructions, so that
// users can be safely removed before uses later.
- RemoveInstructionsAsUsersFromDeadBlocks(visited);
+ // Also disconnect the block from its successors, updating the successor's phis if needed.
+ RemoveDeadBlocksInstructionsAsUsersAndDisconnect(visited);
// (3) Remove blocks not visited during the initial DFS.
// Step (5) requires dead blocks to be removed from the
@@ -237,6 +262,7 @@ void HGraph::ClearDominanceInformation() {
}
void HGraph::ClearLoopInformation() {
+ SetHasLoops(false);
SetHasIrreducibleLoops(false);
for (HBasicBlock* block : GetActiveBlocks()) {
block->SetLoopInformation(nullptr);
@@ -544,6 +570,15 @@ void HGraph::SplitCriticalEdge(HBasicBlock* block, HBasicBlock* successor) {
}
}
+HBasicBlock* HGraph::SplitEdgeAndUpdateRPO(HBasicBlock* block, HBasicBlock* successor) {
+ HBasicBlock* new_block = SplitEdge(block, successor);
+ // In the RPO we have {... , block, ... , successor}. We want to insert `new_block` right after
+ // `block` to have a consistent RPO without recomputing the whole graph's RPO.
+ reverse_post_order_.insert(
+ reverse_post_order_.begin() + IndexOfElement(reverse_post_order_, block) + 1, new_block);
+ return new_block;
+}
+
// Reorder phi inputs to match reordering of the block's predecessors.
static void FixPhisAfterPredecessorsReodering(HBasicBlock* block, size_t first, size_t second) {
for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
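The RPO bookkeeping in SplitEdgeAndUpdateRPO above is just an ordered insert right after the source block. A minimal sketch with strings standing in for blocks (illustrative only):

#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

int main() {
  std::vector<std::string> rpo = {"entry", "block", "middle", "successor", "exit"};

  // Splitting the edge block->successor: the new block is inserted directly
  // after "block", keeping the order consistent without a full recomputation.
  auto block_pos = std::find(rpo.begin(), rpo.end(), "block");
  rpo.insert(block_pos + 1, "new_block");

  for (const std::string& name : rpo) {
    std::cout << name << ' ';
  }
  std::cout << '\n';  // entry block new_block middle successor exit
  return 0;
}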
@@ -653,7 +688,7 @@ void HGraph::TransformLoopToSinglePreheaderFormat(HBasicBlock* header) {
0,
header_phi->GetType());
if (header_phi->GetType() == DataType::Type::kReference) {
- preheader_phi->SetReferenceTypeInfo(header_phi->GetReferenceTypeInfo());
+ preheader_phi->SetReferenceTypeInfoIfValid(header_phi->GetReferenceTypeInfo());
}
preheader->AddPhi(preheader_phi);
@@ -708,6 +743,8 @@ void HGraph::SimplifyLoop(HBasicBlock* header) {
void HGraph::ComputeTryBlockInformation() {
// Iterate in reverse post order to propagate try membership information from
// predecessors to their successors.
+ bool graph_has_try_catch = false;
+
for (HBasicBlock* block : GetReversePostOrder()) {
if (block->IsEntryBlock() || block->IsCatchBlock()) {
// Catch blocks after simplification have only exceptional predecessors
@@ -722,6 +759,7 @@ void HGraph::ComputeTryBlockInformation() {
DCHECK_IMPLIES(block->IsLoopHeader(),
!block->GetLoopInformation()->IsBackEdge(*first_predecessor));
const HTryBoundary* try_entry = first_predecessor->ComputeTryEntryOfSuccessors();
+ graph_has_try_catch |= try_entry != nullptr;
if (try_entry != nullptr &&
(block->GetTryCatchInformation() == nullptr ||
try_entry != &block->GetTryCatchInformation()->GetTryEntry())) {
@@ -730,6 +768,8 @@ void HGraph::ComputeTryBlockInformation() {
block->SetTryCatchInformation(new (allocator_) TryCatchInformation(*try_entry));
}
}
+
+ SetHasTryCatch(graph_has_try_catch);
}
void HGraph::SimplifyCFG() {
@@ -1459,6 +1499,10 @@ bool HInstructionList::FoundBefore(const HInstruction* instruction1,
UNREACHABLE();
}
+bool HInstruction::Dominates(HInstruction* other_instruction) const {
+ return other_instruction == this || StrictlyDominates(other_instruction);
+}
+
bool HInstruction::StrictlyDominates(HInstruction* other_instruction) const {
if (other_instruction == this) {
// An instruction does not strictly dominate itself.
@@ -1518,14 +1562,19 @@ void HInstruction::ReplaceWith(HInstruction* other) {
DCHECK(env_uses_.empty());
}
-void HInstruction::ReplaceUsesDominatedBy(HInstruction* dominator, HInstruction* replacement) {
+void HInstruction::ReplaceUsesDominatedBy(HInstruction* dominator,
+ HInstruction* replacement,
+ bool strictly_dominated) {
const HUseList<HInstruction*>& uses = GetUses();
for (auto it = uses.begin(), end = uses.end(); it != end; /* ++it below */) {
HInstruction* user = it->GetUser();
size_t index = it->GetIndex();
// Increment `it` now because `*it` may disappear thanks to user->ReplaceInput().
++it;
- if (dominator->StrictlyDominates(user)) {
+ const bool dominated =
+ strictly_dominated ? dominator->StrictlyDominates(user) : dominator->Dominates(user);
+
+ if (dominated) {
user->ReplaceInput(replacement, index);
} else if (user->IsPhi() && !user->AsPhi()->IsCatchPhi()) {
// If the input flows from a block dominated by `dominator`, we can replace it.
@@ -2108,8 +2157,9 @@ void HInstruction::MoveBeforeFirstUserAndOutOfLoops() {
MoveBefore(insert_pos);
}
-HBasicBlock* HBasicBlock::SplitBefore(HInstruction* cursor) {
- DCHECK(!graph_->IsInSsaForm()) << "Support for SSA form not implemented.";
+HBasicBlock* HBasicBlock::SplitBefore(HInstruction* cursor, bool require_graph_not_in_ssa_form) {
+ DCHECK_IMPLIES(require_graph_not_in_ssa_form, !graph_->IsInSsaForm())
+ << "Support for SSA form not implemented.";
DCHECK_EQ(cursor->GetBlock(), this);
HBasicBlock* new_block =
@@ -2376,24 +2426,6 @@ void HInstructionList::Add(const HInstructionList& instruction_list) {
}
}
-// Should be called on instructions in a dead block in post order. This method
-// assumes `insn` has been removed from all users with the exception of catch
-// phis because of missing exceptional edges in the graph. It removes the
-// instruction from catch phi uses, together with inputs of other catch phis in
-// the catch block at the same index, as these must be dead too.
-static void RemoveUsesOfDeadInstruction(HInstruction* insn) {
- DCHECK(!insn->HasEnvironmentUses());
- while (insn->HasNonEnvironmentUses()) {
- const HUseListNode<HInstruction*>& use = insn->GetUses().front();
- size_t use_index = use.GetIndex();
- HBasicBlock* user_block = use.GetUser()->GetBlock();
- DCHECK(use.GetUser()->IsPhi() && user_block->IsCatchBlock());
- for (HInstructionIterator phi_it(user_block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
- phi_it.Current()->AsPhi()->RemoveInputAt(use_index);
- }
- }
-}
-
void HBasicBlock::DisconnectAndDelete() {
// Dominators must be removed after all the blocks they dominate. This way
// a loop header is removed last, a requirement for correct loop information
@@ -2418,52 +2450,14 @@ void HBasicBlock::DisconnectAndDelete() {
}
// (2) Disconnect the block from its successors and update their phis.
- for (HBasicBlock* successor : successors_) {
- // Delete this block from the list of predecessors.
- size_t this_index = successor->GetPredecessorIndexOf(this);
- successor->predecessors_.erase(successor->predecessors_.begin() + this_index);
-
- // Check that `successor` has other predecessors, otherwise `this` is the
- // dominator of `successor` which violates the order DCHECKed at the top.
- DCHECK(!successor->predecessors_.empty());
-
- // Remove this block's entries in the successor's phis. Skip exceptional
- // successors because catch phi inputs do not correspond to predecessor
- // blocks but throwing instructions. The inputs of the catch phis will be
- // updated in step (3).
- if (!successor->IsCatchBlock()) {
- if (successor->predecessors_.size() == 1u) {
- // The successor has just one predecessor left. Replace phis with the only
- // remaining input.
- for (HInstructionIterator phi_it(successor->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
- HPhi* phi = phi_it.Current()->AsPhi();
- phi->ReplaceWith(phi->InputAt(1 - this_index));
- successor->RemovePhi(phi);
- }
- } else {
- for (HInstructionIterator phi_it(successor->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
- phi_it.Current()->AsPhi()->RemoveInputAt(this_index);
- }
- }
- }
- }
- successors_.clear();
+ DisconnectFromSuccessors();
// (3) Remove instructions and phis. Instructions should have no remaining uses
// except in catch phis. If an instruction is used by a catch phi at `index`,
// remove `index`-th input of all phis in the catch block since they are
// guaranteed dead. Note that we may miss dead inputs this way but the
// graph will always remain consistent.
- for (HBackwardInstructionIterator it(GetInstructions()); !it.Done(); it.Advance()) {
- HInstruction* insn = it.Current();
- RemoveUsesOfDeadInstruction(insn);
- RemoveInstruction(insn);
- }
- for (HInstructionIterator it(GetPhis()); !it.Done(); it.Advance()) {
- HPhi* insn = it.Current()->AsPhi();
- RemoveUsesOfDeadInstruction(insn);
- RemovePhi(insn);
- }
+ RemoveCatchPhiUsesAndInstruction(/* building_dominator_tree = */ false);
// (4) Disconnect the block from its predecessors and update their
// control-flow instructions.
@@ -2537,6 +2531,70 @@ void HBasicBlock::DisconnectAndDelete() {
SetGraph(nullptr);
}
+void HBasicBlock::DisconnectFromSuccessors(const ArenaBitVector* visited) {
+ for (HBasicBlock* successor : successors_) {
+ // Delete this block from the list of predecessors.
+ size_t this_index = successor->GetPredecessorIndexOf(this);
+ successor->predecessors_.erase(successor->predecessors_.begin() + this_index);
+
+ if (visited != nullptr && !visited->IsBitSet(successor->GetBlockId())) {
+ // `successor` itself is dead. Therefore, there is no need to update its phis.
+ continue;
+ }
+
+ DCHECK(!successor->predecessors_.empty());
+
+    // Remove this block's entries in the successor's phis. Skip exceptional
+    // successors because catch phi inputs do not correspond to predecessor blocks
+    // but to throwing instructions; those are removed in `RemoveCatchPhiUsesAndInstruction`.
+ if (!successor->IsCatchBlock()) {
+ if (successor->predecessors_.size() == 1u) {
+ // The successor has just one predecessor left. Replace phis with the only
+ // remaining input.
+ for (HInstructionIterator phi_it(successor->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
+ HPhi* phi = phi_it.Current()->AsPhi();
+ phi->ReplaceWith(phi->InputAt(1 - this_index));
+ successor->RemovePhi(phi);
+ }
+ } else {
+ for (HInstructionIterator phi_it(successor->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
+ phi_it.Current()->AsPhi()->RemoveInputAt(this_index);
+ }
+ }
+ }
+ }
+ successors_.clear();
+}
+
+void HBasicBlock::RemoveCatchPhiUsesAndInstruction(bool building_dominator_tree) {
+ for (HBackwardInstructionIterator it(GetInstructions()); !it.Done(); it.Advance()) {
+ HInstruction* insn = it.Current();
+ RemoveCatchPhiUsesOfDeadInstruction(insn);
+
+    // If we are building the dominator tree, we removed all input records previously.
+    // `RemoveInstruction` would try to remove them again, which is not supported and would
+    // crash. We do the checks here instead, since they are skipped in `RemoveInstruction`.
+ if (building_dominator_tree) {
+ DCHECK(insn->GetUses().empty());
+ DCHECK(insn->GetEnvUses().empty());
+ }
+ RemoveInstruction(insn, /* ensure_safety= */ !building_dominator_tree);
+ }
+ for (HInstructionIterator it(GetPhis()); !it.Done(); it.Advance()) {
+ HPhi* insn = it.Current()->AsPhi();
+ RemoveCatchPhiUsesOfDeadInstruction(insn);
+
+    // If we are building the dominator tree, we removed all input records previously.
+    // `RemovePhi` would try to remove them again, which is not supported and would
+    // crash. We do the checks here instead, since they are skipped in `RemovePhi`.
+ if (building_dominator_tree) {
+ DCHECK(insn->GetUses().empty());
+ DCHECK(insn->GetEnvUses().empty());
+ }
+ RemovePhi(insn, /* ensure_safety= */ !building_dominator_tree);
+ }
+}
+
void HBasicBlock::MergeInstructionsWith(HBasicBlock* other) {
DCHECK(EndsWithControlFlowInstruction());
RemoveInstruction(GetLastInstruction());
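The phi update inside DisconnectFromSuccessors above has two cases worth spelling out: with a single remaining predecessor the phi degenerates to its surviving input, otherwise only the dead input is dropped. A toy version with vectors (again not the real HPhi API):

#include <cstddef>
#include <iostream>
#include <vector>

int main() {
  const size_t this_index = 0;  // position of the disconnected predecessor

  // Case 1: only one predecessor remains, so the phi is replaced by the
  // surviving input (index 1 - this_index for a two-input phi).
  std::vector<int> two_input_phi = {7, 9};
  int replacement = two_input_phi[1 - this_index];
  std::cout << "phi replaced by " << replacement << '\n';  // 9

  // Case 2: several predecessors remain, so only the dead input is removed.
  std::vector<int> three_input_phi = {7, 9, 11};
  three_input_phi.erase(three_input_phi.begin() + this_index);
  std::cout << three_input_phi.size() << " inputs left\n";  // 2 inputs left
  return 0;
}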
@@ -2660,7 +2718,8 @@ void HGraph::DeleteDeadEmptyBlock(HBasicBlock* block) {
void HGraph::UpdateLoopAndTryInformationOfNewBlock(HBasicBlock* block,
HBasicBlock* reference,
- bool replace_if_back_edge) {
+ bool replace_if_back_edge,
+ bool has_more_specific_try_catch_info) {
if (block->IsLoopHeader()) {
// Clear the information of which blocks are contained in that loop. Since the
// information is stored as a bit vector based on block ids, we have to update
@@ -2687,11 +2746,16 @@ void HGraph::UpdateLoopAndTryInformationOfNewBlock(HBasicBlock* block,
}
}
- // Copy TryCatchInformation if `reference` is a try block, not if it is a catch block.
- TryCatchInformation* try_catch_info = reference->IsTryBlock()
- ? reference->GetTryCatchInformation()
- : nullptr;
- block->SetTryCatchInformation(try_catch_info);
+ DCHECK_IMPLIES(has_more_specific_try_catch_info, !reference->IsTryBlock())
+ << "We don't allow to inline try catches inside of other try blocks.";
+
+  // Update the TryCatchInformation if we are not inlining a try catch.
+ if (!has_more_specific_try_catch_info) {
+ // Copy TryCatchInformation if `reference` is a try block, not if it is a catch block.
+ TryCatchInformation* try_catch_info =
+ reference->IsTryBlock() ? reference->GetTryCatchInformation() : nullptr;
+ block->SetTryCatchInformation(try_catch_info);
+ }
}
HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {
@@ -2730,9 +2794,15 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {
if (HasTryCatch()) {
outer_graph->SetHasTryCatch(true);
}
+ if (HasMonitorOperations()) {
+ outer_graph->SetHasMonitorOperations(true);
+ }
if (HasSIMD()) {
outer_graph->SetHasSIMD(true);
}
+ if (HasAlwaysThrowingInvokes()) {
+ outer_graph->SetHasAlwaysThrowingInvokes(true);
+ }
HInstruction* return_value = nullptr;
if (GetBlocks().size() == 3) {
@@ -2771,6 +2841,7 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {
HBasicBlock* first = entry_block_->GetSuccessors()[0];
DCHECK(!first->IsInLoop());
+ DCHECK(first->GetTryCatchInformation() == nullptr);
at->MergeWithInlined(first);
exit_block_->ReplaceWith(to);
@@ -2801,12 +2872,14 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {
// and (4) to the blocks that apply.
for (HBasicBlock* current : GetReversePostOrder()) {
if (current != exit_block_ && current != entry_block_ && current != first) {
- DCHECK(current->GetTryCatchInformation() == nullptr);
DCHECK(current->GetGraph() == this);
current->SetGraph(outer_graph);
outer_graph->AddBlock(current);
outer_graph->reverse_post_order_[++index_of_at] = current;
- UpdateLoopAndTryInformationOfNewBlock(current, at, /* replace_if_back_edge= */ false);
+ UpdateLoopAndTryInformationOfNewBlock(current,
+ at,
+ /* replace_if_back_edge= */ false,
+ current->GetTryCatchInformation() != nullptr);
}
}
@@ -2820,25 +2893,62 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {
// Update all predecessors of the exit block (now the `to` block)
// to not `HReturn` but `HGoto` instead. Special case throwing blocks
- // to now get the outer graph exit block as successor. Note that the inliner
- // currently doesn't support inlining methods with try/catch.
+ // to now get the outer graph exit block as successor.
HPhi* return_value_phi = nullptr;
bool rerun_dominance = false;
bool rerun_loop_analysis = false;
for (size_t pred = 0; pred < to->GetPredecessors().size(); ++pred) {
HBasicBlock* predecessor = to->GetPredecessors()[pred];
HInstruction* last = predecessor->GetLastInstruction();
+
+ // At this point we might either have:
+ // A) Return/ReturnVoid/Throw as the last instruction, or
+ // B) `Return/ReturnVoid/Throw->TryBoundary` as the last instruction chain
+
+ const bool saw_try_boundary = last->IsTryBoundary();
+ if (saw_try_boundary) {
+ DCHECK(predecessor->IsSingleTryBoundary());
+ DCHECK(!last->AsTryBoundary()->IsEntry());
+ predecessor = predecessor->GetSinglePredecessor();
+ last = predecessor->GetLastInstruction();
+ }
+
if (last->IsThrow()) {
- DCHECK(!at->IsTryBlock());
- predecessor->ReplaceSuccessor(to, outer_graph->GetExitBlock());
+ if (at->IsTryBlock()) {
+ DCHECK(!saw_try_boundary) << "We don't support inlining of try blocks into try blocks.";
+ // Create a TryBoundary of kind:exit and point it to the Exit block.
+ HBasicBlock* new_block = outer_graph->SplitEdge(predecessor, to);
+ new_block->AddInstruction(
+ new (allocator) HTryBoundary(HTryBoundary::BoundaryKind::kExit, last->GetDexPc()));
+ new_block->ReplaceSuccessor(to, outer_graph->GetExitBlock());
+
+ // Copy information from the predecessor.
+ new_block->SetLoopInformation(predecessor->GetLoopInformation());
+ TryCatchInformation* try_catch_info = predecessor->GetTryCatchInformation();
+ new_block->SetTryCatchInformation(try_catch_info);
+ for (HBasicBlock* xhandler :
+ try_catch_info->GetTryEntry().GetBlock()->GetExceptionalSuccessors()) {
+ new_block->AddSuccessor(xhandler);
+ }
+ DCHECK(try_catch_info->GetTryEntry().HasSameExceptionHandlersAs(
+ *new_block->GetLastInstruction()->AsTryBoundary()));
+ } else {
+ // We either have `Throw->TryBoundary` or `Throw`. We want to point the whole chain to the
+        // exit, so we recompute `predecessor`.
+ predecessor = to->GetPredecessors()[pred];
+ predecessor->ReplaceSuccessor(to, outer_graph->GetExitBlock());
+ }
+
--pred;
// We need to re-run dominance information, as the exit block now has
- // a new dominator.
+ // a new predecessor and potential new dominator.
+ // TODO(solanes): See if it's worth it to hand-modify the domination chain instead of
+ // rerunning the dominance for the whole graph.
rerun_dominance = true;
if (predecessor->GetLoopInformation() != nullptr) {
- // The exit block and blocks post dominated by the exit block do not belong
- // to any loop. Because we do not compute the post dominators, we need to re-run
- // loop analysis to get the loop information correct.
+      // The loop information might have changed, e.g. `predecessor` might not be in a loop
+      // anymore. We only do this if `predecessor` has loop information, as it is impossible
+      // for `predecessor` to end up in a loop if it wasn't in one before.
rerun_loop_analysis = true;
}
} else {
@@ -2863,6 +2973,19 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {
}
predecessor->AddInstruction(new (allocator) HGoto(last->GetDexPc()));
predecessor->RemoveInstruction(last);
+
+ if (saw_try_boundary) {
+ predecessor = to->GetPredecessors()[pred];
+ DCHECK(predecessor->EndsWithTryBoundary());
+ DCHECK_EQ(predecessor->GetNormalSuccessors().size(), 1u);
+ if (predecessor->GetSuccessors()[0]->GetPredecessors().size() > 1) {
+ outer_graph->SplitCriticalEdge(predecessor, to);
+ rerun_dominance = true;
+ if (predecessor->GetLoopInformation() != nullptr) {
+ rerun_loop_analysis = true;
+ }
+ }
+ }
}
}
if (rerun_loop_analysis) {
@@ -3047,6 +3170,7 @@ HBasicBlock* HGraph::TransformLoopForVectorization(HBasicBlock* header,
HSuspendCheck* suspend_check = new (allocator_) HSuspendCheck(header->GetDexPc());
new_header->AddInstruction(suspend_check);
new_body->AddInstruction(new (allocator_) HGoto());
+ DCHECK(loop->GetSuspendCheck() != nullptr);
suspend_check->CopyEnvironmentFromWithLoopPhiAdjustment(
loop->GetSuspendCheck()->GetEnvironment(), header);
@@ -3091,6 +3215,12 @@ void HInstruction::SetReferenceTypeInfo(ReferenceTypeInfo rti) {
SetPackedFlag<kFlagReferenceTypeIsExact>(rti.IsExact());
}
+void HInstruction::SetReferenceTypeInfoIfValid(ReferenceTypeInfo rti) {
+ if (rti.IsValid()) {
+ SetReferenceTypeInfo(rti);
+ }
+}
+
bool HBoundType::InstructionDataEquals(const HInstruction* other) const {
const HBoundType* other_bt = other->AsBoundType();
ScopedObjectAccess soa(Thread::Current());
@@ -3441,8 +3571,8 @@ static inline IntrinsicExceptions GetExceptionsIntrinsic(Intrinsics i) {
return kCanThrow;
}
-void HInvoke::SetResolvedMethod(ArtMethod* method) {
- if (method != nullptr && method->IsIntrinsic()) {
+void HInvoke::SetResolvedMethod(ArtMethod* method, bool enable_intrinsic_opt) {
+ if (method != nullptr && method->IsIntrinsic() && enable_intrinsic_opt) {
Intrinsics intrinsic = static_cast<Intrinsics>(method->GetIntrinsic());
SetIntrinsic(intrinsic,
NeedsEnvironmentIntrinsic(intrinsic),
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 7a0059f616..28112d176a 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -29,6 +29,7 @@
#include "base/array_ref.h"
#include "base/intrusive_forward_list.h"
#include "base/iteration_range.h"
+#include "base/macros.h"
#include "base/mutex.h"
#include "base/quasi_atomic.h"
#include "base/stl_util.h"
@@ -51,7 +52,7 @@
#include "mirror/method_type.h"
#include "offsets.h"
-namespace art {
+namespace art HIDDEN {
class ArenaStack;
class CodeGenerator;
@@ -406,6 +407,7 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
has_loops_(false),
has_irreducible_loops_(false),
has_direct_critical_native_call_(false),
+ has_always_throwing_invokes_(false),
dead_reference_safe_(dead_reference_safe),
debuggable_(debuggable),
current_instruction_id_(start_instruction_id),
@@ -485,9 +487,11 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
// Update the loop and try membership of `block`, which was spawned from `reference`.
// In case `reference` is a back edge, `replace_if_back_edge` notifies whether `block`
// should be the new back edge.
+ // `has_more_specific_try_catch_info` will be set to true when inlining a try catch.
void UpdateLoopAndTryInformationOfNewBlock(HBasicBlock* block,
HBasicBlock* reference,
- bool replace_if_back_edge);
+ bool replace_if_back_edge,
+ bool has_more_specific_try_catch_info = false);
// Need to add a couple of blocks to test if the loop body is entered and
// put deoptimization instructions, etc.
@@ -510,6 +514,11 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
HBasicBlock* SplitEdge(HBasicBlock* block, HBasicBlock* successor);
void SplitCriticalEdge(HBasicBlock* block, HBasicBlock* successor);
+
+ // Splits the edge between `block` and `successor` and then updates the graph's RPO to keep
+ // consistency without recomputing the whole graph.
+ HBasicBlock* SplitEdgeAndUpdateRPO(HBasicBlock* block, HBasicBlock* successor);
+
void OrderLoopHeaderPredecessors(HBasicBlock* header);
// Transform a loop into a format with a single preheader.
@@ -678,6 +687,13 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
return cha_single_implementation_list_;
}
+ // In case of OSR we intend to use SuspendChecks as an entry point to the
+ // function; for debuggable graphs we might deoptimize to interpreter from
+ // SuspendChecks. In these cases we should always generate code for them.
+ bool SuspendChecksAreAllowedToNoOp() const {
+ return !IsDebuggable() && !IsCompilingOsr();
+ }
+
void AddCHASingleImplementationDependency(ArtMethod* method) {
cha_single_implementation_list_.insert(method);
}
@@ -704,6 +720,9 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
bool HasDirectCriticalNativeCall() const { return has_direct_critical_native_call_; }
void SetHasDirectCriticalNativeCall(bool value) { has_direct_critical_native_call_ = value; }
+ bool HasAlwaysThrowingInvokes() const { return has_always_throwing_invokes_; }
+ void SetHasAlwaysThrowingInvokes(bool value) { has_always_throwing_invokes_ = value; }
+
ArtMethod* GetArtMethod() const { return art_method_; }
void SetArtMethod(ArtMethod* method) { art_method_ = method; }
@@ -719,12 +738,12 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
return ReferenceTypeInfo::Create(handle_cache_.GetObjectClassHandle(), /* is_exact= */ false);
}
- uint32_t GetNumberOfCHAGuards() { return number_of_cha_guards_; }
+ uint32_t GetNumberOfCHAGuards() const { return number_of_cha_guards_; }
void SetNumberOfCHAGuards(uint32_t num) { number_of_cha_guards_ = num; }
void IncrementNumberOfCHAGuards() { number_of_cha_guards_++; }
private:
- void RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visited) const;
+ void RemoveDeadBlocksInstructionsAsUsersAndDisconnect(const ArenaBitVector& visited) const;
void RemoveDeadBlocks(const ArenaBitVector& visited);
template <class InstructionType, typename ValueType>
@@ -792,14 +811,11 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
size_t temporaries_vreg_slots_;
// Flag whether there are bounds checks in the graph. We can skip
- // BCE if it's false. It's only best effort to keep it up to date in
- // the presence of code elimination so there might be false positives.
+ // BCE if it's false.
bool has_bounds_checks_;
// Flag whether there are try/catch blocks in the graph. We will skip
- // try/catch-related passes if it's false. It's only best effort to keep
- // it up to date in the presence of code elimination so there might be
- // false positives.
+ // try/catch-related passes if it's false.
bool has_try_catch_;
// Flag whether there are any HMonitorOperation in the graph. If yes this will mandate
@@ -812,20 +828,19 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
bool has_simd_;
// Flag whether there are any loops in the graph. We can skip loop
- // optimization if it's false. It's only best effort to keep it up
- // to date in the presence of code elimination so there might be false
- // positives.
+ // optimization if it's false.
bool has_loops_;
- // Flag whether there are any irreducible loops in the graph. It's only
- // best effort to keep it up to date in the presence of code elimination
- // so there might be false positives.
+ // Flag whether there are any irreducible loops in the graph.
bool has_irreducible_loops_;
// Flag whether there are any direct calls to native code registered
// for @CriticalNative methods.
bool has_direct_critical_native_call_;
+ // Flag whether the graph contains invokes that always throw.
+ bool has_always_throwing_invokes_;
+
// Is the code known to be robust against eliminating dead references
// and the effects of early finalization? If false, dead reference variables
// are kept if they might be visible to the garbage collector.
@@ -1291,7 +1306,7 @@ class HBasicBlock : public ArenaObject<kArenaAllocBasicBlock> {
// graph, create a Goto at the end of the former block and will create an edge
// between the blocks. It will not, however, update the reverse post order or
// loop and try/catch information.
- HBasicBlock* SplitBefore(HInstruction* cursor);
+ HBasicBlock* SplitBefore(HInstruction* cursor, bool require_graph_not_in_ssa_form = true);
// Split the block into two blocks just before `cursor`. Returns the newly
// created block. Note that this method just updates raw block information,
@@ -1332,6 +1347,20 @@ class HBasicBlock : public ArenaObject<kArenaAllocBasicBlock> {
// are safely updated.
void DisconnectAndDelete();
+  // Disconnects `this` from all its successors and updates their phis, if the successors have them.
+  // If `visited` is provided, it is used to determine whether a successor is still reachable;
+  // phis of unreachable successors are not updated.
+ void DisconnectFromSuccessors(const ArenaBitVector* visited = nullptr);
+
+  // Removes the catch phi uses of the instructions in `this`, and then removes the instructions
+  // themselves. If `building_dominator_tree` is true, it will not remove the instructions as
+  // users, since we did that in a previous step. This is a special case for building up the
+  // dominator tree: we want to eliminate uses before inputs but we don't have domination
+  // information, so we remove all connections from inputs/uses first before removing any
+  // instruction. This method assumes the instructions have been removed from all users with the
+  // exception of catch phis because of missing exceptional edges in the graph.
+ void RemoveCatchPhiUsesAndInstruction(bool building_dominator_tree);
+
void AddInstruction(HInstruction* instruction);
// Insert `instruction` before/after an existing instruction `cursor`.
void InsertInstructionBefore(HInstruction* instruction, HInstruction* cursor);
@@ -1540,10 +1569,10 @@ class HLoopInformationOutwardIterator : public ValueObject {
M(Min, BinaryOperation) \
M(MonitorOperation, Instruction) \
M(Mul, BinaryOperation) \
- M(NativeDebugInfo, Instruction) \
M(Neg, UnaryOperation) \
M(NewArray, Instruction) \
M(NewInstance, Instruction) \
+ M(Nop, Instruction) \
M(Not, UnaryOperation) \
M(NotEqual, Condition) \
M(NullConstant, Instruction) \
@@ -2348,7 +2377,10 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> {
return GetType() == DataType::Type::kReference;
}
+ // Sets the ReferenceTypeInfo. The RTI must be valid.
void SetReferenceTypeInfo(ReferenceTypeInfo rti);
+ // Same as above, but we only set it if it's valid. Otherwise, we don't change the current RTI.
+ void SetReferenceTypeInfoIfValid(ReferenceTypeInfo rti);
ReferenceTypeInfo GetReferenceTypeInfo() const {
DCHECK_EQ(GetType(), DataType::Type::kReference);
@@ -2408,7 +2440,7 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> {
!CanThrow() &&
!IsSuspendCheck() &&
!IsControlFlow() &&
- !IsNativeDebugInfo() &&
+ !IsNop() &&
!IsParameterValue() &&
// If we added an explicit barrier then we should keep it.
!IsMemoryBarrier() &&
@@ -2419,9 +2451,12 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> {
return IsRemovable() && !HasUses();
}
- // Does this instruction strictly dominate `other_instruction`?
- // Returns false if this instruction and `other_instruction` are the same.
- // Aborts if this instruction and `other_instruction` are both phis.
+ // Does this instruction dominate `other_instruction`?
+ // Aborts if this instruction and `other_instruction` are different phis.
+ bool Dominates(HInstruction* other_instruction) const;
+
+  // Same as above, but with `strictly dominates`, i.e. returns false if this instruction and
+  // `other_instruction` are the same.
bool StrictlyDominates(HInstruction* other_instruction) const;
int GetId() const { return id_; }
@@ -2486,7 +2521,9 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> {
void SetLocations(LocationSummary* locations) { locations_ = locations; }
void ReplaceWith(HInstruction* instruction);
- void ReplaceUsesDominatedBy(HInstruction* dominator, HInstruction* replacement);
+ void ReplaceUsesDominatedBy(HInstruction* dominator,
+ HInstruction* replacement,
+ bool strictly_dominated = true);
void ReplaceEnvUsesDominatedBy(HInstruction* dominator, HInstruction* replacement);
void ReplaceInput(HInstruction* replacement, size_t index);
@@ -3730,7 +3767,7 @@ class HClassTableGet final : public HExpression<1> {
static constexpr size_t kNumberOfClassTableGetPackedBits = kFieldTableKind + kFieldTableKindSize;
static_assert(kNumberOfClassTableGetPackedBits <= kMaxNumberOfPackedBits,
"Too many packed fields.");
- using TableKindField = BitField<TableKind, kFieldTableKind, kFieldTableKind>;
+ using TableKindField = BitField<TableKind, kFieldTableKind, kFieldTableKindSize>;
// The index of the ArtMethod in the table.
const size_t index_;
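The one-character fix above (kFieldTableKind -> kFieldTableKindSize) matters because the third template argument of this kind of bit field is the field width, not its position. A self-contained sketch of a <position, size> packed field (not ART's BitField, just the same idea):

#include <cassert>
#include <cstddef>
#include <cstdint>

template <typename T, size_t kPosition, size_t kSize>
struct PackedField {
  static uint32_t Encode(T value) {
    return (static_cast<uint32_t>(value) & ((1u << kSize) - 1u)) << kPosition;
  }
  static T Decode(uint32_t storage) {
    return static_cast<T>((storage >> kPosition) & ((1u << kSize) - 1u));
  }
};

int main() {
  // A 3-bit wide field stored at bit position 5.
  using Field = PackedField<uint32_t, /*kPosition=*/5, /*kSize=*/3>;
  uint32_t packed = Field::Encode(6u);
  assert(Field::Decode(packed) == 6u);
  // Using the position (5) as the size by mistake would widen the mask and
  // silently admit values that do not fit in the intended field.
  return 0;
}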
@@ -4700,7 +4737,7 @@ class HInvoke : public HVariableInputSizeInstruction {
void SetAlwaysThrows(bool always_throws) { SetPackedFlag<kFlagAlwaysThrows>(always_throws); }
- bool AlwaysThrows() const override { return GetPackedFlag<kFlagAlwaysThrows>(); }
+ bool AlwaysThrows() const override final { return GetPackedFlag<kFlagAlwaysThrows>(); }
bool CanBeMoved() const override { return IsIntrinsic() && !DoesAnyWrite(); }
@@ -4719,7 +4756,7 @@ class HInvoke : public HVariableInputSizeInstruction {
bool IsIntrinsic() const { return intrinsic_ != Intrinsics::kNone; }
ArtMethod* GetResolvedMethod() const { return resolved_method_; }
- void SetResolvedMethod(ArtMethod* method);
+ void SetResolvedMethod(ArtMethod* method, bool enable_intrinsic_opt);
MethodReference GetMethodReference() const { return method_reference_; }
@@ -4748,7 +4785,8 @@ class HInvoke : public HVariableInputSizeInstruction {
MethodReference method_reference,
ArtMethod* resolved_method,
MethodReference resolved_method_reference,
- InvokeType invoke_type)
+ InvokeType invoke_type,
+ bool enable_intrinsic_opt)
: HVariableInputSizeInstruction(
kind,
return_type,
@@ -4764,7 +4802,7 @@ class HInvoke : public HVariableInputSizeInstruction {
intrinsic_optimizations_(0) {
SetPackedField<InvokeTypeField>(invoke_type);
SetPackedFlag<kFlagCanThrow>(true);
- SetResolvedMethod(resolved_method);
+ SetResolvedMethod(resolved_method, enable_intrinsic_opt);
}
DEFAULT_COPY_CONSTRUCTOR(Invoke);
@@ -4797,7 +4835,8 @@ class HInvokeUnresolved final : public HInvoke {
method_reference,
nullptr,
MethodReference(nullptr, 0u),
- invoke_type) {
+ invoke_type,
+ /* enable_intrinsic_opt= */ false) {
}
bool IsClonable() const override { return true; }
@@ -4820,7 +4859,8 @@ class HInvokePolymorphic final : public HInvoke {
// to pass intrinsic information to the HInvokePolymorphic node.
ArtMethod* resolved_method,
MethodReference resolved_method_reference,
- dex::ProtoIndex proto_idx)
+ dex::ProtoIndex proto_idx,
+ bool enable_intrinsic_opt)
: HInvoke(kInvokePolymorphic,
allocator,
number_of_arguments,
@@ -4830,7 +4870,8 @@ class HInvokePolymorphic final : public HInvoke {
method_reference,
resolved_method,
resolved_method_reference,
- kPolymorphic),
+ kPolymorphic,
+ enable_intrinsic_opt),
proto_idx_(proto_idx) {
}
@@ -4852,7 +4893,8 @@ class HInvokeCustom final : public HInvoke {
uint32_t call_site_index,
DataType::Type return_type,
uint32_t dex_pc,
- MethodReference method_reference)
+ MethodReference method_reference,
+ bool enable_intrinsic_opt)
: HInvoke(kInvokeCustom,
allocator,
number_of_arguments,
@@ -4862,7 +4904,8 @@ class HInvokeCustom final : public HInvoke {
method_reference,
/* resolved_method= */ nullptr,
MethodReference(nullptr, 0u),
- kStatic),
+ kStatic,
+ enable_intrinsic_opt),
call_site_index_(call_site_index) {
}
@@ -4909,7 +4952,8 @@ class HInvokeStaticOrDirect final : public HInvoke {
DispatchInfo dispatch_info,
InvokeType invoke_type,
MethodReference resolved_method_reference,
- ClinitCheckRequirement clinit_check_requirement)
+ ClinitCheckRequirement clinit_check_requirement,
+ bool enable_intrinsic_opt)
: HInvoke(kInvokeStaticOrDirect,
allocator,
number_of_arguments,
@@ -4922,7 +4966,8 @@ class HInvokeStaticOrDirect final : public HInvoke {
method_reference,
resolved_method,
resolved_method_reference,
- invoke_type),
+ invoke_type,
+ enable_intrinsic_opt),
dispatch_info_(dispatch_info) {
SetPackedField<ClinitCheckRequirementField>(clinit_check_requirement);
}
@@ -5134,7 +5179,8 @@ class HInvokeVirtual final : public HInvoke {
MethodReference method_reference,
ArtMethod* resolved_method,
MethodReference resolved_method_reference,
- uint32_t vtable_index)
+ uint32_t vtable_index,
+ bool enable_intrinsic_opt)
: HInvoke(kInvokeVirtual,
allocator,
number_of_arguments,
@@ -5144,7 +5190,8 @@ class HInvokeVirtual final : public HInvoke {
method_reference,
resolved_method,
resolved_method_reference,
- kVirtual),
+ kVirtual,
+ enable_intrinsic_opt),
vtable_index_(vtable_index) {
}
@@ -5196,7 +5243,8 @@ class HInvokeInterface final : public HInvoke {
ArtMethod* resolved_method,
MethodReference resolved_method_reference,
uint32_t imt_index,
- MethodLoadKind load_kind)
+ MethodLoadKind load_kind,
+ bool enable_intrinsic_opt)
: HInvoke(kInvokeInterface,
allocator,
number_of_arguments + (NeedsCurrentMethod(load_kind) ? 1 : 0),
@@ -5206,7 +5254,8 @@ class HInvokeInterface final : public HInvoke {
method_reference,
resolved_method,
resolved_method_reference,
- kInterface),
+ kInterface,
+ enable_intrinsic_opt),
imt_index_(imt_index),
hidden_argument_load_kind_(load_kind) {
}
@@ -5321,7 +5370,7 @@ class HNewArray final : public HExpression<2> {
kFieldComponentSizeShift + kFieldComponentSizeShiftSize;
static_assert(kNumberOfNewArrayPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");
using ComponentSizeShiftField =
- BitField<size_t, kFieldComponentSizeShift, kFieldComponentSizeShift>;
+ BitField<size_t, kFieldComponentSizeShift, kFieldComponentSizeShiftSize>;
};
class HAdd final : public HBinaryOperation {
@@ -6362,6 +6411,27 @@ class HPredicatedInstanceFieldGet final : public HExpression<2> {
const FieldInfo field_info_;
};
+enum class WriteBarrierKind {
+  // Emit the write barrier, with a runtime optimization which checks if the value being set
+  // is null.
+ kEmitWithNullCheck,
+ // Emit the write barrier, without the runtime null check optimization. This could be set because:
+ // A) It is a write barrier for an ArraySet (which does the optimization with the type check, so
+ // it never does the optimization at the write barrier stage)
+ // B) We know that the input can't be null
+  // C) This write barrier is actually several write barriers coalesced into one. We could
+  // potentially check whether every value is null as a runtime optimization, at the cost of
+  // compile time / code size. At the time of writing this was deemed not worth the effort.
+ kEmitNoNullCheck,
+ // Skip emitting the write barrier. This could be set because:
+ // A) The write barrier is not needed (e.g. it is not a reference, or the value is the null
+ // constant)
+ // B) This write barrier was coalesced into another one so there's no need to emit it.
+ kDontEmit,
+ kLast = kDontEmit
+};
+std::ostream& operator<<(std::ostream& os, WriteBarrierKind rhs);
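As an aside, the intended consumer-side behavior of these kinds can be sketched as follows. This is a standalone illustration only, not ART's actual code generator; `EmitReferenceStore` and its pseudo-output are hypothetical names.

// Standalone sketch (not ART code): how a backend could interpret WriteBarrierKind
// when emitting a reference store.
#include <cstdio>

enum class WriteBarrierKind { kEmitWithNullCheck, kEmitNoNullCheck, kDontEmit };

void EmitReferenceStore(WriteBarrierKind kind) {
  std::puts("store the reference");  // The store itself is always emitted.
  switch (kind) {
    case WriteBarrierKind::kEmitWithNullCheck:
      // Runtime optimization: skip the card mark when the stored value is null.
      std::puts("if (value != null) mark card");
      break;
    case WriteBarrierKind::kEmitNoNullCheck:
      // The null-check optimization does not apply (ArraySet type-check path,
      // provably non-null value, or several coalesced barriers).
      std::puts("mark card");
      break;
    case WriteBarrierKind::kDontEmit:
      // Barrier not needed, or already covered by another store's barrier.
      break;
  }
}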
+
class HInstanceFieldSet final : public HExpression<2> {
public:
HInstanceFieldSet(HInstruction* object,
@@ -6386,6 +6456,7 @@ class HInstanceFieldSet final : public HExpression<2> {
dex_file) {
SetPackedFlag<kFlagValueCanBeNull>(true);
SetPackedFlag<kFlagIsPredicatedSet>(false);
+ SetPackedField<WriteBarrierKindField>(WriteBarrierKind::kEmitWithNullCheck);
SetRawInputAt(0, object);
SetRawInputAt(1, value);
}
@@ -6406,6 +6477,12 @@ class HInstanceFieldSet final : public HExpression<2> {
void ClearValueCanBeNull() { SetPackedFlag<kFlagValueCanBeNull>(false); }
bool GetIsPredicatedSet() const { return GetPackedFlag<kFlagIsPredicatedSet>(); }
void SetIsPredicatedSet(bool value = true) { SetPackedFlag<kFlagIsPredicatedSet>(value); }
+ WriteBarrierKind GetWriteBarrierKind() { return GetPackedField<WriteBarrierKindField>(); }
+ void SetWriteBarrierKind(WriteBarrierKind kind) {
+ DCHECK(kind != WriteBarrierKind::kEmitWithNullCheck)
+ << "We shouldn't go back to the original value.";
+ SetPackedField<WriteBarrierKindField>(kind);
+ }
DECLARE_INSTRUCTION(InstanceFieldSet);
@@ -6415,11 +6492,17 @@ class HInstanceFieldSet final : public HExpression<2> {
private:
static constexpr size_t kFlagValueCanBeNull = kNumberOfGenericPackedBits;
static constexpr size_t kFlagIsPredicatedSet = kFlagValueCanBeNull + 1;
- static constexpr size_t kNumberOfInstanceFieldSetPackedBits = kFlagIsPredicatedSet + 1;
+ static constexpr size_t kWriteBarrierKind = kFlagIsPredicatedSet + 1;
+ static constexpr size_t kWriteBarrierKindSize =
+ MinimumBitsToStore(static_cast<size_t>(WriteBarrierKind::kLast));
+ static constexpr size_t kNumberOfInstanceFieldSetPackedBits =
+ kWriteBarrierKind + kWriteBarrierKindSize;
static_assert(kNumberOfInstanceFieldSetPackedBits <= kMaxNumberOfPackedBits,
"Too many packed fields.");
const FieldInfo field_info_;
+ using WriteBarrierKindField =
+ BitField<WriteBarrierKind, kWriteBarrierKind, kWriteBarrierKindSize>;
};
class HArrayGet final : public HExpression<2> {
@@ -6540,6 +6623,8 @@ class HArraySet final : public HExpression<3> {
SetPackedFlag<kFlagNeedsTypeCheck>(value->GetType() == DataType::Type::kReference);
SetPackedFlag<kFlagValueCanBeNull>(true);
SetPackedFlag<kFlagStaticTypeOfArrayIsObjectArray>(false);
+ // ArraySets never do the null check optimization at the write barrier stage.
+ SetPackedField<WriteBarrierKindField>(WriteBarrierKind::kEmitNoNullCheck);
SetRawInputAt(0, array);
SetRawInputAt(1, index);
SetRawInputAt(2, value);
@@ -6560,8 +6645,10 @@ class HArraySet final : public HExpression<3> {
return false;
}
- void ClearNeedsTypeCheck() {
+ void ClearTypeCheck() {
SetPackedFlag<kFlagNeedsTypeCheck>(false);
+ // Clear the `CanTriggerGC` flag too as we can only trigger a GC when doing a type check.
+ SetSideEffects(GetSideEffects().Exclusion(SideEffects::CanTriggerGC()));
}
void ClearValueCanBeNull() {
@@ -6610,6 +6697,16 @@ class HArraySet final : public HExpression<3> {
: SideEffects::None();
}
+ WriteBarrierKind GetWriteBarrierKind() { return GetPackedField<WriteBarrierKindField>(); }
+
+ void SetWriteBarrierKind(WriteBarrierKind kind) {
+ DCHECK(kind != WriteBarrierKind::kEmitNoNullCheck)
+ << "We shouldn't go back to the original value.";
+ DCHECK(kind != WriteBarrierKind::kEmitWithNullCheck)
+ << "We never do the null check optimization for ArraySets.";
+ SetPackedField<WriteBarrierKindField>(kind);
+ }
+
DECLARE_INSTRUCTION(ArraySet);
protected:
@@ -6625,11 +6722,16 @@ class HArraySet final : public HExpression<3> {
// Cached information for the reference_type_info_ so that codegen
// does not need to inspect the static type.
static constexpr size_t kFlagStaticTypeOfArrayIsObjectArray = kFlagValueCanBeNull + 1;
- static constexpr size_t kNumberOfArraySetPackedBits =
- kFlagStaticTypeOfArrayIsObjectArray + 1;
+ static constexpr size_t kWriteBarrierKind = kFlagStaticTypeOfArrayIsObjectArray + 1;
+ static constexpr size_t kWriteBarrierKindSize =
+ MinimumBitsToStore(static_cast<size_t>(WriteBarrierKind::kLast));
+ static constexpr size_t kNumberOfArraySetPackedBits = kWriteBarrierKind + kWriteBarrierKindSize;
static_assert(kNumberOfArraySetPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");
using ExpectedComponentTypeField =
BitField<DataType::Type, kFieldExpectedComponentType, kFieldExpectedComponentTypeSize>;
+
+ using WriteBarrierKindField =
+ BitField<WriteBarrierKind, kWriteBarrierKind, kWriteBarrierKindSize>;
};
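The `SetWriteBarrierKind` DCHECKs above only allow transitions away from the constructor defaults. A write-barrier-elimination style pass could use them roughly as sketched below; the types and the `CoalesceBarriers` helper are made up for illustration and are not the actual WriteBarrierElimination implementation.

// Sketch (hypothetical types): for a run of reference stores to the same object,
// keep one barrier without the per-value null check and drop the rest.
#include <vector>

enum class WriteBarrierKind { kEmitWithNullCheck, kEmitNoNullCheck, kDontEmit };

struct FakeSet {
  WriteBarrierKind kind = WriteBarrierKind::kEmitWithNullCheck;
};

void CoalesceBarriers(std::vector<FakeSet*>& stores_to_same_object) {
  bool keep_next_barrier = true;
  for (FakeSet* store : stores_to_same_object) {
    if (keep_next_barrier) {
      // The surviving barrier now covers several values, so the single-value
      // null-check optimization no longer applies.
      store->kind = WriteBarrierKind::kEmitNoNullCheck;
      keep_next_barrier = false;
    } else {
      store->kind = WriteBarrierKind::kDontEmit;  // Covered by the first one.
    }
  }
}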
class HArrayLength final : public HExpression<1> {
@@ -6714,9 +6816,10 @@ class HBoundsCheck final : public HExpression<2> {
class HSuspendCheck final : public HExpression<0> {
public:
- explicit HSuspendCheck(uint32_t dex_pc = kNoDexPc)
+ explicit HSuspendCheck(uint32_t dex_pc = kNoDexPc, bool is_no_op = false)
: HExpression(kSuspendCheck, SideEffects::CanTriggerGC(), dex_pc),
slow_path_(nullptr) {
+ SetPackedFlag<kFlagIsNoOp>(is_no_op);
}
bool IsClonable() const override { return true; }
@@ -6725,6 +6828,10 @@ class HSuspendCheck final : public HExpression<0> {
return true;
}
+ void SetIsNoOp(bool is_no_op) { SetPackedFlag<kFlagIsNoOp>(is_no_op); }
+ bool IsNoOp() const { return GetPackedFlag<kFlagIsNoOp>(); }
+
void SetSlowPath(SlowPathCode* slow_path) { slow_path_ = slow_path; }
SlowPathCode* GetSlowPath() const { return slow_path_; }
@@ -6733,28 +6840,42 @@ class HSuspendCheck final : public HExpression<0> {
protected:
DEFAULT_COPY_CONSTRUCTOR(SuspendCheck);
+ // True if the HSuspendCheck should not emit any code during codegen. It is
+ // not possible to simply remove this instruction to disable codegen, as
+  // other optimizations (e.g. CHAGuardVisitor::HoistGuard) depend on
+ // HSuspendCheck being present in every loop.
+ static constexpr size_t kFlagIsNoOp = kNumberOfGenericPackedBits;
+ static constexpr size_t kNumberOfSuspendCheckPackedBits = kFlagIsNoOp + 1;
+ static_assert(kNumberOfSuspendCheckPackedBits <= HInstruction::kMaxNumberOfPackedBits,
+ "Too many packed fields.");
+
private:
// Only used for code generation, in order to share the same slow path between back edges
// of a same loop.
SlowPathCode* slow_path_;
};
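For reference, the no-op flag is meant to be honored at code-generation time rather than by deleting the node. A minimal sketch of that contract follows; it uses illustrative stand-in types, not the real per-architecture visitors.

// Sketch (not ART code): the suspend check stays in the graph so loop-based
// passes still find it, but a no-op check emits nothing.
struct FakeSuspendCheck {
  bool is_no_op = false;
  bool IsNoOp() const { return is_no_op; }
};

void VisitSuspendCheck(const FakeSuspendCheck& check) {
  if (check.IsNoOp()) {
    return;  // Keep the instruction, emit no test-and-branch.
  }
  // ... emit the thread-flag test and the slow-path call here ...
}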
-// Pseudo-instruction which provides the native debugger with mapping information.
-// It ensures that we can generate line number and local variables at this point.
-class HNativeDebugInfo : public HExpression<0> {
+// Pseudo-instruction which doesn't generate any code.
+// If `needs_environment` is true, it can be used to generate an environment. It is used, for
+// example, to provide the native debugger with mapping information, ensuring that we can generate
+// line numbers and local variables at this point.
+class HNop : public HExpression<0> {
public:
- explicit HNativeDebugInfo(uint32_t dex_pc)
- : HExpression<0>(kNativeDebugInfo, SideEffects::None(), dex_pc) {
+ explicit HNop(uint32_t dex_pc, bool needs_environment)
+ : HExpression<0>(kNop, SideEffects::None(), dex_pc), needs_environment_(needs_environment) {
}
bool NeedsEnvironment() const override {
- return true;
+ return needs_environment_;
}
- DECLARE_INSTRUCTION(NativeDebugInfo);
+ DECLARE_INSTRUCTION(Nop);
protected:
- DEFAULT_COPY_CONSTRUCTOR(NativeDebugInfo);
+ DEFAULT_COPY_CONSTRUCTOR(Nop);
+
+ private:
+ bool needs_environment_;
};
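Both uses of the renamed node follow from the constructor above. A small example of the two construction modes is sketched below; the helper names are hypothetical and it assumes the nodes.h declarations above, but the `HNop(dex_pc, needs_environment)` signature is the one introduced by this change.

// Illustrative helpers only.
HNop* MakeDebugInfoNop(ArenaAllocator* allocator, uint32_t dex_pc) {
  // Replacement for the old HNativeDebugInfo: keeps an environment so the
  // native debugger can map line numbers and local variables at this dex pc.
  return new (allocator) HNop(dex_pc, /* needs_environment= */ true);
}

HNop* MakePlainNop(ArenaAllocator* allocator, uint32_t dex_pc) {
  // Pure placeholder: generates no code and records no environment.
  return new (allocator) HNop(dex_pc, /* needs_environment= */ false);
}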
/**
@@ -7222,6 +7343,10 @@ class HLoadMethodHandle final : public HInstruction {
return SideEffects::CanTriggerGC();
}
+ bool CanThrow() const override { return true; }
+
+ bool NeedsEnvironment() const override { return true; }
+
DECLARE_INSTRUCTION(LoadMethodHandle);
protected:
@@ -7266,6 +7391,10 @@ class HLoadMethodType final : public HInstruction {
return SideEffects::CanTriggerGC();
}
+ bool CanThrow() const override { return true; }
+
+ bool NeedsEnvironment() const override { return true; }
+
DECLARE_INSTRUCTION(LoadMethodType);
protected:
@@ -7400,6 +7529,7 @@ class HStaticFieldSet final : public HExpression<2> {
declaring_class_def_index,
dex_file) {
SetPackedFlag<kFlagValueCanBeNull>(true);
+ SetPackedField<WriteBarrierKindField>(WriteBarrierKind::kEmitWithNullCheck);
SetRawInputAt(0, cls);
SetRawInputAt(1, value);
}
@@ -7415,6 +7545,13 @@ class HStaticFieldSet final : public HExpression<2> {
bool GetValueCanBeNull() const { return GetPackedFlag<kFlagValueCanBeNull>(); }
void ClearValueCanBeNull() { SetPackedFlag<kFlagValueCanBeNull>(false); }
+ WriteBarrierKind GetWriteBarrierKind() { return GetPackedField<WriteBarrierKindField>(); }
+ void SetWriteBarrierKind(WriteBarrierKind kind) {
+ DCHECK(kind != WriteBarrierKind::kEmitWithNullCheck)
+ << "We shouldn't go back to the original value.";
+ SetPackedField<WriteBarrierKindField>(kind);
+ }
+
DECLARE_INSTRUCTION(StaticFieldSet);
protected:
@@ -7422,25 +7559,34 @@ class HStaticFieldSet final : public HExpression<2> {
private:
static constexpr size_t kFlagValueCanBeNull = kNumberOfGenericPackedBits;
- static constexpr size_t kNumberOfStaticFieldSetPackedBits = kFlagValueCanBeNull + 1;
+ static constexpr size_t kWriteBarrierKind = kFlagValueCanBeNull + 1;
+ static constexpr size_t kWriteBarrierKindSize =
+ MinimumBitsToStore(static_cast<size_t>(WriteBarrierKind::kLast));
+ static constexpr size_t kNumberOfStaticFieldSetPackedBits =
+ kWriteBarrierKind + kWriteBarrierKindSize;
static_assert(kNumberOfStaticFieldSetPackedBits <= kMaxNumberOfPackedBits,
"Too many packed fields.");
const FieldInfo field_info_;
+ using WriteBarrierKindField =
+ BitField<WriteBarrierKind, kWriteBarrierKind, kWriteBarrierKindSize>;
};
class HStringBuilderAppend final : public HVariableInputSizeInstruction {
public:
HStringBuilderAppend(HIntConstant* format,
uint32_t number_of_arguments,
+ bool has_fp_args,
ArenaAllocator* allocator,
uint32_t dex_pc)
: HVariableInputSizeInstruction(
kStringBuilderAppend,
DataType::Type::kReference,
- // The runtime call may read memory from inputs. It never writes outside
- // of the newly allocated result object (or newly allocated helper objects).
- SideEffects::AllReads().Union(SideEffects::CanTriggerGC()),
+ SideEffects::CanTriggerGC().Union(
+ // The runtime call may read memory from inputs. It never writes outside
+ // of the newly allocated result object or newly allocated helper objects,
+ // except for float/double arguments where we reuse thread-local helper objects.
+ has_fp_args ? SideEffects::AllWritesAndReads() : SideEffects::AllReads()),
dex_pc,
allocator,
number_of_arguments + /* format */ 1u,
@@ -8393,7 +8539,7 @@ class HIntermediateAddress final : public HExpression<2> {
#include "nodes_x86.h"
#endif
-namespace art {
+namespace art HIDDEN {
class OptimizingCompilerStats;
@@ -8457,7 +8603,7 @@ HInstruction* ReplaceInstrOrPhiByClone(HInstruction* instr);
// Create a clone for each clonable instructions/phis and replace the original with the clone.
//
// Used for testing individual instruction cloner.
-class CloneAndReplaceInstructionVisitor : public HGraphDelegateVisitor {
+class CloneAndReplaceInstructionVisitor final : public HGraphDelegateVisitor {
public:
explicit CloneAndReplaceInstructionVisitor(HGraph* graph)
: HGraphDelegateVisitor(graph), instr_replaced_by_clones_count_(0) {}
diff --git a/compiler/optimizing/nodes_shared.cc b/compiler/optimizing/nodes_shared.cc
index eca97d7a70..b3a7ad9a05 100644
--- a/compiler/optimizing/nodes_shared.cc
+++ b/compiler/optimizing/nodes_shared.cc
@@ -23,7 +23,7 @@
#include "instruction_simplifier_shared.h"
-namespace art {
+namespace art HIDDEN {
using helpers::CanFitInShifterOperand;
diff --git a/compiler/optimizing/nodes_shared.h b/compiler/optimizing/nodes_shared.h
index 7dcac1787e..27e610328f 100644
--- a/compiler/optimizing/nodes_shared.h
+++ b/compiler/optimizing/nodes_shared.h
@@ -22,7 +22,7 @@
// (defining `HInstruction` and co).
#include "nodes.h"
-namespace art {
+namespace art HIDDEN {
class HMultiplyAccumulate final : public HExpression<3> {
public:
diff --git a/compiler/optimizing/nodes_test.cc b/compiler/optimizing/nodes_test.cc
index 34f0e9b1e1..29210fe10f 100644
--- a/compiler/optimizing/nodes_test.cc
+++ b/compiler/optimizing/nodes_test.cc
@@ -17,11 +17,12 @@
#include "nodes.h"
#include "base/arena_allocator.h"
+#include "base/macros.h"
#include "optimizing_unit_test.h"
#include "gtest/gtest.h"
-namespace art {
+namespace art HIDDEN {
class NodeTest : public OptimizingUnitTest {};
diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h
index a2cd86dc33..73f6c40a0d 100644
--- a/compiler/optimizing/nodes_vector.h
+++ b/compiler/optimizing/nodes_vector.h
@@ -21,7 +21,7 @@
// is included in the header file nodes.h itself. However it gives editing tools better context.
#include "nodes.h"
-namespace art {
+namespace art HIDDEN {
// Memory alignment, represented as an offset relative to a base, where 0 <= offset < base,
// and base is a power of two. For example, the value Alignment(16, 0) means memory is
diff --git a/compiler/optimizing/nodes_vector_test.cc b/compiler/optimizing/nodes_vector_test.cc
index b0a665d704..e0a48db84f 100644
--- a/compiler/optimizing/nodes_vector_test.cc
+++ b/compiler/optimizing/nodes_vector_test.cc
@@ -15,10 +15,11 @@
*/
#include "base/arena_allocator.h"
+#include "base/macros.h"
#include "nodes.h"
#include "optimizing_unit_test.h"
-namespace art {
+namespace art HIDDEN {
/**
* Fixture class for testing vector nodes.
diff --git a/compiler/optimizing/nodes_x86.h b/compiler/optimizing/nodes_x86.h
index 8e8fbc1581..e246390aa5 100644
--- a/compiler/optimizing/nodes_x86.h
+++ b/compiler/optimizing/nodes_x86.h
@@ -17,7 +17,7 @@
#ifndef ART_COMPILER_OPTIMIZING_NODES_X86_H_
#define ART_COMPILER_OPTIMIZING_NODES_X86_H_
-namespace art {
+namespace art HIDDEN {
// Compute the address of the method for X86 Constant area support.
class HX86ComputeBaseMethodAddress final : public HExpression<0> {
diff --git a/compiler/optimizing/optimization.cc b/compiler/optimizing/optimization.cc
index 2cac38b715..12e9a1046d 100644
--- a/compiler/optimizing/optimization.cc
+++ b/compiler/optimizing/optimization.cc
@@ -55,10 +55,11 @@
#include "select_generator.h"
#include "sharpening.h"
#include "side_effects_analysis.h"
+#include "write_barrier_elimination.h"
// Decide between default or alternative pass name.
-namespace art {
+namespace art HIDDEN {
const char* OptimizationPassName(OptimizationPass pass) {
switch (pass) {
@@ -76,6 +77,7 @@ const char* OptimizationPassName(OptimizationPass pass) {
return BoundsCheckElimination::kBoundsCheckEliminationPassName;
case OptimizationPass::kLoadStoreElimination:
return LoadStoreElimination::kLoadStoreEliminationPassName;
+ case OptimizationPass::kAggressiveConstantFolding:
case OptimizationPass::kConstantFolding:
return HConstantFolding::kConstantFoldingPassName;
case OptimizationPass::kDeadCodeElimination:
@@ -95,6 +97,8 @@ const char* OptimizationPassName(OptimizationPass pass) {
return ConstructorFenceRedundancyElimination::kCFREPassName;
case OptimizationPass::kScheduling:
return HInstructionScheduling::kInstructionSchedulingPassName;
+ case OptimizationPass::kWriteBarrierElimination:
+ return WriteBarrierElimination::kWBEPassName;
#ifdef ART_ENABLE_CODEGEN_arm
case OptimizationPass::kInstructionSimplifierArm:
return arm::InstructionSimplifierArm::kInstructionSimplifierArmPassName;
@@ -194,7 +198,8 @@ ArenaVector<HOptimization*> ConstructOptimizations(
opt = most_recent_side_effects = new (allocator) SideEffectsAnalysis(graph, pass_name);
break;
case OptimizationPass::kInductionVarAnalysis:
- opt = most_recent_induction = new (allocator) HInductionVarAnalysis(graph, pass_name);
+ opt = most_recent_induction =
+ new (allocator) HInductionVarAnalysis(graph, stats, pass_name);
break;
//
// Passes that need prior analysis.
@@ -221,7 +226,11 @@ ArenaVector<HOptimization*> ConstructOptimizations(
// Regular passes.
//
case OptimizationPass::kConstantFolding:
- opt = new (allocator) HConstantFolding(graph, pass_name);
+ opt = new (allocator) HConstantFolding(graph, stats, pass_name);
+ break;
+ case OptimizationPass::kAggressiveConstantFolding:
+ opt = new (allocator)
+ HConstantFolding(graph, stats, pass_name, /* use_all_optimizations_ = */ true);
break;
case OptimizationPass::kDeadCodeElimination:
opt = new (allocator) HDeadCodeElimination(graph, stats, pass_name);
@@ -239,6 +248,7 @@ ArenaVector<HOptimization*> ConstructOptimizations(
/* total_number_of_instructions= */ 0,
/* parent= */ nullptr,
/* depth= */ 0,
+ /* try_catch_inlining_allowed= */ true,
pass_name);
break;
}
@@ -267,6 +277,9 @@ ArenaVector<HOptimization*> ConstructOptimizations(
case OptimizationPass::kLoadStoreElimination:
opt = new (allocator) LoadStoreElimination(graph, stats, pass_name);
break;
+ case OptimizationPass::kWriteBarrierElimination:
+ opt = new (allocator) WriteBarrierElimination(graph, stats, pass_name);
+ break;
case OptimizationPass::kScheduling:
opt = new (allocator) HInstructionScheduling(
graph, codegen->GetCompilerOptions().GetInstructionSet(), codegen, pass_name);
diff --git a/compiler/optimizing/optimization.h b/compiler/optimizing/optimization.h
index 2113df0c81..134e3cdc7a 100644
--- a/compiler/optimizing/optimization.h
+++ b/compiler/optimizing/optimization.h
@@ -18,10 +18,11 @@
#define ART_COMPILER_OPTIMIZING_OPTIMIZATION_H_
#include "base/arena_object.h"
+#include "base/macros.h"
#include "nodes.h"
#include "optimizing_compiler_stats.h"
-namespace art {
+namespace art HIDDEN {
class CodeGenerator;
class DexCompilationUnit;
@@ -42,7 +43,7 @@ class HOptimization : public ArenaObject<kArenaAllocOptimization> {
// Return the name of the pass. Pass names for a single HOptimization should be of form
// <optimization_name> or <optimization_name>$<pass_name> for common <optimization_name> prefix.
- // Example: 'instruction_simplifier', 'instruction_simplifier$after_bce',
+  // Example: 'instruction_simplifier', 'instruction_simplifier$after_gvn',
// 'instruction_simplifier$before_codegen'.
const char* GetPassName() const { return pass_name_; }
@@ -66,6 +67,7 @@ class HOptimization : public ArenaObject<kArenaAllocOptimization> {
// field is preferred over a string lookup at places where performance matters.
// TODO: generate this table and lookup methods below automatically?
enum class OptimizationPass {
+ kAggressiveConstantFolding,
kAggressiveInstructionSimplifier,
kBoundsCheckElimination,
kCHAGuardOptimization,
@@ -83,6 +85,7 @@ enum class OptimizationPass {
kScheduling,
kSelectGenerator,
kSideEffectsAnalysis,
+ kWriteBarrierElimination,
#ifdef ART_ENABLE_CODEGEN_arm
kInstructionSimplifierArm,
kCriticalNativeAbiFixupArm,
diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc
index bad540e03c..f12e748941 100644
--- a/compiler/optimizing/optimizing_cfi_test.cc
+++ b/compiler/optimizing/optimizing_cfi_test.cc
@@ -18,6 +18,7 @@
#include <vector>
#include "arch/instruction_set.h"
+#include "base/macros.h"
#include "base/runtime_debug.h"
#include "cfi_test.h"
#include "driver/compiler_options.h"
@@ -32,7 +33,7 @@
namespace vixl32 = vixl::aarch32;
-namespace art {
+namespace art HIDDEN {
// Run the tests only on host.
#ifndef ART_TARGET_ANDROID
@@ -167,9 +168,20 @@ TEST_ISA(kThumb2)
// barrier configuration, and as such is removed from the set of
// callee-save registers in the ARM64 code generator of the Optimizing
// compiler.
-#if defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER)
-TEST_ISA(kArm64)
-#endif
+//
+// We can't use compile-time macros for the read barrier, as the introduction
+// of userfaultfd-GC has made it a runtime choice.
+TEST_F(OptimizingCFITest, kArm64) {
+ if (kUseBakerReadBarrier && gUseReadBarrier) {
+ std::vector<uint8_t> expected_asm(
+ expected_asm_kArm64,
+ expected_asm_kArm64 + arraysize(expected_asm_kArm64));
+ std::vector<uint8_t> expected_cfi(
+ expected_cfi_kArm64,
+ expected_cfi_kArm64 + arraysize(expected_cfi_kArm64));
+ TestImpl(InstructionSet::kArm64, "kArm64", expected_asm, expected_cfi);
+ }
+}
#endif
#ifdef ART_ENABLE_CODEGEN_x86
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 6eb3d01e42..00eb6e5c42 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -33,12 +33,11 @@
#include "base/timing_logger.h"
#include "builder.h"
#include "code_generator.h"
-#include "compiled_method.h"
#include "compiler.h"
#include "debug/elf_debug_writer.h"
#include "debug/method_debug_info.h"
#include "dex/dex_file_types.h"
-#include "driver/compiled_method_storage.h"
+#include "driver/compiled_code_storage.h"
#include "driver/compiler_options.h"
#include "driver/dex_compilation_unit.h"
#include "graph_checker.h"
@@ -52,6 +51,7 @@
#include "linker/linker_patch.h"
#include "nodes.h"
#include "oat_quick_method_header.h"
+#include "optimizing/write_barrier_elimination.h"
#include "prepare_for_register_allocation.h"
#include "reference_type_propagation.h"
#include "register_allocator_linear_scan.h"
@@ -62,7 +62,7 @@
#include "stack_map_stream.h"
#include "utils/assembler.h"
-namespace art {
+namespace art HIDDEN {
static constexpr size_t kArenaAllocatorMemoryReportThreshold = 8 * MB;
@@ -269,7 +269,7 @@ class PassScope : public ValueObject {
class OptimizingCompiler final : public Compiler {
public:
explicit OptimizingCompiler(const CompilerOptions& compiler_options,
- CompiledMethodStorage* storage);
+ CompiledCodeStorage* storage);
~OptimizingCompiler() override;
bool CanCompileMethod(uint32_t method_idx, const DexFile& dex_file) const override;
@@ -359,11 +359,11 @@ class OptimizingCompiler final : public Compiler {
const DexCompilationUnit& dex_compilation_unit,
PassObserver* pass_observer) const;
- private:
// Create a 'CompiledMethod' for an optimized graph.
CompiledMethod* Emit(ArenaAllocator* allocator,
CodeVectorAllocator* code_allocator,
CodeGenerator* codegen,
+ bool is_intrinsic,
const dex::CodeItem* item) const;
// Try compiling a method and return the code generator used for
@@ -413,7 +413,7 @@ class OptimizingCompiler final : public Compiler {
static const int kMaximumCompilationTimeBeforeWarning = 100; /* ms */
OptimizingCompiler::OptimizingCompiler(const CompilerOptions& compiler_options,
- CompiledMethodStorage* storage)
+ CompiledCodeStorage* storage)
: Compiler(compiler_options, storage, kMaximumCompilationTimeBeforeWarning) {
// Enable C1visualizer output.
const std::string& cfg_file_name = compiler_options.GetDumpCfgFileName();
@@ -568,6 +568,9 @@ bool OptimizingCompiler::RunArchOptimizations(HGraph* graph,
}
#endif
default:
+ UNUSED(graph);
+ UNUSED(dex_compilation_unit);
+ UNUSED(pass_observer);
return false;
}
}
@@ -653,7 +656,7 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph,
OptDef(OptimizationPass::kGlobalValueNumbering),
// Simplification (TODO: only if GVN occurred).
OptDef(OptimizationPass::kSelectGenerator),
- OptDef(OptimizationPass::kConstantFolding,
+ OptDef(OptimizationPass::kAggressiveConstantFolding,
"constant_folding$after_gvn"),
OptDef(OptimizationPass::kInstructionSimplifier,
"instruction_simplifier$after_gvn"),
@@ -668,20 +671,27 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph,
OptDef(OptimizationPass::kLoopOptimization),
// Simplification.
OptDef(OptimizationPass::kConstantFolding,
- "constant_folding$after_bce"),
+ "constant_folding$after_loop_opt"),
OptDef(OptimizationPass::kAggressiveInstructionSimplifier,
- "instruction_simplifier$after_bce"),
+ "instruction_simplifier$after_loop_opt"),
+ OptDef(OptimizationPass::kDeadCodeElimination,
+ "dead_code_elimination$after_loop_opt"),
// Other high-level optimizations.
OptDef(OptimizationPass::kLoadStoreElimination),
OptDef(OptimizationPass::kCHAGuardOptimization),
- OptDef(OptimizationPass::kDeadCodeElimination,
- "dead_code_elimination$final"),
OptDef(OptimizationPass::kCodeSinking),
+ // Simplification.
+ OptDef(OptimizationPass::kConstantFolding,
+ "constant_folding$before_codegen"),
// The codegen has a few assumptions that only the instruction simplifier
// can satisfy. For example, the code generator does not expect to see a
// HTypeConversion from a type to the same type.
OptDef(OptimizationPass::kAggressiveInstructionSimplifier,
"instruction_simplifier$before_codegen"),
+ // Simplification may result in dead code that should be removed prior to
+ // code generation.
+ OptDef(OptimizationPass::kDeadCodeElimination,
+ "dead_code_elimination$before_codegen"),
// Eliminate constructor fences after code sinking to avoid
// complicated sinking logic to split a fence with many inputs.
OptDef(OptimizationPass::kConstructorFenceRedundancyElimination)
@@ -711,18 +721,19 @@ static ArenaVector<linker::LinkerPatch> EmitAndSortLinkerPatches(CodeGenerator*
CompiledMethod* OptimizingCompiler::Emit(ArenaAllocator* allocator,
CodeVectorAllocator* code_allocator,
CodeGenerator* codegen,
+ bool is_intrinsic,
const dex::CodeItem* code_item_for_osr_check) const {
ArenaVector<linker::LinkerPatch> linker_patches = EmitAndSortLinkerPatches(codegen);
ScopedArenaVector<uint8_t> stack_map = codegen->BuildStackMaps(code_item_for_osr_check);
- CompiledMethodStorage* storage = GetCompiledMethodStorage();
- CompiledMethod* compiled_method = CompiledMethod::SwapAllocCompiledMethod(
- storage,
+ CompiledCodeStorage* storage = GetCompiledCodeStorage();
+ CompiledMethod* compiled_method = storage->CreateCompiledMethod(
codegen->GetInstructionSet(),
code_allocator->GetMemory(),
ArrayRef<const uint8_t>(stack_map),
ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()),
- ArrayRef<const linker::LinkerPatch>(linker_patches));
+ ArrayRef<const linker::LinkerPatch>(linker_patches),
+ is_intrinsic);
for (const linker::LinkerPatch& patch : linker_patches) {
if (codegen->NeedsThunkCode(patch) && storage->GetThunkCode(patch).empty()) {
@@ -891,6 +902,8 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator,
RunBaselineOptimizations(graph, codegen.get(), dex_compilation_unit, &pass_observer);
} else {
RunOptimizations(graph, codegen.get(), dex_compilation_unit, &pass_observer);
+ PassScope scope(WriteBarrierElimination::kWBEPassName, &pass_observer);
+ WriteBarrierElimination(graph, compilation_stats_.get()).Run();
}
RegisterAllocator::Strategy regalloc_strategy =
@@ -984,6 +997,10 @@ CodeGenerator* OptimizingCompiler::TryCompileIntrinsic(
optimizations);
RunArchOptimizations(graph, codegen.get(), dex_compilation_unit, &pass_observer);
+ {
+ PassScope scope(WriteBarrierElimination::kWBEPassName, &pass_observer);
+ WriteBarrierElimination(graph, compilation_stats_.get()).Run();
+ }
AllocateRegisters(graph,
codegen.get(),
@@ -1079,10 +1096,8 @@ CompiledMethod* OptimizingCompiler::Compile(const dex::CodeItem* code_item,
compiled_method = Emit(&allocator,
&code_allocator,
codegen.get(),
+ compiled_intrinsic,
compiled_intrinsic ? nullptr : code_item);
- if (compiled_intrinsic) {
- compiled_method->MarkAsIntrinsic();
- }
if (kArenaAllocatorCountAllocations) {
codegen.reset(); // Release codegen's ScopedArenaAllocator for memory accounting.
@@ -1115,17 +1130,18 @@ CompiledMethod* OptimizingCompiler::Compile(const dex::CodeItem* code_item,
static ScopedArenaVector<uint8_t> CreateJniStackMap(ScopedArenaAllocator* allocator,
const JniCompiledMethod& jni_compiled_method,
- size_t code_size) {
+ size_t code_size,
+ bool debuggable) {
// StackMapStream is quite large, so allocate it using the ScopedArenaAllocator
// to stay clear of the frame size limit.
std::unique_ptr<StackMapStream> stack_map_stream(
new (allocator) StackMapStream(allocator, jni_compiled_method.GetInstructionSet()));
- stack_map_stream->BeginMethod(
- jni_compiled_method.GetFrameSize(),
- jni_compiled_method.GetCoreSpillMask(),
- jni_compiled_method.GetFpSpillMask(),
- /* num_dex_registers= */ 0,
- /* baseline= */ false);
+ stack_map_stream->BeginMethod(jni_compiled_method.GetFrameSize(),
+ jni_compiled_method.GetCoreSpillMask(),
+ jni_compiled_method.GetFpSpillMask(),
+ /* num_dex_registers= */ 0,
+ /* baseline= */ false,
+ debuggable);
stack_map_stream->EndMethod(code_size);
return stack_map_stream->Encode();
}
@@ -1172,12 +1188,11 @@ CompiledMethod* OptimizingCompiler::JniCompile(uint32_t access_flags,
method,
&handles));
if (codegen != nullptr) {
- CompiledMethod* compiled_method = Emit(&allocator,
- &code_allocator,
- codegen.get(),
- /* item= */ nullptr);
- compiled_method->MarkAsIntrinsic();
- return compiled_method;
+ return Emit(&allocator,
+ &code_allocator,
+ codegen.get(),
+ /*is_intrinsic=*/ true,
+ /*item=*/ nullptr);
}
}
}
@@ -1187,19 +1202,22 @@ CompiledMethod* OptimizingCompiler::JniCompile(uint32_t access_flags,
MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kCompiledNativeStub);
ScopedArenaAllocator stack_map_allocator(&arena_stack); // Will hold the stack map.
- ScopedArenaVector<uint8_t> stack_map = CreateJniStackMap(
- &stack_map_allocator, jni_compiled_method, jni_compiled_method.GetCode().size());
- return CompiledMethod::SwapAllocCompiledMethod(
- GetCompiledMethodStorage(),
+ ScopedArenaVector<uint8_t> stack_map =
+ CreateJniStackMap(&stack_map_allocator,
+ jni_compiled_method,
+ jni_compiled_method.GetCode().size(),
+ compiler_options.GetDebuggable() && compiler_options.IsJitCompiler());
+ return GetCompiledCodeStorage()->CreateCompiledMethod(
jni_compiled_method.GetInstructionSet(),
jni_compiled_method.GetCode(),
ArrayRef<const uint8_t>(stack_map),
jni_compiled_method.GetCfi(),
- /* patches= */ ArrayRef<const linker::LinkerPatch>());
+ /*patches=*/ ArrayRef<const linker::LinkerPatch>(),
+ /*is_intrinsic=*/ false);
}
Compiler* CreateOptimizingCompiler(const CompilerOptions& compiler_options,
- CompiledMethodStorage* storage) {
+ CompiledCodeStorage* storage) {
return new OptimizingCompiler(compiler_options, storage);
}
@@ -1233,6 +1251,19 @@ bool OptimizingCompiler::JitCompile(Thread* self,
ArenaAllocator allocator(runtime->GetJitArenaPool());
if (UNLIKELY(method->IsNative())) {
+ // Use GenericJniTrampoline for critical native methods in debuggable runtimes. We don't
+ // support calling method entry / exit hooks for critical native methods yet.
+ // TODO(mythria): Add support for calling method entry / exit hooks in JITed stubs for critical
+ // native methods too.
+ if (compiler_options.GetDebuggable() && method->IsCriticalNative()) {
+ DCHECK(compiler_options.IsJitCompiler());
+ return false;
+ }
+ // Java debuggable runtimes should set compiler options to debuggable, so that we either
+ // generate method entry / exit hooks or skip JITing. For critical native methods we don't
+ // generate method entry / exit hooks so we shouldn't JIT them in debuggable runtimes.
+ DCHECK_IMPLIES(method->IsCriticalNative(), !runtime->IsJavaDebuggable());
+
JniCompiledMethod jni_compiled_method = ArtQuickJniCompileMethod(
compiler_options, access_flags, method_idx, *dex_file, &allocator);
std::vector<Handle<mirror::Object>> roots;
@@ -1241,8 +1272,11 @@ bool OptimizingCompiler::JitCompile(Thread* self,
ArenaStack arena_stack(runtime->GetJitArenaPool());
// StackMapStream is large and it does not fit into this frame, so we need helper method.
ScopedArenaAllocator stack_map_allocator(&arena_stack); // Will hold the stack map.
- ScopedArenaVector<uint8_t> stack_map = CreateJniStackMap(
- &stack_map_allocator, jni_compiled_method, jni_compiled_method.GetCode().size());
+ ScopedArenaVector<uint8_t> stack_map =
+ CreateJniStackMap(&stack_map_allocator,
+ jni_compiled_method,
+ jni_compiled_method.GetCode().size(),
+ compiler_options.GetDebuggable() && compiler_options.IsJitCompiler());
ArrayRef<const uint8_t> reserved_code;
ArrayRef<const uint8_t> reserved_data;
diff --git a/compiler/optimizing/optimizing_compiler.h b/compiler/optimizing/optimizing_compiler.h
index cd6d684590..737ffd034a 100644
--- a/compiler/optimizing/optimizing_compiler.h
+++ b/compiler/optimizing/optimizing_compiler.h
@@ -18,18 +18,19 @@
#define ART_COMPILER_OPTIMIZING_OPTIMIZING_COMPILER_H_
#include "base/globals.h"
+#include "base/macros.h"
#include "base/mutex.h"
-namespace art {
+namespace art HIDDEN {
class ArtMethod;
+class CompiledCodeStorage;
class Compiler;
-class CompiledMethodStorage;
class CompilerOptions;
class DexFile;
Compiler* CreateOptimizingCompiler(const CompilerOptions& compiler_options,
- CompiledMethodStorage* storage);
+ CompiledCodeStorage* storage);
bool EncodeArtMethodInInlineInfo(ArtMethod* method);
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index d458e42608..a1d0a5a845 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -26,8 +26,9 @@
#include "base/atomic.h"
#include "base/globals.h"
+#include "base/macros.h"
-namespace art {
+namespace art HIDDEN {
enum class MethodCompilationStat {
kAttemptBytecodeCompilation = 0,
@@ -46,6 +47,7 @@ enum class MethodCompilationStat {
kUnresolvedFieldNotAFastAccess,
kRemovedCheckedCast,
kRemovedDeadInstruction,
+ kRemovedTry,
kRemovedNullCheck,
kNotCompiledSkipped,
kNotCompiledInvalidBytecode,
@@ -59,6 +61,7 @@ enum class MethodCompilationStat {
kNotCompiledSpaceFilter,
kNotCompiledUnhandledInstruction,
kNotCompiledUnsupportedIsa,
+ kNotCompiledInliningIrreducibleLoop,
kNotCompiledIrreducibleLoopAndStringInit,
kNotCompiledPhiEquivalentInOsr,
kInlinedMonomorphicCall,
@@ -73,11 +76,13 @@ enum class MethodCompilationStat {
kLoopVectorizedIdiom,
kSelectGenerated,
kRemovedInstanceOf,
+ kPropagatedIfValue,
kInlinedInvokeVirtualOrInterface,
kInlinedLastInvokeVirtualOrInterface,
kImplicitNullCheckGenerated,
kExplicitNullCheckGenerated,
kSimplifyIf,
+ kSimplifyIfAddedPhi,
kSimplifyThrowingInvoke,
kInstructionSunk,
kNotInlinedUnresolvedEntrypoint,
@@ -88,16 +93,19 @@ enum class MethodCompilationStat {
kNotInlinedEnvironmentBudget,
kNotInlinedInstructionBudget,
kNotInlinedLoopWithoutExit,
- kNotInlinedIrreducibleLoop,
+ kNotInlinedIrreducibleLoopCallee,
+ kNotInlinedIrreducibleLoopCaller,
kNotInlinedAlwaysThrows,
kNotInlinedInfiniteLoop,
- kNotInlinedTryCatchCaller,
kNotInlinedTryCatchCallee,
+ kNotInlinedTryCatchDisabled,
kNotInlinedRegisterAllocator,
kNotInlinedCannotBuild,
+ kNotInlinedNeverInlineAnnotation,
kNotInlinedNotCompilable,
kNotInlinedNotVerified,
kNotInlinedCodeItem,
+ kNotInlinedEndsWithThrow,
kNotInlinedWont,
kNotInlinedRecursiveBudget,
kNotInlinedPolymorphicRecursiveBudget,
@@ -105,12 +113,15 @@ enum class MethodCompilationStat {
kNotInlinedUnresolved,
kNotInlinedPolymorphic,
kNotInlinedCustom,
+ kNotVarAnalyzedPathological,
kTryInline,
kConstructorFenceGeneratedNew,
kConstructorFenceGeneratedFinal,
kConstructorFenceRemovedLSE,
kConstructorFenceRemovedPFRA,
kConstructorFenceRemovedCFRE,
+ kPossibleWriteBarrier,
+ kRemovedWriteBarrier,
kBitstringTypeCheck,
kJitOutOfMemoryForCommit,
kFullLSEAllocationRemoved,
diff --git a/compiler/optimizing/optimizing_unit_test.h b/compiler/optimizing/optimizing_unit_test.h
index e83688039a..2e05c41f01 100644
--- a/compiler/optimizing/optimizing_unit_test.h
+++ b/compiler/optimizing/optimizing_unit_test.h
@@ -25,6 +25,7 @@
#include <vector>
#include <variant>
+#include "base/macros.h"
#include "base/indenter.h"
#include "base/malloc_arena_pool.h"
#include "base/scoped_arena_allocator.h"
@@ -46,7 +47,7 @@
#include "ssa_builder.h"
#include "ssa_liveness_analysis.h"
-namespace art {
+namespace art HIDDEN {
#define NUM_INSTRUCTIONS(...) \
(sizeof((uint16_t[]) {__VA_ARGS__}) /sizeof(uint16_t))
@@ -240,13 +241,14 @@ class OptimizingUnitTestHelper {
// Create the dex file based on the fake data. Call the constructor so that we can use virtual
// functions. Don't use the arena for the StandardDexFile otherwise the dex location leaks.
- dex_files_.emplace_back(new StandardDexFile(
- dex_data,
- sizeof(StandardDexFile::Header),
- "no_location",
- /*location_checksum*/ 0,
- /*oat_dex_file*/ nullptr,
- /*container*/ nullptr));
+ auto container =
+ std::make_shared<MemoryDexFileContainer>(dex_data, sizeof(StandardDexFile::Header));
+ dex_files_.emplace_back(new StandardDexFile(dex_data,
+ sizeof(StandardDexFile::Header),
+ "no_location",
+ /*location_checksum*/ 0,
+ /*oat_dex_file*/ nullptr,
+ std::move(container)));
graph_ = new (allocator) HGraph(
allocator,
@@ -260,9 +262,10 @@ class OptimizingUnitTestHelper {
// Create a control-flow graph from Dex instructions.
HGraph* CreateCFG(const std::vector<uint16_t>& data,
- DataType::Type return_type = DataType::Type::kInt32,
- VariableSizedHandleScope* handles = nullptr) {
- HGraph* graph = CreateGraph(handles);
+ DataType::Type return_type = DataType::Type::kInt32) {
+ ScopedObjectAccess soa(Thread::Current());
+ VariableSizedHandleScope handles(soa.Self());
+ HGraph* graph = CreateGraph(&handles);
     // The code item data might not be aligned to 4 bytes, copy it to ensure that.
const size_t code_item_size = data.size() * sizeof(data.front());
@@ -278,7 +281,7 @@ class OptimizingUnitTestHelper {
/* class_linker= */ nullptr,
graph->GetDexFile(),
code_item,
- /* class_def_index= */ DexFile::kDexNoIndex16,
+ /* class_def_idx= */ DexFile::kDexNoIndex16,
/* method_idx= */ dex::kDexNoIndex,
/* access_flags= */ 0u,
/* verified_method= */ nullptr,
@@ -320,25 +323,10 @@ class OptimizingUnitTestHelper {
// Run GraphChecker with all checks.
//
// Return: the status whether the run is successful.
- bool CheckGraph(HGraph* graph, std::ostream& oss = std::cerr) {
- return CheckGraph(graph, /*check_ref_type_info=*/true, oss);
- }
-
bool CheckGraph(std::ostream& oss = std::cerr) {
return CheckGraph(graph_, oss);
}
- // Run GraphChecker with all checks except reference type information checks.
- //
- // Return: the status whether the run is successful.
- bool CheckGraphSkipRefTypeInfoChecks(HGraph* graph, std::ostream& oss = std::cerr) {
- return CheckGraph(graph, /*check_ref_type_info=*/false, oss);
- }
-
- bool CheckGraphSkipRefTypeInfoChecks(std::ostream& oss = std::cerr) {
- return CheckGraphSkipRefTypeInfoChecks(graph_, oss);
- }
-
HEnvironment* ManuallyBuildEnvFor(HInstruction* instruction,
ArenaVector<HInstruction*>* current_locals) {
HEnvironment* environment = new (GetAllocator()) HEnvironment(
@@ -473,7 +461,8 @@ class OptimizingUnitTestHelper {
HInvokeStaticOrDirect::DispatchInfo{},
InvokeType::kStatic,
/* resolved_method_reference= */ method_reference,
- HInvokeStaticOrDirect::ClinitCheckRequirement::kNone);
+ HInvokeStaticOrDirect::ClinitCheckRequirement::kNone,
+ !graph_->IsDebuggable());
for (auto [ins, idx] : ZipCount(MakeIterationRange(args))) {
res->SetRawInputAt(idx, ins);
}
@@ -531,9 +520,8 @@ class OptimizingUnitTestHelper {
}
protected:
- bool CheckGraph(HGraph* graph, bool check_ref_type_info, std::ostream& oss) {
+ bool CheckGraph(HGraph* graph, std::ostream& oss) {
GraphChecker checker(graph);
- checker.SetRefTypeInfoCheckEnabled(check_ref_type_info);
checker.Run();
checker.Dump(oss);
return checker.IsValid();
@@ -559,7 +547,7 @@ class OptimizingUnitTestHelper {
class OptimizingUnitTest : public CommonArtTest, public OptimizingUnitTestHelper {};
// Naive string diff data type.
-typedef std::list<std::pair<std::string, std::string>> diff_t;
+using diff_t = std::list<std::pair<std::string, std::string>>;
// An alias for the empty string used to make it clear that a line is
// removed in a diff.
@@ -586,7 +574,7 @@ inline std::ostream& operator<<(std::ostream& oss, const AdjacencyListGraph& alg
return alg.Dump(oss);
}
-class PatternMatchGraphVisitor : public HGraphVisitor {
+class PatternMatchGraphVisitor final : public HGraphVisitor {
private:
struct HandlerWrapper {
public:
diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc
index 2036b4a370..9fc4cc86bf 100644
--- a/compiler/optimizing/parallel_move_resolver.cc
+++ b/compiler/optimizing/parallel_move_resolver.cc
@@ -19,7 +19,7 @@
#include "base/stl_util.h"
#include "nodes.h"
-namespace art {
+namespace art HIDDEN {
void ParallelMoveResolver::BuildInitialMoveList(HParallelMove* parallel_move) {
// Perform a linear sweep of the moves to add them to the initial list of
diff --git a/compiler/optimizing/parallel_move_resolver.h b/compiler/optimizing/parallel_move_resolver.h
index 5fadcab402..17d5122542 100644
--- a/compiler/optimizing/parallel_move_resolver.h
+++ b/compiler/optimizing/parallel_move_resolver.h
@@ -18,11 +18,12 @@
#define ART_COMPILER_OPTIMIZING_PARALLEL_MOVE_RESOLVER_H_
#include "base/arena_containers.h"
+#include "base/macros.h"
#include "base/value_object.h"
#include "data_type.h"
#include "locations.h"
-namespace art {
+namespace art HIDDEN {
class HParallelMove;
class MoveOperands;
diff --git a/compiler/optimizing/parallel_move_test.cc b/compiler/optimizing/parallel_move_test.cc
index a8ab6cdd0c..a1c05e9cad 100644
--- a/compiler/optimizing/parallel_move_test.cc
+++ b/compiler/optimizing/parallel_move_test.cc
@@ -15,6 +15,7 @@
*/
#include "base/arena_allocator.h"
+#include "base/macros.h"
#include "base/malloc_arena_pool.h"
#include "nodes.h"
#include "parallel_move_resolver.h"
@@ -22,7 +23,7 @@
#include "gtest/gtest-typed-test.h"
#include "gtest/gtest.h"
-namespace art {
+namespace art HIDDEN {
constexpr int kScratchRegisterStartIndexForTest = 100;
diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc
index 17f37f05c5..d3da3d3ce1 100644
--- a/compiler/optimizing/pc_relative_fixups_x86.cc
+++ b/compiler/optimizing/pc_relative_fixups_x86.cc
@@ -18,13 +18,13 @@
#include "code_generator_x86.h"
#include "intrinsics_x86.h"
-namespace art {
+namespace art HIDDEN {
namespace x86 {
/**
* Finds instructions that need the constant area base as an input.
*/
-class PCRelativeHandlerVisitor : public HGraphVisitor {
+class PCRelativeHandlerVisitor final : public HGraphVisitor {
public:
PCRelativeHandlerVisitor(HGraph* graph, CodeGenerator* codegen)
: HGraphVisitor(graph),
diff --git a/compiler/optimizing/pc_relative_fixups_x86.h b/compiler/optimizing/pc_relative_fixups_x86.h
index 3b470a6502..45578d8050 100644
--- a/compiler/optimizing/pc_relative_fixups_x86.h
+++ b/compiler/optimizing/pc_relative_fixups_x86.h
@@ -17,10 +17,11 @@
#ifndef ART_COMPILER_OPTIMIZING_PC_RELATIVE_FIXUPS_X86_H_
#define ART_COMPILER_OPTIMIZING_PC_RELATIVE_FIXUPS_X86_H_
+#include "base/macros.h"
#include "nodes.h"
#include "optimization.h"
-namespace art {
+namespace art HIDDEN {
class CodeGenerator;
diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc
index c2f3d0e741..398b10abf3 100644
--- a/compiler/optimizing/prepare_for_register_allocation.cc
+++ b/compiler/optimizing/prepare_for_register_allocation.cc
@@ -22,7 +22,7 @@
#include "optimizing_compiler_stats.h"
#include "well_known_classes.h"
-namespace art {
+namespace art HIDDEN {
void PrepareForRegisterAllocation::Run() {
// Order does not matter.
@@ -83,7 +83,7 @@ void PrepareForRegisterAllocation::VisitBoundsCheck(HBoundsCheck* check) {
if (check->IsStringCharAt()) {
// Add a fake environment for String.charAt() inline info as we want the exception
// to appear as being thrown from there. Skip if we're compiling String.charAt() itself.
- ArtMethod* char_at_method = jni::DecodeArtMethod(WellKnownClasses::java_lang_String_charAt);
+ ArtMethod* char_at_method = WellKnownClasses::java_lang_String_charAt;
if (GetGraph()->GetArtMethod() != char_at_method) {
ArenaAllocator* allocator = GetGraph()->GetAllocator();
HEnvironment* environment = new (allocator) HEnvironment(allocator,
@@ -109,7 +109,7 @@ void PrepareForRegisterAllocation::VisitArraySet(HArraySet* instruction) {
if (value->IsNullConstant()) {
DCHECK_EQ(value->GetType(), DataType::Type::kReference);
if (instruction->NeedsTypeCheck()) {
- instruction->ClearNeedsTypeCheck();
+ instruction->ClearTypeCheck();
}
}
}
@@ -295,15 +295,16 @@ bool PrepareForRegisterAllocation::CanMoveClinitCheck(HInstruction* input,
return false;
}
- // In debug mode, check that we have not inserted a throwing instruction
- // or an instruction with side effects between input and user.
- if (kIsDebugBuild) {
- for (HInstruction* between = input->GetNext(); between != user; between = between->GetNext()) {
- CHECK(between != nullptr); // User must be after input in the same block.
- CHECK(!between->CanThrow()) << *between << " User: " << *user;
- CHECK(!between->HasSideEffects()) << *between << " User: " << *user;
+  // If there's an instruction between them that can throw or has side effects, we cannot move
+  // the responsibility.
+ for (HInstruction* between = input->GetNext(); between != user; between = between->GetNext()) {
+ DCHECK(between != nullptr) << " User must be after input in the same block. input: " << *input
+ << ", user: " << *user;
+ if (between->CanThrow() || between->HasSideEffects()) {
+ return false;
}
}
+
return true;
}
diff --git a/compiler/optimizing/prepare_for_register_allocation.h b/compiler/optimizing/prepare_for_register_allocation.h
index e0bb76eb22..0426f8470b 100644
--- a/compiler/optimizing/prepare_for_register_allocation.h
+++ b/compiler/optimizing/prepare_for_register_allocation.h
@@ -17,9 +17,10 @@
#ifndef ART_COMPILER_OPTIMIZING_PREPARE_FOR_REGISTER_ALLOCATION_H_
#define ART_COMPILER_OPTIMIZING_PREPARE_FOR_REGISTER_ALLOCATION_H_
+#include "base/macros.h"
#include "nodes.h"
-namespace art {
+namespace art HIDDEN {
class CompilerOptions;
class OptimizingCompilerStats;
diff --git a/compiler/optimizing/pretty_printer.h b/compiler/optimizing/pretty_printer.h
index 8ef9ce4e8b..77ddb97707 100644
--- a/compiler/optimizing/pretty_printer.h
+++ b/compiler/optimizing/pretty_printer.h
@@ -19,9 +19,10 @@
#include "android-base/stringprintf.h"
+#include "base/macros.h"
#include "nodes.h"
-namespace art {
+namespace art HIDDEN {
class HPrettyPrinter : public HGraphVisitor {
public:
diff --git a/compiler/optimizing/pretty_printer_test.cc b/compiler/optimizing/pretty_printer_test.cc
index 6ef386b4a5..90d5f8f08f 100644
--- a/compiler/optimizing/pretty_printer_test.cc
+++ b/compiler/optimizing/pretty_printer_test.cc
@@ -17,6 +17,7 @@
#include "pretty_printer.h"
#include "base/arena_allocator.h"
+#include "base/macros.h"
#include "builder.h"
#include "dex/dex_file.h"
#include "dex/dex_instruction.h"
@@ -25,9 +26,9 @@
#include "gtest/gtest.h"
-namespace art {
+namespace art HIDDEN {
-class PrettyPrinterTest : public OptimizingUnitTest {
+class PrettyPrinterTest : public CommonCompilerTest, public OptimizingUnitTestHelper {
protected:
void TestCode(const std::vector<uint16_t>& data, const char* expected);
};
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index e6024b08cb..91bae5f49b 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -29,7 +29,7 @@
#include "mirror/dex_cache.h"
#include "scoped_thread_state_change-inl.h"
-namespace art {
+namespace art HIDDEN {
static inline ObjPtr<mirror::DexCache> FindDexCacheWithHint(
Thread* self, const DexFile& dex_file, Handle<mirror::DexCache> hint_dex_cache)
@@ -41,18 +41,14 @@ static inline ObjPtr<mirror::DexCache> FindDexCacheWithHint(
}
}
-class ReferenceTypePropagation::RTPVisitor : public HGraphDelegateVisitor {
+class ReferenceTypePropagation::RTPVisitor final : public HGraphDelegateVisitor {
public:
- RTPVisitor(HGraph* graph,
- Handle<mirror::ClassLoader> class_loader,
- Handle<mirror::DexCache> hint_dex_cache,
- bool is_first_run)
- : HGraphDelegateVisitor(graph),
- class_loader_(class_loader),
- hint_dex_cache_(hint_dex_cache),
- allocator_(graph->GetArenaStack()),
- worklist_(allocator_.Adapter(kArenaAllocReferenceTypePropagation)),
- is_first_run_(is_first_run) {
+ RTPVisitor(HGraph* graph, Handle<mirror::DexCache> hint_dex_cache, bool is_first_run)
+ : HGraphDelegateVisitor(graph),
+ hint_dex_cache_(hint_dex_cache),
+ allocator_(graph->GetArenaStack()),
+ worklist_(allocator_.Adapter(kArenaAllocReferenceTypePropagation)),
+ is_first_run_(is_first_run) {
worklist_.reserve(kDefaultWorklistSize);
}
@@ -110,7 +106,6 @@ class ReferenceTypePropagation::RTPVisitor : public HGraphDelegateVisitor {
static constexpr size_t kDefaultWorklistSize = 8;
- Handle<mirror::ClassLoader> class_loader_;
Handle<mirror::DexCache> hint_dex_cache_;
// Use local allocator for allocating memory.
@@ -122,63 +117,18 @@ class ReferenceTypePropagation::RTPVisitor : public HGraphDelegateVisitor {
};
ReferenceTypePropagation::ReferenceTypePropagation(HGraph* graph,
- Handle<mirror::ClassLoader> class_loader,
Handle<mirror::DexCache> hint_dex_cache,
bool is_first_run,
const char* name)
- : HOptimization(graph, name),
- class_loader_(class_loader),
- hint_dex_cache_(hint_dex_cache),
- is_first_run_(is_first_run) {
-}
-
-void ReferenceTypePropagation::ValidateTypes() {
- // TODO: move this to the graph checker. Note: There may be no Thread for gtests.
- if (kIsDebugBuild && Thread::Current() != nullptr) {
- ScopedObjectAccess soa(Thread::Current());
- for (HBasicBlock* block : graph_->GetReversePostOrder()) {
- for (HInstructionIterator iti(block->GetInstructions()); !iti.Done(); iti.Advance()) {
- HInstruction* instr = iti.Current();
- if (instr->GetType() == DataType::Type::kReference) {
- DCHECK(instr->GetReferenceTypeInfo().IsValid())
- << "Invalid RTI for instruction: " << instr->DebugName();
- if (instr->IsBoundType()) {
- DCHECK(instr->AsBoundType()->GetUpperBound().IsValid());
- } else if (instr->IsLoadClass()) {
- HLoadClass* cls = instr->AsLoadClass();
- DCHECK(cls->GetReferenceTypeInfo().IsExact());
- DCHECK_IMPLIES(cls->GetLoadedClassRTI().IsValid(), cls->GetLoadedClassRTI().IsExact());
- } else if (instr->IsNullCheck()) {
- DCHECK(instr->GetReferenceTypeInfo().IsEqual(instr->InputAt(0)->GetReferenceTypeInfo()))
- << "NullCheck " << instr->GetReferenceTypeInfo()
- << "Input(0) " << instr->InputAt(0)->GetReferenceTypeInfo();
- }
- } else if (instr->IsInstanceOf()) {
- HInstanceOf* iof = instr->AsInstanceOf();
- DCHECK_IMPLIES(iof->GetTargetClassRTI().IsValid(), iof->GetTargetClassRTI().IsExact());
- } else if (instr->IsCheckCast()) {
- HCheckCast* check = instr->AsCheckCast();
- DCHECK_IMPLIES(check->GetTargetClassRTI().IsValid(),
- check->GetTargetClassRTI().IsExact());
- }
- }
- }
- }
-}
+ : HOptimization(graph, name), hint_dex_cache_(hint_dex_cache), is_first_run_(is_first_run) {}
void ReferenceTypePropagation::Visit(HInstruction* instruction) {
- RTPVisitor visitor(graph_,
- class_loader_,
- hint_dex_cache_,
- is_first_run_);
+ RTPVisitor visitor(graph_, hint_dex_cache_, is_first_run_);
instruction->Accept(&visitor);
}
void ReferenceTypePropagation::Visit(ArrayRef<HInstruction* const> instructions) {
- RTPVisitor visitor(graph_,
- class_loader_,
- hint_dex_cache_,
- is_first_run_);
+ RTPVisitor visitor(graph_, hint_dex_cache_, is_first_run_);
for (HInstruction* instruction : instructions) {
if (instruction->IsPhi()) {
// Need to force phis to recalculate null-ness.
@@ -349,7 +299,10 @@ static void BoundTypeForClassCheck(HInstruction* check) {
}
bool ReferenceTypePropagation::Run() {
- RTPVisitor visitor(graph_, class_loader_, hint_dex_cache_, is_first_run_);
+ DCHECK(Thread::Current() != nullptr)
+ << "ReferenceTypePropagation requires the use of Thread::Current(). Make sure you have a "
+      << "Runtime initialized before calling this optimization pass.";
+ RTPVisitor visitor(graph_, hint_dex_cache_, is_first_run_);
// To properly propagate type info we need to visit in the dominator-based order.
// Reverse post order guarantees a node's dominators are visited first.
@@ -359,7 +312,6 @@ bool ReferenceTypePropagation::Run() {
}
visitor.ProcessWorklist();
- ValidateTypes();
return true;
}
@@ -446,10 +398,13 @@ static bool MatchIfInstanceOf(HIf* ifInstruction,
if (rhs->AsIntConstant()->IsTrue()) {
// Case (1a)
*trueBranch = ifInstruction->IfTrueSuccessor();
- } else {
+ } else if (rhs->AsIntConstant()->IsFalse()) {
// Case (2a)
- DCHECK(rhs->AsIntConstant()->IsFalse()) << rhs->AsIntConstant()->GetValue();
*trueBranch = ifInstruction->IfFalseSuccessor();
+ } else {
+ // Sometimes we see a comparison of instance-of with a constant which is neither 0 nor 1.
+      // In those cases, we cannot match the if+instance-of pattern.
+ return false;
}
*instanceOf = lhs->AsInstanceOf();
return true;
@@ -463,10 +418,13 @@ static bool MatchIfInstanceOf(HIf* ifInstruction,
if (rhs->AsIntConstant()->IsFalse()) {
// Case (1b)
*trueBranch = ifInstruction->IfTrueSuccessor();
- } else {
+ } else if (rhs->AsIntConstant()->IsTrue()) {
// Case (2b)
- DCHECK(rhs->AsIntConstant()->IsTrue()) << rhs->AsIntConstant()->GetValue();
*trueBranch = ifInstruction->IfFalseSuccessor();
+ } else {
+ // Sometimes we see a comparison of instance-of with a constant which is neither 0 nor 1.
+      // In those cases, we cannot match the if+instance-of pattern.
+ return false;
}
*instanceOf = lhs->AsInstanceOf();
return true;
@@ -583,7 +541,7 @@ void ReferenceTypePropagation::RTPVisitor::UpdateReferenceTypeInfo(HInstruction*
ScopedObjectAccess soa(Thread::Current());
ObjPtr<mirror::DexCache> dex_cache = FindDexCacheWithHint(soa.Self(), dex_file, hint_dex_cache_);
ObjPtr<mirror::Class> klass = Runtime::Current()->GetClassLinker()->LookupResolvedType(
- type_idx, dex_cache, class_loader_.Get());
+ type_idx, dex_cache, dex_cache->GetClassLoader());
SetClassAsTypeInfo(instr, klass, is_exact);
}
diff --git a/compiler/optimizing/reference_type_propagation.h b/compiler/optimizing/reference_type_propagation.h
index 889a8465e0..655f62b3da 100644
--- a/compiler/optimizing/reference_type_propagation.h
+++ b/compiler/optimizing/reference_type_propagation.h
@@ -18,12 +18,13 @@
#define ART_COMPILER_OPTIMIZING_REFERENCE_TYPE_PROPAGATION_H_
#include "base/arena_containers.h"
+#include "base/macros.h"
#include "mirror/class-inl.h"
#include "nodes.h"
#include "obj_ptr.h"
#include "optimization.h"
-namespace art {
+namespace art HIDDEN {
/**
* Propagates reference types to instructions.
@@ -31,7 +32,6 @@ namespace art {
class ReferenceTypePropagation : public HOptimization {
public:
ReferenceTypePropagation(HGraph* graph,
- Handle<mirror::ClassLoader> class_loader,
Handle<mirror::DexCache> hint_dex_cache,
bool is_first_run,
const char* name = kReferenceTypePropagationPassName);
@@ -71,10 +71,6 @@ class ReferenceTypePropagation : public HOptimization {
HandleCache* handle_cache)
REQUIRES_SHARED(Locks::mutator_lock_);
- void ValidateTypes();
-
- Handle<mirror::ClassLoader> class_loader_;
-
// Note: hint_dex_cache_ is usually, but not necessarily, the dex cache associated with
// graph_->GetDexFile(). Since we may look up also in other dex files, it's used only
// as a hint, to reduce the number of calls to the costly ClassLinker::FindDexCache().
diff --git a/compiler/optimizing/reference_type_propagation_test.cc b/compiler/optimizing/reference_type_propagation_test.cc
index d1bcab083c..2b012fcd67 100644
--- a/compiler/optimizing/reference_type_propagation_test.cc
+++ b/compiler/optimizing/reference_type_propagation_test.cc
@@ -19,6 +19,7 @@
#include <random>
#include "base/arena_allocator.h"
+#include "base/macros.h"
#include "base/transform_array_ref.h"
#include "base/transform_iterator.h"
#include "builder.h"
@@ -26,7 +27,7 @@
#include "object_lock.h"
#include "optimizing_unit_test.h"
-namespace art {
+namespace art HIDDEN {
// TODO It would be good to use the following but there is a miniscule amount of
// chance for flakiness so we'll just use a set seed instead.
@@ -47,11 +48,8 @@ class ReferenceTypePropagationTestBase : public SuperTest, public OptimizingUnit
void SetupPropagation(VariableSizedHandleScope* handles) {
graph_ = CreateGraph(handles);
- propagation_ = new (GetAllocator()) ReferenceTypePropagation(graph_,
- Handle<mirror::ClassLoader>(),
- Handle<mirror::DexCache>(),
- true,
- "test_prop");
+ propagation_ = new (GetAllocator())
+ ReferenceTypePropagation(graph_, Handle<mirror::DexCache>(), true, "test_prop");
}
// Relay method to merge type in reference type propagation.
diff --git a/compiler/optimizing/register_allocation_resolver.cc b/compiler/optimizing/register_allocation_resolver.cc
index 875c633889..53e11f2c3d 100644
--- a/compiler/optimizing/register_allocation_resolver.cc
+++ b/compiler/optimizing/register_allocation_resolver.cc
@@ -21,7 +21,7 @@
#include "linear_order.h"
#include "ssa_liveness_analysis.h"
-namespace art {
+namespace art HIDDEN {
RegisterAllocationResolver::RegisterAllocationResolver(CodeGenerator* codegen,
const SsaLivenessAnalysis& liveness)
diff --git a/compiler/optimizing/register_allocation_resolver.h b/compiler/optimizing/register_allocation_resolver.h
index 278371777d..f4782eb48e 100644
--- a/compiler/optimizing/register_allocation_resolver.h
+++ b/compiler/optimizing/register_allocation_resolver.h
@@ -18,10 +18,11 @@
#define ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATION_RESOLVER_H_
#include "base/array_ref.h"
+#include "base/macros.h"
#include "base/value_object.h"
#include "data_type.h"
-namespace art {
+namespace art HIDDEN {
class ArenaAllocator;
class CodeGenerator;
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index a9c217fc4f..e4c2d74908 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -27,7 +27,7 @@
#include "register_allocator_linear_scan.h"
#include "ssa_liveness_analysis.h"
-namespace art {
+namespace art HIDDEN {
RegisterAllocator::RegisterAllocator(ScopedArenaAllocator* allocator,
CodeGenerator* codegen,
diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h
index 4d226875bf..453e339cba 100644
--- a/compiler/optimizing/register_allocator.h
+++ b/compiler/optimizing/register_allocator.h
@@ -22,7 +22,7 @@
#include "base/arena_object.h"
#include "base/macros.h"
-namespace art {
+namespace art HIDDEN {
class CodeGenerator;
class HBasicBlock;
diff --git a/compiler/optimizing/register_allocator_graph_color.cc b/compiler/optimizing/register_allocator_graph_color.cc
index 684aaf5750..a7c891d4e7 100644
--- a/compiler/optimizing/register_allocator_graph_color.cc
+++ b/compiler/optimizing/register_allocator_graph_color.cc
@@ -22,7 +22,7 @@
#include "ssa_liveness_analysis.h"
#include "thread-current-inl.h"
-namespace art {
+namespace art HIDDEN {
// Highest number of registers that we support for any platform. This can be used for std::bitset,
// for example, which needs to know its size at compile time.
diff --git a/compiler/optimizing/register_allocator_graph_color.h b/compiler/optimizing/register_allocator_graph_color.h
index e5b86eacee..0e10152049 100644
--- a/compiler/optimizing/register_allocator_graph_color.h
+++ b/compiler/optimizing/register_allocator_graph_color.h
@@ -24,7 +24,7 @@
#include "base/scoped_arena_containers.h"
#include "register_allocator.h"
-namespace art {
+namespace art HIDDEN {
class CodeGenerator;
class HBasicBlock;
diff --git a/compiler/optimizing/register_allocator_linear_scan.cc b/compiler/optimizing/register_allocator_linear_scan.cc
index 833c24d5bb..fcdaa2d34f 100644
--- a/compiler/optimizing/register_allocator_linear_scan.cc
+++ b/compiler/optimizing/register_allocator_linear_scan.cc
@@ -26,7 +26,7 @@
#include "register_allocation_resolver.h"
#include "ssa_liveness_analysis.h"
-namespace art {
+namespace art HIDDEN {
static constexpr size_t kMaxLifetimePosition = -1;
static constexpr size_t kDefaultNumberOfSpillSlots = 4;
diff --git a/compiler/optimizing/register_allocator_linear_scan.h b/compiler/optimizing/register_allocator_linear_scan.h
index 9a1e0d7f10..c71a9e9ff1 100644
--- a/compiler/optimizing/register_allocator_linear_scan.h
+++ b/compiler/optimizing/register_allocator_linear_scan.h
@@ -22,7 +22,7 @@
#include "base/scoped_arena_containers.h"
#include "register_allocator.h"
-namespace art {
+namespace art HIDDEN {
class CodeGenerator;
class HBasicBlock;
diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc
index 682315545d..d316aa5dc2 100644
--- a/compiler/optimizing/register_allocator_test.cc
+++ b/compiler/optimizing/register_allocator_test.cc
@@ -18,6 +18,7 @@
#include "arch/x86/instruction_set_features_x86.h"
#include "base/arena_allocator.h"
+#include "base/macros.h"
#include "builder.h"
#include "code_generator.h"
#include "code_generator_x86.h"
@@ -31,17 +32,17 @@
#include "ssa_liveness_analysis.h"
#include "ssa_phi_elimination.h"
-namespace art {
+namespace art HIDDEN {
using Strategy = RegisterAllocator::Strategy;
// Note: the register allocator tests rely on the fact that constants have live
// intervals and registers get allocated to them.
-class RegisterAllocatorTest : public OptimizingUnitTest {
+class RegisterAllocatorTest : public CommonCompilerTest, public OptimizingUnitTestHelper {
protected:
void SetUp() override {
- OptimizingUnitTest::SetUp();
+ CommonCompilerTest::SetUp();
// This test is using the x86 ISA.
compiler_options_ = CommonCompilerTest::CreateCompilerOptions(InstructionSet::kX86, "default");
}
diff --git a/compiler/optimizing/scheduler.cc b/compiler/optimizing/scheduler.cc
index 8f18ccff5f..116f52605e 100644
--- a/compiler/optimizing/scheduler.cc
+++ b/compiler/optimizing/scheduler.cc
@@ -32,7 +32,7 @@
#include "scheduler_arm.h"
#endif
-namespace art {
+namespace art HIDDEN {
void SchedulingGraph::AddDependency(SchedulingNode* node,
SchedulingNode* dependency,
@@ -718,9 +718,10 @@ bool HScheduler::IsSchedulable(const HInstruction* instruction) const {
// HLoadException
// HMemoryBarrier
// HMonitorOperation
- // HNativeDebugInfo
+ // HNop
// HThrow
// HTryBoundary
+ // All volatile field accesses, e.g. HInstanceFieldGet
// TODO: Some of the instructions above may be safe to schedule (maybe as
// scheduling barriers).
return instruction->IsArrayGet() ||
diff --git a/compiler/optimizing/scheduler.h b/compiler/optimizing/scheduler.h
index f7180a02d7..299fbc93f3 100644
--- a/compiler/optimizing/scheduler.h
+++ b/compiler/optimizing/scheduler.h
@@ -19,6 +19,7 @@
#include <fstream>
+#include "base/macros.h"
#include "base/scoped_arena_allocator.h"
#include "base/scoped_arena_containers.h"
#include "base/stl_util.h"
@@ -28,7 +29,7 @@
#include "nodes.h"
#include "optimization.h"
-namespace art {
+namespace art HIDDEN {
// General description of instruction scheduling.
//
diff --git a/compiler/optimizing/scheduler_arm.cc b/compiler/optimizing/scheduler_arm.cc
index 965e1bd9f4..3f931c4c49 100644
--- a/compiler/optimizing/scheduler_arm.cc
+++ b/compiler/optimizing/scheduler_arm.cc
@@ -23,7 +23,7 @@
#include "mirror/array-inl.h"
#include "mirror/string.h"
-namespace art {
+namespace art HIDDEN {
namespace arm {
using helpers::Int32ConstantFrom;
@@ -669,7 +669,7 @@ void SchedulingLatencyVisitorARM::VisitArrayGet(HArrayGet* instruction) {
}
case DataType::Type::kReference: {
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
last_visited_latency_ = kArmLoadWithBakerReadBarrierLatency;
} else {
if (index->IsConstant()) {
@@ -937,7 +937,7 @@ void SchedulingLatencyVisitorARM::HandleFieldGetLatencies(HInstruction* instruct
break;
case DataType::Type::kReference:
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (gUseReadBarrier && kUseBakerReadBarrier) {
last_visited_internal_latency_ = kArmMemoryLoadLatency + kArmIntegerOpLatency;
last_visited_latency_ = kArmMemoryLoadLatency;
} else {
diff --git a/compiler/optimizing/scheduler_arm.h b/compiler/optimizing/scheduler_arm.h
index d11222d9f4..0da21c187f 100644
--- a/compiler/optimizing/scheduler_arm.h
+++ b/compiler/optimizing/scheduler_arm.h
@@ -17,14 +17,12 @@
#ifndef ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_
#define ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_
+#include "base/macros.h"
#include "code_generator_arm_vixl.h"
#include "scheduler.h"
-namespace art {
+namespace art HIDDEN {
namespace arm {
-// TODO: Replace CodeGeneratorARMType with CodeGeneratorARMVIXL everywhere?
-typedef CodeGeneratorARMVIXL CodeGeneratorARMType;
-
// AArch32 instruction latencies.
// We currently assume that all ARM CPUs share the same instruction latency list.
// The following latencies were tuned based on performance experiments and
@@ -49,10 +47,10 @@ static constexpr uint32_t kArmNopLatency = 2;
static constexpr uint32_t kArmLoadWithBakerReadBarrierLatency = 18;
static constexpr uint32_t kArmRuntimeTypeCheckLatency = 46;
-class SchedulingLatencyVisitorARM : public SchedulingLatencyVisitor {
+class SchedulingLatencyVisitorARM final : public SchedulingLatencyVisitor {
public:
explicit SchedulingLatencyVisitorARM(CodeGenerator* codegen)
- : codegen_(down_cast<CodeGeneratorARMType*>(codegen)) {}
+ : codegen_(down_cast<CodeGeneratorARMVIXL*>(codegen)) {}
// Default visitor for instructions not handled specifically below.
void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) override {
@@ -133,7 +131,7 @@ class SchedulingLatencyVisitorARM : public SchedulingLatencyVisitor {
// The latency setting for each HInstruction depends on how CodeGenerator may generate code,
// latency visitors may query CodeGenerator for such information for accurate latency settings.
- CodeGeneratorARMType* codegen_;
+ CodeGeneratorARMVIXL* codegen_;
};
class HSchedulerARM : public HScheduler {
diff --git a/compiler/optimizing/scheduler_arm64.cc b/compiler/optimizing/scheduler_arm64.cc
index 4f504c2100..3071afd951 100644
--- a/compiler/optimizing/scheduler_arm64.cc
+++ b/compiler/optimizing/scheduler_arm64.cc
@@ -20,7 +20,7 @@
#include "mirror/array-inl.h"
#include "mirror/string.h"
-namespace art {
+namespace art HIDDEN {
namespace arm64 {
void SchedulingLatencyVisitorARM64::VisitBinaryOperation(HBinaryOperation* instr) {
diff --git a/compiler/optimizing/scheduler_arm64.h b/compiler/optimizing/scheduler_arm64.h
index ba5a743545..ec41577e9d 100644
--- a/compiler/optimizing/scheduler_arm64.h
+++ b/compiler/optimizing/scheduler_arm64.h
@@ -17,9 +17,10 @@
#ifndef ART_COMPILER_OPTIMIZING_SCHEDULER_ARM64_H_
#define ART_COMPILER_OPTIMIZING_SCHEDULER_ARM64_H_
+#include "base/macros.h"
#include "scheduler.h"
-namespace art {
+namespace art HIDDEN {
namespace arm64 {
static constexpr uint32_t kArm64MemoryLoadLatency = 5;
@@ -55,7 +56,7 @@ static constexpr uint32_t kArm64SIMDDivDoubleLatency = 60;
static constexpr uint32_t kArm64SIMDDivFloatLatency = 30;
static constexpr uint32_t kArm64SIMDTypeConversionInt2FPLatency = 10;
-class SchedulingLatencyVisitorARM64 : public SchedulingLatencyVisitor {
+class SchedulingLatencyVisitorARM64 final : public SchedulingLatencyVisitor {
public:
// Default visitor for instructions not handled specifically below.
void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) override {
diff --git a/compiler/optimizing/scheduler_test.cc b/compiler/optimizing/scheduler_test.cc
index a1cc202a89..165bfe3d94 100644
--- a/compiler/optimizing/scheduler_test.cc
+++ b/compiler/optimizing/scheduler_test.cc
@@ -17,6 +17,7 @@
#include "scheduler.h"
#include "base/arena_allocator.h"
+#include "base/macros.h"
#include "builder.h"
#include "codegen_test_utils.h"
#include "common_compiler_test.h"
@@ -34,7 +35,7 @@
#include "scheduler_arm.h"
#endif
-namespace art {
+namespace art HIDDEN {
// Return all combinations of ISA and code generator that are executable on
// hardware, or on simulator, and that we'd like to test.
@@ -65,7 +66,7 @@ static ::std::vector<CodegenTargetConfig> GetTargetConfigs() {
return v;
}
-class SchedulerTest : public OptimizingUnitTest {
+class SchedulerTest : public CommonCompilerTest, public OptimizingUnitTestHelper {
public:
SchedulerTest() : graph_(CreateGraph()) { }
diff --git a/compiler/optimizing/select_generator.cc b/compiler/optimizing/select_generator.cc
index 54053820ca..6a10440d11 100644
--- a/compiler/optimizing/select_generator.cc
+++ b/compiler/optimizing/select_generator.cc
@@ -16,10 +16,10 @@
#include "select_generator.h"
-#include "base/scoped_arena_containers.h"
+#include "optimizing/nodes.h"
#include "reference_type_propagation.h"
-namespace art {
+namespace art HIDDEN {
static constexpr size_t kMaxInstructionsInBranch = 1u;
@@ -69,156 +69,277 @@ static bool BlocksMergeTogether(HBasicBlock* block1, HBasicBlock* block2) {
return block1->GetSingleSuccessor() == block2->GetSingleSuccessor();
}
-// Returns nullptr if `block` has either no phis or there is more than one phi
-// with different inputs at `index1` and `index2`. Otherwise returns that phi.
-static HPhi* GetSingleChangedPhi(HBasicBlock* block, size_t index1, size_t index2) {
+// Returns nullptr if `block` has either no phis or there is more than one phi. Otherwise returns
+// that phi.
+static HPhi* GetSinglePhi(HBasicBlock* block, size_t index1, size_t index2) {
DCHECK_NE(index1, index2);
HPhi* select_phi = nullptr;
for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
HPhi* phi = it.Current()->AsPhi();
- if (phi->InputAt(index1) != phi->InputAt(index2)) {
- if (select_phi == nullptr) {
- // First phi with different inputs for the two indices found.
- select_phi = phi;
- } else {
- // More than one phis has different inputs for the two indices.
- return nullptr;
- }
+ if (select_phi == nullptr) {
+ // First phi found.
+ select_phi = phi;
+ } else {
+ // More than one phi found, return null.
+ return nullptr;
}
}
return select_phi;
}
-bool HSelectGenerator::Run() {
- bool didSelect = false;
- // Select cache with local allocator.
- ScopedArenaAllocator allocator(graph_->GetArenaStack());
- ScopedArenaSafeMap<HInstruction*, HSelect*> cache(
- std::less<HInstruction*>(), allocator.Adapter(kArenaAllocSelectGenerator));
+bool HSelectGenerator::TryGenerateSelectSimpleDiamondPattern(
+ HBasicBlock* block, ScopedArenaSafeMap<HInstruction*, HSelect*>* cache) {
+ DCHECK(block->GetLastInstruction()->IsIf());
+ HIf* if_instruction = block->GetLastInstruction()->AsIf();
+ HBasicBlock* true_block = if_instruction->IfTrueSuccessor();
+ HBasicBlock* false_block = if_instruction->IfFalseSuccessor();
+ DCHECK_NE(true_block, false_block);
- // Iterate in post order in the unlikely case that removing one occurrence of
- // the selection pattern empties a branch block of another occurrence.
- for (HBasicBlock* block : graph_->GetPostOrder()) {
- if (!block->EndsWithIf()) continue;
+ if (!IsSimpleBlock(true_block) ||
+ !IsSimpleBlock(false_block) ||
+ !BlocksMergeTogether(true_block, false_block)) {
+ return false;
+ }
+ HBasicBlock* merge_block = true_block->GetSingleSuccessor();
- // Find elements of the diamond pattern.
- HIf* if_instruction = block->GetLastInstruction()->AsIf();
- HBasicBlock* true_block = if_instruction->IfTrueSuccessor();
- HBasicBlock* false_block = if_instruction->IfFalseSuccessor();
- DCHECK_NE(true_block, false_block);
+ // If the branches are not empty, move instructions in front of the If.
+ // TODO(dbrazdil): This puts an instruction between If and its condition.
+ // Implement moving of conditions to first users if possible.
+ while (!true_block->IsSingleGoto() && !true_block->IsSingleReturn()) {
+ HInstruction* instr = true_block->GetFirstInstruction();
+ DCHECK(!instr->CanThrow());
+ instr->MoveBefore(if_instruction);
+ }
+ while (!false_block->IsSingleGoto() && !false_block->IsSingleReturn()) {
+ HInstruction* instr = false_block->GetFirstInstruction();
+ DCHECK(!instr->CanThrow());
+ instr->MoveBefore(if_instruction);
+ }
+ DCHECK(true_block->IsSingleGoto() || true_block->IsSingleReturn());
+ DCHECK(false_block->IsSingleGoto() || false_block->IsSingleReturn());
- if (!IsSimpleBlock(true_block) ||
- !IsSimpleBlock(false_block) ||
- !BlocksMergeTogether(true_block, false_block)) {
- continue;
- }
- HBasicBlock* merge_block = true_block->GetSingleSuccessor();
-
- // If the branches are not empty, move instructions in front of the If.
- // TODO(dbrazdil): This puts an instruction between If and its condition.
- // Implement moving of conditions to first users if possible.
- while (!true_block->IsSingleGoto() && !true_block->IsSingleReturn()) {
- HInstruction* instr = true_block->GetFirstInstruction();
- DCHECK(!instr->CanThrow());
- instr->MoveBefore(if_instruction);
- }
- while (!false_block->IsSingleGoto() && !false_block->IsSingleReturn()) {
- HInstruction* instr = false_block->GetFirstInstruction();
- DCHECK(!instr->CanThrow());
- instr->MoveBefore(if_instruction);
- }
- DCHECK(true_block->IsSingleGoto() || true_block->IsSingleReturn());
- DCHECK(false_block->IsSingleGoto() || false_block->IsSingleReturn());
-
- // Find the resulting true/false values.
- size_t predecessor_index_true = merge_block->GetPredecessorIndexOf(true_block);
- size_t predecessor_index_false = merge_block->GetPredecessorIndexOf(false_block);
- DCHECK_NE(predecessor_index_true, predecessor_index_false);
-
- bool both_successors_return = true_block->IsSingleReturn() && false_block->IsSingleReturn();
- HPhi* phi = GetSingleChangedPhi(merge_block, predecessor_index_true, predecessor_index_false);
-
- HInstruction* true_value = nullptr;
- HInstruction* false_value = nullptr;
- if (both_successors_return) {
- true_value = true_block->GetFirstInstruction()->InputAt(0);
- false_value = false_block->GetFirstInstruction()->InputAt(0);
- } else if (phi != nullptr) {
- true_value = phi->InputAt(predecessor_index_true);
- false_value = phi->InputAt(predecessor_index_false);
- } else {
- continue;
- }
- DCHECK(both_successors_return || phi != nullptr);
-
- // Create the Select instruction and insert it in front of the If.
- HInstruction* condition = if_instruction->InputAt(0);
- HSelect* select = new (graph_->GetAllocator()) HSelect(condition,
- true_value,
- false_value,
- if_instruction->GetDexPc());
- if (both_successors_return) {
- if (true_value->GetType() == DataType::Type::kReference) {
- DCHECK(false_value->GetType() == DataType::Type::kReference);
- ReferenceTypePropagation::FixUpInstructionType(select, graph_->GetHandleCache());
- }
- } else if (phi->GetType() == DataType::Type::kReference) {
- select->SetReferenceTypeInfo(phi->GetReferenceTypeInfo());
- }
- block->InsertInstructionBefore(select, if_instruction);
+ // Find the resulting true/false values.
+ size_t predecessor_index_true = merge_block->GetPredecessorIndexOf(true_block);
+ size_t predecessor_index_false = merge_block->GetPredecessorIndexOf(false_block);
+ DCHECK_NE(predecessor_index_true, predecessor_index_false);
- // Remove the true branch which removes the corresponding Phi
- // input if needed. If left only with the false branch, the Phi is
- // automatically removed.
- if (both_successors_return) {
- false_block->GetFirstInstruction()->ReplaceInput(select, 0);
- } else {
- phi->ReplaceInput(select, predecessor_index_false);
+ bool both_successors_return = true_block->IsSingleReturn() && false_block->IsSingleReturn();
+ // TODO(solanes): Extend to support multiple phis? e.g.
+ // int a, b;
+ // if (bool) {
+ // a = 0; b = 1;
+ // } else {
+ // a = 1; b = 2;
+ // }
+ // // use a and b
+ HPhi* phi = GetSinglePhi(merge_block, predecessor_index_true, predecessor_index_false);
+
+ HInstruction* true_value = nullptr;
+ HInstruction* false_value = nullptr;
+ if (both_successors_return) {
+ true_value = true_block->GetFirstInstruction()->InputAt(0);
+ false_value = false_block->GetFirstInstruction()->InputAt(0);
+ } else if (phi != nullptr) {
+ true_value = phi->InputAt(predecessor_index_true);
+ false_value = phi->InputAt(predecessor_index_false);
+ } else {
+ return false;
+ }
+ DCHECK(both_successors_return || phi != nullptr);
+
+ // Create the Select instruction and insert it in front of the If.
+ HInstruction* condition = if_instruction->InputAt(0);
+ HSelect* select = new (graph_->GetAllocator()) HSelect(condition,
+ true_value,
+ false_value,
+ if_instruction->GetDexPc());
+ if (both_successors_return) {
+ if (true_value->GetType() == DataType::Type::kReference) {
+ DCHECK(false_value->GetType() == DataType::Type::kReference);
+ ReferenceTypePropagation::FixUpInstructionType(select, graph_->GetHandleCache());
}
+ } else if (phi->GetType() == DataType::Type::kReference) {
+ select->SetReferenceTypeInfoIfValid(phi->GetReferenceTypeInfo());
+ }
+ block->InsertInstructionBefore(select, if_instruction);
- bool only_two_predecessors = (merge_block->GetPredecessors().size() == 2u);
- true_block->DisconnectAndDelete();
+ // Remove the true branch which removes the corresponding Phi
+ // input if needed. If left only with the false branch, the Phi is
+ // automatically removed.
+ if (both_successors_return) {
+ false_block->GetFirstInstruction()->ReplaceInput(select, 0);
+ } else {
+ phi->ReplaceInput(select, predecessor_index_false);
+ }
+
+ bool only_two_predecessors = (merge_block->GetPredecessors().size() == 2u);
+ true_block->DisconnectAndDelete();
+
+ // Merge remaining blocks which are now connected with Goto.
+ DCHECK_EQ(block->GetSingleSuccessor(), false_block);
+ block->MergeWith(false_block);
+ if (!both_successors_return && only_two_predecessors) {
+ DCHECK_EQ(only_two_predecessors, phi->GetBlock() == nullptr);
+ DCHECK_EQ(block->GetSingleSuccessor(), merge_block);
+ block->MergeWith(merge_block);
+ }
- // Merge remaining blocks which are now connected with Goto.
- DCHECK_EQ(block->GetSingleSuccessor(), false_block);
- block->MergeWith(false_block);
- if (!both_successors_return && only_two_predecessors) {
- DCHECK_EQ(only_two_predecessors, phi->GetBlock() == nullptr);
- DCHECK_EQ(block->GetSingleSuccessor(), merge_block);
- block->MergeWith(merge_block);
+ MaybeRecordStat(stats_, MethodCompilationStat::kSelectGenerated);
+
+ // Very simple way of finding common subexpressions in the generated HSelect statements
+ // (since this runs after GVN). Lookup by condition, and reuse latest one if possible
+ // (due to post order, latest select is most likely replacement). If needed, we could
+ // improve this by e.g. using the operands in the map as well.
+ auto it = cache->find(condition);
+ if (it == cache->end()) {
+ cache->Put(condition, select);
+ } else {
+ // Found cached value. See if latest can replace cached in the HIR.
+ HSelect* cached_select = it->second;
+ DCHECK_EQ(cached_select->GetCondition(), select->GetCondition());
+ if (cached_select->GetTrueValue() == select->GetTrueValue() &&
+ cached_select->GetFalseValue() == select->GetFalseValue() &&
+ select->StrictlyDominates(cached_select)) {
+ cached_select->ReplaceWith(select);
+ cached_select->GetBlock()->RemoveInstruction(cached_select);
}
+ it->second = select; // always cache latest
+ }
+
+ // No need to update dominance information, as we are simplifying
+ // a simple diamond shape, where the join block is merged with the
+ // entry block. Any following blocks would have had the join block
+ // as a dominator, and `MergeWith` handles changing that to the
+ // entry block.
+ return true;
+}
- MaybeRecordStat(stats_, MethodCompilationStat::kSelectGenerated);
+HBasicBlock* HSelectGenerator::TryFixupDoubleDiamondPattern(HBasicBlock* block) {
+ DCHECK(block->GetLastInstruction()->IsIf());
+ HIf* if_instruction = block->GetLastInstruction()->AsIf();
+ HBasicBlock* true_block = if_instruction->IfTrueSuccessor();
+ HBasicBlock* false_block = if_instruction->IfFalseSuccessor();
+ DCHECK_NE(true_block, false_block);
- // Very simple way of finding common subexpressions in the generated HSelect statements
- // (since this runs after GVN). Lookup by condition, and reuse latest one if possible
- // (due to post order, latest select is most likely replacement). If needed, we could
- // improve this by e.g. using the operands in the map as well.
- auto it = cache.find(condition);
- if (it == cache.end()) {
- cache.Put(condition, select);
+ // One branch must be a single goto, and the other must be the inner if.
+ if (true_block->IsSingleGoto() == false_block->IsSingleGoto()) {
+ return nullptr;
+ }
+
+ HBasicBlock* single_goto = true_block->IsSingleGoto() ? true_block : false_block;
+ HBasicBlock* inner_if_block = true_block->IsSingleGoto() ? false_block : true_block;
+
+ // The inner if branch has to be a block with just a comparison and an if.
+ if (!inner_if_block->EndsWithIf() ||
+ inner_if_block->GetLastInstruction()->AsIf()->InputAt(0) !=
+ inner_if_block->GetFirstInstruction() ||
+ inner_if_block->GetLastInstruction()->GetPrevious() !=
+ inner_if_block->GetFirstInstruction() ||
+ !inner_if_block->GetFirstInstruction()->IsCondition()) {
+ return nullptr;
+ }
+
+ HIf* inner_if_instruction = inner_if_block->GetLastInstruction()->AsIf();
+ HBasicBlock* inner_if_true_block = inner_if_instruction->IfTrueSuccessor();
+ HBasicBlock* inner_if_false_block = inner_if_instruction->IfFalseSuccessor();
+ if (!inner_if_true_block->IsSingleGoto() || !inner_if_false_block->IsSingleGoto()) {
+ return nullptr;
+ }
+
+ // One must merge into the outer condition and the other must not.
+ if (BlocksMergeTogether(single_goto, inner_if_true_block) ==
+ BlocksMergeTogether(single_goto, inner_if_false_block)) {
+ return nullptr;
+ }
+
+ // The first merge block joins the outer if with one of the inner if branches. It must contain
+ // only a Phi and a Goto.
+ HBasicBlock* first_merge = single_goto->GetSingleSuccessor();
+ if (first_merge->GetNumberOfPredecessors() != 2 ||
+ first_merge->GetPhis().CountSize() != 1 ||
+ !first_merge->GetLastInstruction()->IsGoto() ||
+ first_merge->GetFirstInstruction() != first_merge->GetLastInstruction()) {
+ return nullptr;
+ }
+
+ HPhi* first_phi = first_merge->GetFirstPhi()->AsPhi();
+
+ // The second merge block is where first_merge and the remaining branch meet. It must contain
+ // only a phi plus a goto, or a phi plus a return. Which block it is depends on the first merge.
+ HBasicBlock* merges_into_second_merge =
+ BlocksMergeTogether(single_goto, inner_if_true_block)
+ ? inner_if_false_block
+ : inner_if_true_block;
+ if (!BlocksMergeTogether(first_merge, merges_into_second_merge)) {
+ return nullptr;
+ }
+
+ HBasicBlock* second_merge = merges_into_second_merge->GetSingleSuccessor();
+ if (second_merge->GetNumberOfPredecessors() != 2 ||
+ second_merge->GetPhis().CountSize() != 1 ||
+ !(second_merge->GetLastInstruction()->IsGoto() ||
+ second_merge->GetLastInstruction()->IsReturn()) ||
+ second_merge->GetFirstInstruction() != second_merge->GetLastInstruction()) {
+ return nullptr;
+ }
+
+ size_t index = second_merge->GetPredecessorIndexOf(merges_into_second_merge);
+ HPhi* second_phi = second_merge->GetFirstPhi()->AsPhi();
+
+ // Merge the phis.
+ first_phi->AddInput(second_phi->InputAt(index));
+ merges_into_second_merge->ReplaceSuccessor(second_merge, first_merge);
+ second_phi->ReplaceWith(first_phi);
+ second_merge->RemovePhi(second_phi);
+
+ // Sort out the new domination before merging the blocks.
+ DCHECK_EQ(second_merge->GetSinglePredecessor(), first_merge);
+ second_merge->GetDominator()->RemoveDominatedBlock(second_merge);
+ second_merge->SetDominator(first_merge);
+ first_merge->AddDominatedBlock(second_merge);
+ first_merge->MergeWith(second_merge);
+
+ // No need to update dominance information. There's a chance that `merges_into_second_merge`
+ // doesn't come before `first_merge` but we don't need to fix it since `merges_into_second_merge`
+ // will disappear from the graph altogether when doing the follow-up
+ // TryGenerateSelectSimpleDiamondPattern.
+
+ return inner_if_block;
+}
+
+bool HSelectGenerator::Run() {
+ bool did_select = false;
+ // Select cache with local allocator.
+ ScopedArenaAllocator allocator(graph_->GetArenaStack());
+ ScopedArenaSafeMap<HInstruction*, HSelect*> cache(std::less<HInstruction*>(),
+ allocator.Adapter(kArenaAllocSelectGenerator));
+
+ // Iterate in post order in the unlikely case that removing one occurrence of
+ // the selection pattern empties a branch block of another occurrence.
+ for (HBasicBlock* block : graph_->GetPostOrder()) {
+ if (!block->EndsWithIf()) {
+ continue;
+ }
+
+ if (TryGenerateSelectSimpleDiamondPattern(block, &cache)) {
+ did_select = true;
} else {
- // Found cached value. See if latest can replace cached in the HIR.
- HSelect* cached = it->second;
- DCHECK_EQ(cached->GetCondition(), select->GetCondition());
- if (cached->GetTrueValue() == select->GetTrueValue() &&
- cached->GetFalseValue() == select->GetFalseValue() &&
- select->StrictlyDominates(cached)) {
- cached->ReplaceWith(select);
- cached->GetBlock()->RemoveInstruction(cached);
+ // Try to fix up the odd version of the double diamond pattern. If that succeeds, it means
+ // that we can generate two selects.
+ HBasicBlock* inner_if_block = TryFixupDoubleDiamondPattern(block);
+ if (inner_if_block != nullptr) {
+ // Generate the selects now since `inner_if_block` should be after `block` in PostOrder.
+ bool result = TryGenerateSelectSimpleDiamondPattern(inner_if_block, &cache);
+ DCHECK(result);
+ result = TryGenerateSelectSimpleDiamondPattern(block, &cache);
+ DCHECK(result);
+ did_select = true;
}
- it->second = select; // always cache latest
}
-
- // No need to update dominance information, as we are simplifying
- // a simple diamond shape, where the join block is merged with the
- // entry block. Any following blocks would have had the join block
- // as a dominator, and `MergeWith` handles changing that to the
- // entry block.
- didSelect = true;
}
- return didSelect;
+
+ return did_select;
}
} // namespace art
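
For context, a minimal sketch (not part of the patch; the function name is made up) of the source-level shape that TryGenerateSelectSimpleDiamondPattern targets: a diamond whose two branches only pick a value, so the phi in the merge block can be rewritten as an HSelect on the if-condition.

    int PickValue(bool cond, int a, int b) {
      int x;
      if (cond) {
        x = a;   // true_block: a single assignment followed by a goto to the merge block.
      } else {
        x = b;   // false_block: likewise a single assignment and a goto.
      }
      return x;  // merge_block: phi(a, b) conceptually becomes Select(cond, a, b).
    }
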
diff --git a/compiler/optimizing/select_generator.h b/compiler/optimizing/select_generator.h
index 30ac8a86eb..7aa0803d89 100644
--- a/compiler/optimizing/select_generator.h
+++ b/compiler/optimizing/select_generator.h
@@ -57,9 +57,12 @@
#ifndef ART_COMPILER_OPTIMIZING_SELECT_GENERATOR_H_
#define ART_COMPILER_OPTIMIZING_SELECT_GENERATOR_H_
+#include "base/macros.h"
+#include "base/scoped_arena_containers.h"
#include "optimization.h"
+#include "optimizing/nodes.h"
-namespace art {
+namespace art HIDDEN {
class HSelectGenerator : public HOptimization {
public:
@@ -72,6 +75,43 @@ class HSelectGenerator : public HOptimization {
static constexpr const char* kSelectGeneratorPassName = "select_generator";
private:
+ bool TryGenerateSelectSimpleDiamondPattern(HBasicBlock* block,
+ ScopedArenaSafeMap<HInstruction*, HSelect*>* cache);
+
+ // When generating code for nested ternary operators (e.g. `return (x > 100) ? 100 : ((x < -100) ?
+ // -100 : x);`), a dexer can generate a double diamond pattern but it is not a clear cut one due
+ // to the merging of the blocks. `TryFixupDoubleDiamondPattern` recognizes that pattern and fixes
+ // up the graph to have a clean double diamond that `TryGenerateSelectSimpleDiamondPattern` can
+ // use to generate selects.
+ //
+ // In ASCII, it turns:
+ //
+ // 1 (outer if)
+ // / \
+ // 2 3 (inner if)
+ // | / \
+ // | 4 5
+ // \/ |
+ // 6 |
+ // \ |
+ // 7
+ // |
+ // 8
+ // into:
+ // 1 (outer if)
+ // / \
+ // 2 3 (inner if)
+ // | / \
+ // | 4 5
+ // \/ /
+ // 6
+ // |
+ // 8
+ //
+ // In short, we merge blocks 6 and 7, so block 7 disappears. Now we have a diamond with {3,4,5,6}, and
+ // when that gets resolved we get another one with the outer if.
+ HBasicBlock* TryFixupDoubleDiamondPattern(HBasicBlock* block);
+
DISALLOW_COPY_AND_ASSIGN(HSelectGenerator);
};
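
As a hedged illustration of the double diamond described in the comment above, the nested ternary it mentions can be written as the function below (the Clamp name is invented for this sketch); after TryFixupDoubleDiamondPattern and two rounds of simple-diamond matching, the result is conceptually two chained selects.

    int Clamp(int x) {
      // Outer if: x > 100; inner if: x < -100. Conceptually this lowers to
      //   inner = Select(x < -100, -100, x)
      //   return Select(x > 100, 100, inner)
      return (x > 100) ? 100 : ((x < -100) ? -100 : x);
    }
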
diff --git a/compiler/optimizing/select_generator_test.cc b/compiler/optimizing/select_generator_test.cc
index b18d41abbb..fc9e150d92 100644
--- a/compiler/optimizing/select_generator_test.cc
+++ b/compiler/optimizing/select_generator_test.cc
@@ -17,12 +17,13 @@
#include "select_generator.h"
#include "base/arena_allocator.h"
+#include "base/macros.h"
#include "builder.h"
#include "nodes.h"
#include "optimizing_unit_test.h"
#include "side_effects_analysis.h"
-namespace art {
+namespace art HIDDEN {
class SelectGeneratorTest : public OptimizingUnitTest {
protected:
diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc
index 17cf3d3477..277edff33e 100644
--- a/compiler/optimizing/sharpening.cc
+++ b/compiler/optimizing/sharpening.cc
@@ -34,7 +34,7 @@
#include "runtime.h"
#include "scoped_thread_state_change-inl.h"
-namespace art {
+namespace art HIDDEN {
static bool IsInBootImage(ArtMethod* method) {
gc::Heap* heap = Runtime::Current()->GetHeap();
@@ -63,9 +63,9 @@ HInvokeStaticOrDirect::DispatchInfo HSharpening::SharpenLoadMethod(
bool for_interface_call,
CodeGenerator* codegen) {
if (kIsDebugBuild) {
- ScopedObjectAccess soa(Thread::Current()); // Required for GetDeclaringClass below.
+ ScopedObjectAccess soa(Thread::Current()); // Required for `IsStringConstructor()` below.
DCHECK(callee != nullptr);
- DCHECK(!(callee->IsConstructor() && callee->GetDeclaringClass()->IsStringClass()));
+ DCHECK(!callee->IsStringConstructor());
}
MethodLoadKind method_load_kind;
diff --git a/compiler/optimizing/sharpening.h b/compiler/optimizing/sharpening.h
index 975366918c..6dfe904f27 100644
--- a/compiler/optimizing/sharpening.h
+++ b/compiler/optimizing/sharpening.h
@@ -17,10 +17,11 @@
#ifndef ART_COMPILER_OPTIMIZING_SHARPENING_H_
#define ART_COMPILER_OPTIMIZING_SHARPENING_H_
+#include "base/macros.h"
#include "nodes.h"
#include "optimization.h"
-namespace art {
+namespace art HIDDEN {
class CodeGenerator;
class DexCompilationUnit;
diff --git a/compiler/optimizing/side_effects_analysis.cc b/compiler/optimizing/side_effects_analysis.cc
index ba97b43de9..56719b100e 100644
--- a/compiler/optimizing/side_effects_analysis.cc
+++ b/compiler/optimizing/side_effects_analysis.cc
@@ -16,7 +16,7 @@
#include "side_effects_analysis.h"
-namespace art {
+namespace art HIDDEN {
bool SideEffectsAnalysis::Run() {
// Inlining might have created more blocks, so we need to increase the size
diff --git a/compiler/optimizing/side_effects_analysis.h b/compiler/optimizing/side_effects_analysis.h
index 56a01e63f1..47fcdc5d1b 100644
--- a/compiler/optimizing/side_effects_analysis.h
+++ b/compiler/optimizing/side_effects_analysis.h
@@ -18,10 +18,11 @@
#define ART_COMPILER_OPTIMIZING_SIDE_EFFECTS_ANALYSIS_H_
#include "base/arena_containers.h"
+#include "base/macros.h"
#include "nodes.h"
#include "optimization.h"
-namespace art {
+namespace art HIDDEN {
class SideEffectsAnalysis : public HOptimization {
public:
diff --git a/compiler/optimizing/side_effects_test.cc b/compiler/optimizing/side_effects_test.cc
index 268798ca7d..f2b781dfa4 100644
--- a/compiler/optimizing/side_effects_test.cc
+++ b/compiler/optimizing/side_effects_test.cc
@@ -16,10 +16,11 @@
#include <gtest/gtest.h>
+#include "base/macros.h"
#include "data_type.h"
#include "nodes.h"
-namespace art {
+namespace art HIDDEN {
// Only runtime types other than void are allowed.
static const DataType::Type kTestTypes[] = {
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index 67ee83c9dd..a658252e69 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -27,7 +27,7 @@
#include "scoped_thread_state_change-inl.h"
#include "ssa_phi_elimination.h"
-namespace art {
+namespace art HIDDEN {
void SsaBuilder::FixNullConstantType() {
// The order doesn't matter here.
@@ -538,7 +538,6 @@ GraphAnalysisResult SsaBuilder::BuildSsa() {
// Compute type of reference type instructions. The pass assumes that
// NullConstant has been fixed up.
ReferenceTypePropagation(graph_,
- class_loader_,
dex_cache_,
/* is_first_run= */ true).Run();
diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h
index a7d4e0ebd3..99a5469932 100644
--- a/compiler/optimizing/ssa_builder.h
+++ b/compiler/optimizing/ssa_builder.h
@@ -17,12 +17,13 @@
#ifndef ART_COMPILER_OPTIMIZING_SSA_BUILDER_H_
#define ART_COMPILER_OPTIMIZING_SSA_BUILDER_H_
+#include "base/macros.h"
#include "base/scoped_arena_allocator.h"
#include "base/scoped_arena_containers.h"
#include "nodes.h"
#include "optimization.h"
-namespace art {
+namespace art HIDDEN {
/**
* Transforms a graph into SSA form. The liveness guarantees of
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index 18942a1823..317e0999d7 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -21,7 +21,7 @@
#include "linear_order.h"
#include "nodes.h"
-namespace art {
+namespace art HIDDEN {
void SsaLivenessAnalysis::Analyze() {
// Compute the linear order directly in the graph's data structure
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index 7f31585f34..cc2b49cf22 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -21,11 +21,12 @@
#include "base/intrusive_forward_list.h"
#include "base/iteration_range.h"
+#include "base/macros.h"
#include "base/scoped_arena_allocator.h"
#include "base/scoped_arena_containers.h"
#include "nodes.h"
-namespace art {
+namespace art HIDDEN {
class CodeGenerator;
class SsaLivenessAnalysis;
diff --git a/compiler/optimizing/ssa_liveness_analysis_test.cc b/compiler/optimizing/ssa_liveness_analysis_test.cc
index a477893d57..2df0f34c7d 100644
--- a/compiler/optimizing/ssa_liveness_analysis_test.cc
+++ b/compiler/optimizing/ssa_liveness_analysis_test.cc
@@ -20,12 +20,13 @@
#include "arch/instruction_set_features.h"
#include "base/arena_allocator.h"
#include "base/arena_containers.h"
+#include "base/macros.h"
#include "code_generator.h"
#include "driver/compiler_options.h"
#include "nodes.h"
#include "optimizing_unit_test.h"
-namespace art {
+namespace art HIDDEN {
class SsaLivenessAnalysisTest : public OptimizingUnitTest {
protected:
diff --git a/compiler/optimizing/ssa_phi_elimination.cc b/compiler/optimizing/ssa_phi_elimination.cc
index 8fd6962500..ce343dffec 100644
--- a/compiler/optimizing/ssa_phi_elimination.cc
+++ b/compiler/optimizing/ssa_phi_elimination.cc
@@ -21,7 +21,7 @@
#include "base/scoped_arena_containers.h"
#include "base/bit_vector-inl.h"
-namespace art {
+namespace art HIDDEN {
bool SsaDeadPhiElimination::Run() {
MarkDeadPhis();
diff --git a/compiler/optimizing/ssa_phi_elimination.h b/compiler/optimizing/ssa_phi_elimination.h
index c5cc752ffc..f606f928fa 100644
--- a/compiler/optimizing/ssa_phi_elimination.h
+++ b/compiler/optimizing/ssa_phi_elimination.h
@@ -17,10 +17,11 @@
#ifndef ART_COMPILER_OPTIMIZING_SSA_PHI_ELIMINATION_H_
#define ART_COMPILER_OPTIMIZING_SSA_PHI_ELIMINATION_H_
+#include "base/macros.h"
#include "nodes.h"
#include "optimization.h"
-namespace art {
+namespace art HIDDEN {
/**
* Optimization phase that removes dead phis from the graph. Dead phis are unused
diff --git a/compiler/optimizing/ssa_test.cc b/compiler/optimizing/ssa_test.cc
index e679893af2..980493db34 100644
--- a/compiler/optimizing/ssa_test.cc
+++ b/compiler/optimizing/ssa_test.cc
@@ -17,6 +17,7 @@
#include "android-base/stringprintf.h"
#include "base/arena_allocator.h"
+#include "base/macros.h"
#include "builder.h"
#include "dex/dex_file.h"
#include "dex/dex_instruction.h"
@@ -27,9 +28,9 @@
#include "gtest/gtest.h"
-namespace art {
+namespace art HIDDEN {
-class SsaTest : public OptimizingUnitTest {
+class SsaTest : public CommonCompilerTest, public OptimizingUnitTestHelper {
protected:
void TestCode(const std::vector<uint16_t>& data, const char* expected);
};
diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc
index f55bbee1c8..1a368ed347 100644
--- a/compiler/optimizing/stack_map_stream.cc
+++ b/compiler/optimizing/stack_map_stream.cc
@@ -20,6 +20,7 @@
#include <vector>
#include "art_method-inl.h"
+#include "base/globals.h"
#include "base/stl_util.h"
#include "class_linker.h"
#include "dex/dex_file.h"
@@ -32,7 +33,7 @@
#include "scoped_thread_state_change-inl.h"
#include "stack_map.h"
-namespace art {
+namespace art HIDDEN {
constexpr static bool kVerifyStackMaps = kIsDebugBuild;
@@ -49,7 +50,8 @@ void StackMapStream::BeginMethod(size_t frame_size_in_bytes,
size_t core_spill_mask,
size_t fp_spill_mask,
uint32_t num_dex_registers,
- bool baseline) {
+ bool baseline,
+ bool debuggable) {
DCHECK(!in_method_) << "Mismatched Begin/End calls";
in_method_ = true;
DCHECK_EQ(packed_frame_size_, 0u) << "BeginMethod was already called";
@@ -60,6 +62,7 @@ void StackMapStream::BeginMethod(size_t frame_size_in_bytes,
fp_spill_mask_ = fp_spill_mask;
num_dex_registers_ = num_dex_registers;
baseline_ = baseline;
+ debuggable_ = debuggable;
if (kVerifyStackMaps) {
dchecks_.emplace_back([=](const CodeInfo& code_info) {
@@ -99,16 +102,21 @@ void StackMapStream::EndMethod(size_t code_size) {
}
}
-void StackMapStream::BeginStackMapEntry(uint32_t dex_pc,
- uint32_t native_pc_offset,
- uint32_t register_mask,
- BitVector* stack_mask,
- StackMap::Kind kind,
- bool needs_vreg_info) {
+void StackMapStream::BeginStackMapEntry(
+ uint32_t dex_pc,
+ uint32_t native_pc_offset,
+ uint32_t register_mask,
+ BitVector* stack_mask,
+ StackMap::Kind kind,
+ bool needs_vreg_info,
+ const std::vector<uint32_t>& dex_pc_list_for_catch_verification) {
DCHECK(in_method_) << "Call BeginMethod first";
DCHECK(!in_stack_map_) << "Mismatched Begin/End calls";
in_stack_map_ = true;
+ DCHECK_IMPLIES(!dex_pc_list_for_catch_verification.empty(), kind == StackMap::Kind::Catch);
+ DCHECK_IMPLIES(!dex_pc_list_for_catch_verification.empty(), kIsDebugBuild);
+
current_stack_map_ = BitTableBuilder<StackMap>::Entry();
current_stack_map_[StackMap::kKind] = static_cast<uint32_t>(kind);
current_stack_map_[StackMap::kPackedNativePc] =
@@ -149,7 +157,8 @@ void StackMapStream::BeginStackMapEntry(uint32_t dex_pc,
instruction_set_);
CHECK_EQ(stack_map.Row(), stack_map_index);
} else if (kind == StackMap::Kind::Catch) {
- StackMap stack_map = code_info.GetCatchStackMapForDexPc(dex_pc);
+ StackMap stack_map = code_info.GetCatchStackMapForDexPc(
+ ArrayRef<const uint32_t>(dex_pc_list_for_catch_verification));
CHECK_EQ(stack_map.Row(), stack_map_index);
}
StackMap stack_map = code_info.GetStackMapAt(stack_map_index);
@@ -367,6 +376,7 @@ ScopedArenaVector<uint8_t> StackMapStream::Encode() {
uint32_t flags = (inline_infos_.size() > 0) ? CodeInfo::kHasInlineInfo : 0;
flags |= baseline_ ? CodeInfo::kIsBaseline : 0;
+ flags |= debuggable_ ? CodeInfo::kIsDebuggable : 0;
DCHECK_LE(flags, kVarintMax); // Ensure flags can be read directly as byte.
uint32_t bit_table_flags = 0;
ForEachBitTable([&bit_table_flags](size_t i, auto bit_table) {
diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h
index 27145a174c..643af2da94 100644
--- a/compiler/optimizing/stack_map_stream.h
+++ b/compiler/optimizing/stack_map_stream.h
@@ -21,6 +21,7 @@
#include "base/arena_bit_vector.h"
#include "base/bit_table.h"
#include "base/bit_vector-inl.h"
+#include "base/macros.h"
#include "base/memory_region.h"
#include "base/scoped_arena_containers.h"
#include "base/value_object.h"
@@ -28,7 +29,7 @@
#include "nodes.h"
#include "stack_map.h"
-namespace art {
+namespace art HIDDEN {
class CodeGenerator;
@@ -64,15 +65,19 @@ class StackMapStream : public DeletableArenaObject<kArenaAllocStackMapStream> {
size_t core_spill_mask,
size_t fp_spill_mask,
uint32_t num_dex_registers,
- bool baseline = false);
+ bool baseline,
+ bool debuggable);
void EndMethod(size_t code_size);
- void BeginStackMapEntry(uint32_t dex_pc,
- uint32_t native_pc_offset,
- uint32_t register_mask = 0,
- BitVector* sp_mask = nullptr,
- StackMap::Kind kind = StackMap::Kind::Default,
- bool needs_vreg_info = true);
+ void BeginStackMapEntry(
+ uint32_t dex_pc,
+ uint32_t native_pc_offset,
+ uint32_t register_mask = 0,
+ BitVector* sp_mask = nullptr,
+ StackMap::Kind kind = StackMap::Kind::Default,
+ bool needs_vreg_info = true,
+ const std::vector<uint32_t>& dex_pc_list_for_catch_verification = std::vector<uint32_t>());
+
void EndStackMapEntry();
void AddDexRegisterEntry(DexRegisterLocation::Kind kind, int32_t value) {
@@ -125,6 +130,7 @@ class StackMapStream : public DeletableArenaObject<kArenaAllocStackMapStream> {
uint32_t fp_spill_mask_ = 0;
uint32_t num_dex_registers_ = 0;
bool baseline_;
+ bool debuggable_;
BitTableBuilder<StackMap> stack_maps_;
BitTableBuilder<RegisterMask> register_masks_;
BitmapTableBuilder stack_masks_;
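
A minimal usage sketch of the updated API (the values are arbitrary and a ScopedArenaAllocator named allocator is assumed to be in scope, as in the tests below): BeginMethod now takes explicit baseline/debuggable flags, and Catch stack map entries may carry a dex-pc list that is only consulted for verification in debug builds.

    StackMapStream stream(&allocator, kRuntimeISA);
    stream.BeginMethod(/* frame_size_in_bytes= */ 32,
                       /* core_spill_mask= */ 0,
                       /* fp_spill_mask= */ 0,
                       /* num_dex_registers= */ 2,
                       /* baseline= */ false,
                       /* debuggable= */ true);
    stream.BeginStackMapEntry(/* dex_pc= */ 3,
                              /* native_pc_offset= */ 64,
                              /* register_mask= */ 0,
                              /* sp_mask= */ nullptr,
                              StackMap::Kind::Catch,
                              /* needs_vreg_info= */ true,
                              /* dex_pc_list_for_catch_verification= */ {3});
    stream.EndStackMapEntry();
    stream.EndMethod(/* code_size= */ 64);
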
diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc
index f6a739e15a..a2c30e7681 100644
--- a/compiler/optimizing/stack_map_test.cc
+++ b/compiler/optimizing/stack_map_test.cc
@@ -18,12 +18,13 @@
#include "art_method.h"
#include "base/arena_bit_vector.h"
+#include "base/macros.h"
#include "base/malloc_arena_pool.h"
#include "stack_map_stream.h"
#include "gtest/gtest.h"
-namespace art {
+namespace art HIDDEN {
// Check that the stack mask of given stack map is identical
// to the given bit vector. Returns true if they are same.
@@ -52,7 +53,12 @@ TEST(StackMapTest, Test1) {
ArenaStack arena_stack(&pool);
ScopedArenaAllocator allocator(&arena_stack);
StackMapStream stream(&allocator, kRuntimeISA);
- stream.BeginMethod(32, 0, 0, 2);
+ stream.BeginMethod(/* frame_size_in_bytes= */ 32,
+ /* core_spill_mask= */ 0,
+ /* fp_spill_mask= */ 0,
+ /* num_dex_registers= */ 2,
+ /* baseline= */ false,
+ /* debuggable= */ false);
ArenaBitVector sp_mask(&allocator, 0, false);
size_t number_of_dex_registers = 2;
@@ -106,7 +112,12 @@ TEST(StackMapTest, Test2) {
ArenaStack arena_stack(&pool);
ScopedArenaAllocator allocator(&arena_stack);
StackMapStream stream(&allocator, kRuntimeISA);
- stream.BeginMethod(32, 0, 0, 2);
+ stream.BeginMethod(/* frame_size_in_bytes= */ 32,
+ /* core_spill_mask= */ 0,
+ /* fp_spill_mask= */ 0,
+ /* num_dex_registers= */ 2,
+ /* baseline= */ false,
+ /* debuggable= */ false);
ArtMethod art_method;
ArenaBitVector sp_mask1(&allocator, 0, true);
@@ -300,7 +311,12 @@ TEST(StackMapTest, TestDeduplicateInlineInfoDexRegisterMap) {
ArenaStack arena_stack(&pool);
ScopedArenaAllocator allocator(&arena_stack);
StackMapStream stream(&allocator, kRuntimeISA);
- stream.BeginMethod(32, 0, 0, 2);
+ stream.BeginMethod(/* frame_size_in_bytes= */ 32,
+ /* core_spill_mask= */ 0,
+ /* fp_spill_mask= */ 0,
+ /* num_dex_registers= */ 2,
+ /* baseline= */ false,
+ /* debuggable= */ false);
ArtMethod art_method;
ArenaBitVector sp_mask1(&allocator, 0, true);
@@ -363,7 +379,12 @@ TEST(StackMapTest, TestNonLiveDexRegisters) {
ArenaStack arena_stack(&pool);
ScopedArenaAllocator allocator(&arena_stack);
StackMapStream stream(&allocator, kRuntimeISA);
- stream.BeginMethod(32, 0, 0, 2);
+ stream.BeginMethod(/* frame_size_in_bytes= */ 32,
+ /* core_spill_mask= */ 0,
+ /* fp_spill_mask= */ 0,
+ /* num_dex_registers= */ 2,
+ /* baseline= */ false,
+ /* debuggable= */ false);
ArenaBitVector sp_mask(&allocator, 0, false);
uint32_t number_of_dex_registers = 2;
@@ -411,7 +432,12 @@ TEST(StackMapTest, TestShareDexRegisterMap) {
ArenaStack arena_stack(&pool);
ScopedArenaAllocator allocator(&arena_stack);
StackMapStream stream(&allocator, kRuntimeISA);
- stream.BeginMethod(32, 0, 0, 2);
+ stream.BeginMethod(/* frame_size_in_bytes= */ 32,
+ /* core_spill_mask= */ 0,
+ /* fp_spill_mask= */ 0,
+ /* num_dex_registers= */ 2,
+ /* baseline= */ false,
+ /* debuggable= */ false);
ArenaBitVector sp_mask(&allocator, 0, false);
uint32_t number_of_dex_registers = 2;
@@ -467,7 +493,12 @@ TEST(StackMapTest, TestNoDexRegisterMap) {
ArenaStack arena_stack(&pool);
ScopedArenaAllocator allocator(&arena_stack);
StackMapStream stream(&allocator, kRuntimeISA);
- stream.BeginMethod(32, 0, 0, 1);
+ stream.BeginMethod(/* frame_size_in_bytes= */ 32,
+ /* core_spill_mask= */ 0,
+ /* fp_spill_mask= */ 0,
+ /* num_dex_registers= */ 1,
+ /* baseline= */ false,
+ /* debuggable= */ false);
ArenaBitVector sp_mask(&allocator, 0, false);
stream.BeginStackMapEntry(0, 64 * kPcAlign, 0x3, &sp_mask);
@@ -512,7 +543,12 @@ TEST(StackMapTest, InlineTest) {
ArenaStack arena_stack(&pool);
ScopedArenaAllocator allocator(&arena_stack);
StackMapStream stream(&allocator, kRuntimeISA);
- stream.BeginMethod(32, 0, 0, 2);
+ stream.BeginMethod(/* frame_size_in_bytes= */ 32,
+ /* core_spill_mask= */ 0,
+ /* fp_spill_mask= */ 0,
+ /* num_dex_registers= */ 2,
+ /* baseline= */ false,
+ /* debuggable= */ false);
ArtMethod art_method;
ArenaBitVector sp_mask1(&allocator, 0, true);
@@ -702,7 +738,12 @@ TEST(StackMapTest, TestDeduplicateStackMask) {
ArenaStack arena_stack(&pool);
ScopedArenaAllocator allocator(&arena_stack);
StackMapStream stream(&allocator, kRuntimeISA);
- stream.BeginMethod(32, 0, 0, 0);
+ stream.BeginMethod(/* frame_size_in_bytes= */ 32,
+ /* core_spill_mask= */ 0,
+ /* fp_spill_mask= */ 0,
+ /* num_dex_registers= */ 0,
+ /* baseline= */ false,
+ /* debuggable= */ false);
ArenaBitVector sp_mask(&allocator, 0, true);
sp_mask.SetBit(1);
diff --git a/compiler/optimizing/superblock_cloner.cc b/compiler/optimizing/superblock_cloner.cc
index a5f919c31c..7c0097c6f6 100644
--- a/compiler/optimizing/superblock_cloner.cc
+++ b/compiler/optimizing/superblock_cloner.cc
@@ -22,7 +22,7 @@
#include <sstream>
-namespace art {
+namespace art HIDDEN {
using HBasicBlockMap = SuperblockCloner::HBasicBlockMap;
using HInstructionMap = SuperblockCloner::HInstructionMap;
@@ -633,7 +633,7 @@ void SuperblockCloner::ConstructSubgraphClosedSSA() {
HPhi* phi = new (arena_) HPhi(arena_, kNoRegNumber, 0, value->GetType());
if (value->GetType() == DataType::Type::kReference) {
- phi->SetReferenceTypeInfo(value->GetReferenceTypeInfo());
+ phi->SetReferenceTypeInfoIfValid(value->GetReferenceTypeInfo());
}
exit_block->AddPhi(phi);
diff --git a/compiler/optimizing/superblock_cloner.h b/compiler/optimizing/superblock_cloner.h
index 1f6ee74fbd..421701fb19 100644
--- a/compiler/optimizing/superblock_cloner.h
+++ b/compiler/optimizing/superblock_cloner.h
@@ -20,9 +20,10 @@
#include "base/arena_bit_vector.h"
#include "base/arena_containers.h"
#include "base/bit_vector-inl.h"
+#include "base/macros.h"
#include "nodes.h"
-namespace art {
+namespace art HIDDEN {
class InductionVarRange;
diff --git a/compiler/optimizing/superblock_cloner_test.cc b/compiler/optimizing/superblock_cloner_test.cc
index d8d68b7763..ea2563ea7d 100644
--- a/compiler/optimizing/superblock_cloner_test.cc
+++ b/compiler/optimizing/superblock_cloner_test.cc
@@ -14,6 +14,7 @@
* limitations under the License.
*/
+#include "base/macros.h"
#include "graph_checker.h"
#include "nodes.h"
#include "optimizing_unit_test.h"
@@ -21,7 +22,7 @@
#include "gtest/gtest.h"
-namespace art {
+namespace art HIDDEN {
using HBasicBlockMap = SuperblockCloner::HBasicBlockMap;
using HInstructionMap = SuperblockCloner::HInstructionMap;
diff --git a/compiler/optimizing/suspend_check_test.cc b/compiler/optimizing/suspend_check_test.cc
index 33823e2a11..76e7e0c32c 100644
--- a/compiler/optimizing/suspend_check_test.cc
+++ b/compiler/optimizing/suspend_check_test.cc
@@ -14,6 +14,7 @@
* limitations under the License.
*/
+#include "base/macros.h"
#include "builder.h"
#include "dex/dex_instruction.h"
#include "nodes.h"
@@ -22,13 +23,13 @@
#include "gtest/gtest.h"
-namespace art {
+namespace art HIDDEN {
/**
* Check that the HGraphBuilder adds suspend checks to backward branches.
*/
-class SuspendCheckTest : public OptimizingUnitTest {
+class SuspendCheckTest : public CommonCompilerTest, public OptimizingUnitTestHelper {
protected:
void TestCode(const std::vector<uint16_t>& data);
};
diff --git a/compiler/optimizing/write_barrier_elimination.cc b/compiler/optimizing/write_barrier_elimination.cc
new file mode 100644
index 0000000000..eb70b670fe
--- /dev/null
+++ b/compiler/optimizing/write_barrier_elimination.cc
@@ -0,0 +1,161 @@
+/*
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "write_barrier_elimination.h"
+
+#include "base/arena_allocator.h"
+#include "base/scoped_arena_allocator.h"
+#include "base/scoped_arena_containers.h"
+#include "optimizing/nodes.h"
+
+namespace art HIDDEN {
+
+class WBEVisitor final : public HGraphVisitor {
+ public:
+ WBEVisitor(HGraph* graph, OptimizingCompilerStats* stats)
+ : HGraphVisitor(graph),
+ scoped_allocator_(graph->GetArenaStack()),
+ current_write_barriers_(scoped_allocator_.Adapter(kArenaAllocWBE)),
+ stats_(stats) {}
+
+ void VisitBasicBlock(HBasicBlock* block) override {
+ // We clear the map to perform this optimization only in the same block. Doing it across blocks
+ // would entail non-trivial merging of states.
+ current_write_barriers_.clear();
+ HGraphVisitor::VisitBasicBlock(block);
+ }
+
+ void VisitInstanceFieldSet(HInstanceFieldSet* instruction) override {
+ DCHECK(!instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC()));
+
+ if (instruction->GetFieldType() != DataType::Type::kReference ||
+ instruction->GetValue()->IsNullConstant()) {
+ instruction->SetWriteBarrierKind(WriteBarrierKind::kDontEmit);
+ return;
+ }
+
+ MaybeRecordStat(stats_, MethodCompilationStat::kPossibleWriteBarrier);
+ HInstruction* obj = HuntForOriginalReference(instruction->InputAt(0));
+ auto it = current_write_barriers_.find(obj);
+ if (it != current_write_barriers_.end()) {
+ DCHECK(it->second->IsInstanceFieldSet());
+ DCHECK(it->second->AsInstanceFieldSet()->GetWriteBarrierKind() !=
+ WriteBarrierKind::kDontEmit);
+ DCHECK_EQ(it->second->GetBlock(), instruction->GetBlock());
+ it->second->AsInstanceFieldSet()->SetWriteBarrierKind(WriteBarrierKind::kEmitNoNullCheck);
+ instruction->SetWriteBarrierKind(WriteBarrierKind::kDontEmit);
+ MaybeRecordStat(stats_, MethodCompilationStat::kRemovedWriteBarrier);
+ } else {
+ const bool inserted = current_write_barriers_.insert({obj, instruction}).second;
+ DCHECK(inserted);
+ DCHECK(instruction->GetWriteBarrierKind() != WriteBarrierKind::kDontEmit);
+ }
+ }
+
+ void VisitStaticFieldSet(HStaticFieldSet* instruction) override {
+ DCHECK(!instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC()));
+
+ if (instruction->GetFieldType() != DataType::Type::kReference ||
+ instruction->GetValue()->IsNullConstant()) {
+ instruction->SetWriteBarrierKind(WriteBarrierKind::kDontEmit);
+ return;
+ }
+
+ MaybeRecordStat(stats_, MethodCompilationStat::kPossibleWriteBarrier);
+ HInstruction* cls = HuntForOriginalReference(instruction->InputAt(0));
+ auto it = current_write_barriers_.find(cls);
+ if (it != current_write_barriers_.end()) {
+ DCHECK(it->second->IsStaticFieldSet());
+ DCHECK(it->second->AsStaticFieldSet()->GetWriteBarrierKind() != WriteBarrierKind::kDontEmit);
+ DCHECK_EQ(it->second->GetBlock(), instruction->GetBlock());
+ it->second->AsStaticFieldSet()->SetWriteBarrierKind(WriteBarrierKind::kEmitNoNullCheck);
+ instruction->SetWriteBarrierKind(WriteBarrierKind::kDontEmit);
+ MaybeRecordStat(stats_, MethodCompilationStat::kRemovedWriteBarrier);
+ } else {
+ const bool inserted = current_write_barriers_.insert({cls, instruction}).second;
+ DCHECK(inserted);
+ DCHECK(instruction->GetWriteBarrierKind() != WriteBarrierKind::kDontEmit);
+ }
+ }
+
+ void VisitArraySet(HArraySet* instruction) override {
+ if (instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC())) {
+ ClearCurrentValues();
+ }
+
+ if (instruction->GetComponentType() != DataType::Type::kReference ||
+ instruction->GetValue()->IsNullConstant()) {
+ instruction->SetWriteBarrierKind(WriteBarrierKind::kDontEmit);
+ return;
+ }
+
+ HInstruction* arr = HuntForOriginalReference(instruction->InputAt(0));
+ MaybeRecordStat(stats_, MethodCompilationStat::kPossibleWriteBarrier);
+ auto it = current_write_barriers_.find(arr);
+ if (it != current_write_barriers_.end()) {
+ DCHECK(it->second->IsArraySet());
+ DCHECK(it->second->AsArraySet()->GetWriteBarrierKind() != WriteBarrierKind::kDontEmit);
+ DCHECK_EQ(it->second->GetBlock(), instruction->GetBlock());
+ // We never skip the null check in ArraySets, so the kind is already kEmitNoNullCheck.
+ DCHECK(it->second->AsArraySet()->GetWriteBarrierKind() == WriteBarrierKind::kEmitNoNullCheck);
+ instruction->SetWriteBarrierKind(WriteBarrierKind::kDontEmit);
+ MaybeRecordStat(stats_, MethodCompilationStat::kRemovedWriteBarrier);
+ } else {
+ const bool inserted = current_write_barriers_.insert({arr, instruction}).second;
+ DCHECK(inserted);
+ DCHECK(instruction->GetWriteBarrierKind() != WriteBarrierKind::kDontEmit);
+ }
+ }
+
+ void VisitInstruction(HInstruction* instruction) override {
+ if (instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC())) {
+ ClearCurrentValues();
+ }
+ }
+
+ private:
+ void ClearCurrentValues() { current_write_barriers_.clear(); }
+
+ HInstruction* HuntForOriginalReference(HInstruction* ref) const {
+ // An original reference can be transformed by instructions like:
+ // i0 NewArray
+ // i1 HInstruction(i0) <-- NullCheck, BoundType, IntermediateAddress.
+ // i2 ArraySet(i1, index, value)
+ DCHECK(ref != nullptr);
+ while (ref->IsNullCheck() || ref->IsBoundType() || ref->IsIntermediateAddress()) {
+ ref = ref->InputAt(0);
+ }
+ return ref;
+ }
+
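+ // Scoped allocator backing `current_write_barriers_` for the duration of the pass.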
+ ScopedArenaAllocator scoped_allocator_;
+
+ // Stores a map of <Receiver, InstructionWhereTheWriteBarrierIs>.
+ // `InstructionWhereTheWriteBarrierIs` is upgraded to kEmitNoNullCheck when later write barriers
+ // are coalesced into it, and is also used in DCHECKs.
+ ScopedArenaHashMap<HInstruction*, HInstruction*> current_write_barriers_;
+
+ OptimizingCompilerStats* const stats_;
+
+ DISALLOW_COPY_AND_ASSIGN(WBEVisitor);
+};
+
+bool WriteBarrierElimination::Run() {
+ WBEVisitor wbe_visitor(graph_, stats_);
+ wbe_visitor.VisitReversePostOrder();
+ return true;
+}
+
+} // namespace art
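A minimal standalone sketch of the coalescing idea implemented by the visitor above, assuming invented stand-in types (Store, BarrierKind, CoalesceWriteBarriers) rather than ART's HInstruction hierarchy: within straight-line code, only the first reference store per receiver keeps its write barrier, and any instruction that may trigger a GC resets the tracking.

#include <cstddef>
#include <iostream>
#include <unordered_map>
#include <vector>

// Simplified stand-in for an instruction's write barrier kind.
enum class BarrierKind { kEmit, kEmitNoNullCheck, kDontEmit };

// Simplified stand-in for a field/array store in one basic block.
struct Store {
  int receiver_id;        // The base object/array/class being written into.
  bool stores_reference;  // Only reference stores need a write barrier.
  bool may_trigger_gc;    // E.g. the store may allocate (type checks, resolution).
  BarrierKind kind = BarrierKind::kEmit;
};

// Mirrors the visitor's per-block map <receiver, store that keeps the barrier>.
void CoalesceWriteBarriers(std::vector<Store>& block) {
  std::unordered_map<int, std::size_t> kept_barrier;  // receiver_id -> index of kept store.
  for (std::size_t i = 0; i < block.size(); ++i) {
    Store& store = block[i];
    if (store.may_trigger_gc) {
      kept_barrier.clear();  // A GC can invalidate the "card already marked" assumption.
    }
    if (!store.stores_reference) {
      store.kind = BarrierKind::kDontEmit;
      continue;
    }
    auto it = kept_barrier.find(store.receiver_id);
    if (it != kept_barrier.end()) {
      // The earlier barrier now covers this store too, so it can no longer be
      // skipped through a null-check fast path.
      block[it->second].kind = BarrierKind::kEmitNoNullCheck;
      store.kind = BarrierKind::kDontEmit;
    } else {
      kept_barrier.emplace(store.receiver_id, i);
    }
  }
}

int main() {
  // o.f1 = a; o.f2 = b; o.f3 = c; then a store that may GC; then o.f4 = d.
  std::vector<Store> block = {
      {/*receiver_id=*/0, /*stores_reference=*/true, /*may_trigger_gc=*/false},
      {0, true, false},  // Coalesced into the first store's barrier.
      {0, true, false},  // Coalesced as well.
      {1, true, true},   // May trigger GC: clears the map, then keeps its own barrier.
      {0, true, false},  // Needs a fresh barrier after the potential GC.
  };
  CoalesceWriteBarriers(block);
  for (std::size_t i = 0; i < block.size(); ++i) {
    std::cout << "store " << i << " kind=" << static_cast<int>(block[i].kind) << '\n';
  }
  return 0;
}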
diff --git a/compiler/optimizing/write_barrier_elimination.h b/compiler/optimizing/write_barrier_elimination.h
new file mode 100644
index 0000000000..a3769e7421
--- /dev/null
+++ b/compiler/optimizing/write_barrier_elimination.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_WRITE_BARRIER_ELIMINATION_H_
+#define ART_COMPILER_OPTIMIZING_WRITE_BARRIER_ELIMINATION_H_
+
+#include "base/macros.h"
+#include "optimization.h"
+
+namespace art HIDDEN {
+
+// Eliminates unnecessary write barriers from InstanceFieldSet, StaticFieldSet, and ArraySet.
+//
+// We can eliminate redundant write barriers because a single one per receiver is enough, as long
+// as no instruction that may trigger a GC occurs between the stores. For example:
+// MyObject o;
+// o.inner_obj = io;
+// o.inner_obj2 = io2;
+// o.inner_obj3 = io3;
+// We can keep the write barrier for `inner_obj` and remove the other two.
+//
+// To do this, we set the WriteBarrierKind of each instruction: the kind is set to kEmitNoNullCheck
+// (if this write barrier coalesced other write barriers, we don't want to perform the null check
+// optimization), or to kDontEmit (if the write barrier as a whole is not needed).
+class WriteBarrierElimination : public HOptimization {
+ public:
+ WriteBarrierElimination(HGraph* graph,
+ OptimizingCompilerStats* stats,
+ const char* name = kWBEPassName)
+ : HOptimization(graph, name, stats) {}
+
+ bool Run() override;
+
+ static constexpr const char* kWBEPassName = "write_barrier_elimination";
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(WriteBarrierElimination);
+};
+
+} // namespace art
+
+#endif // ART_COMPILER_OPTIMIZING_WRITE_BARRIER_ELIMINATION_H_
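As a usage note, a hypothetical call site for the pass might look as follows. This is a sketch under the assumption that the optimizing compiler schedules the pass once side-effect information is available; the wrapper function name is invented, and the snippet only builds inside the ART tree.

#include "write_barrier_elimination.h"

// Hypothetical helper; `graph` and `stats` come from the surrounding compiler driver.
void RunWriteBarrierElimination(art::HGraph* graph, art::OptimizingCompilerStats* stats) {
  art::WriteBarrierElimination wbe(graph, stats);  // Uses the default "write_barrier_elimination" name.
  wbe.Run();  // Downgrades redundant barriers to kDontEmit in place; always returns true.
}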
diff --git a/compiler/optimizing/x86_memory_gen.cc b/compiler/optimizing/x86_memory_gen.cc
index b1abcf6747..e266618980 100644
--- a/compiler/optimizing/x86_memory_gen.cc
+++ b/compiler/optimizing/x86_memory_gen.cc
@@ -18,13 +18,13 @@
#include "code_generator.h"
#include "driver/compiler_options.h"
-namespace art {
+namespace art HIDDEN {
namespace x86 {
/**
* Replace instructions with memory operand forms.
*/
-class MemoryOperandVisitor : public HGraphVisitor {
+class MemoryOperandVisitor final : public HGraphVisitor {
public:
MemoryOperandVisitor(HGraph* graph, bool do_implicit_null_checks)
: HGraphVisitor(graph),
diff --git a/compiler/optimizing/x86_memory_gen.h b/compiler/optimizing/x86_memory_gen.h
index 3f4178d58a..1cae1a5d3a 100644
--- a/compiler/optimizing/x86_memory_gen.h
+++ b/compiler/optimizing/x86_memory_gen.h
@@ -17,10 +17,11 @@
#ifndef ART_COMPILER_OPTIMIZING_X86_MEMORY_GEN_H_
#define ART_COMPILER_OPTIMIZING_X86_MEMORY_GEN_H_
+#include "base/macros.h"
#include "nodes.h"
#include "optimization.h"
-namespace art {
+namespace art HIDDEN {
class CodeGenerator;
namespace x86 {