-rw-r--r--  build/Android.common_build.mk | 7
-rw-r--r--  build/Android.gtest.mk | 24
-rw-r--r--  compiler/Android.mk | 6
-rw-r--r--  compiler/jni/jni_cfi_test.cc | 12
-rw-r--r--  compiler/optimizing/code_generator.cc | 3
-rw-r--r--  compiler/optimizing/code_generator_arm.cc | 9
-rw-r--r--  compiler/optimizing/code_generator_arm64.cc | 9
-rw-r--r--  compiler/optimizing/code_generator_x86.cc | 17
-rw-r--r--  compiler/optimizing/code_generator_x86.h | 3
-rw-r--r--  compiler/optimizing/code_generator_x86_64.cc | 3
-rw-r--r--  compiler/optimizing/codegen_test.cc | 61
-rw-r--r--  compiler/optimizing/intrinsics_arm.cc | 466
-rw-r--r--  compiler/optimizing/intrinsics_arm64.cc | 463
-rw-r--r--  compiler/optimizing/intrinsics_x86.cc | 492
-rw-r--r--  compiler/optimizing/intrinsics_x86_64.cc | 343
-rw-r--r--  compiler/optimizing/optimizing_cfi_test.cc | 18
-rw-r--r--  compiler/optimizing/optimizing_compiler.cc | 26
-rw-r--r--  compiler/optimizing/register_allocator_graph_color.cc | 232
-rw-r--r--  compiler/optimizing/register_allocator_graph_color.h | 18
-rw-r--r--  compiler/utils/arm/assembler_arm.h | 6
-rw-r--r--  compiler/utils/arm64/assembler_arm64.cc | 6
-rw-r--r--  compiler/utils/arm64/assembler_arm64.h | 2
-rw-r--r--  compiler/utils/jni_macro_assembler.cc | 1
-rw-r--r--  compiler/utils/x86/assembler_x86.h | 6
-rw-r--r--  compiler/utils/x86_64/assembler_x86_64.h | 6
-rw-r--r--  runtime/Android.mk | 29
-rw-r--r--  runtime/art_method.cc | 2
-rw-r--r--  runtime/dex_instruction-inl.h | 4
-rw-r--r--  runtime/dex_instruction_test.cc | 11
-rw-r--r--  runtime/gc/collector/concurrent_copying.cc | 2
-rw-r--r--  runtime/gc/collector/mark_sweep.cc | 4
-rw-r--r--  runtime/gc/heap.cc | 10
-rw-r--r--  runtime/mirror/class.cc | 2
-rw-r--r--  runtime/native/dalvik_system_ZygoteHooks.cc | 2
-rw-r--r--  runtime/thread-inl.h | 1
-rw-r--r--  runtime/thread.cc | 6
-rw-r--r--  runtime/thread.h | 38
-rw-r--r--  runtime/thread_list.cc | 36
-rw-r--r--  test/Android.run-test.mk | 5
-rw-r--r--  tools/javafuzz/README.md | 24
-rwxr-xr-x  tools/javafuzz/run_java_fuzz_test.py | 406
41 files changed, 2277 insertions, 544 deletions
diff --git a/build/Android.common_build.mk b/build/Android.common_build.mk
index 0cd90c97a6..1e2cfa3e97 100644
--- a/build/Android.common_build.mk
+++ b/build/Android.common_build.mk
@@ -157,19 +157,14 @@ ART_HOST_CODEGEN_ARCHS ?= all
ifeq ($(ART_TARGET_CODEGEN_ARCHS),all)
ART_TARGET_CODEGEN_ARCHS := $(sort $(ART_TARGET_SUPPORTED_ARCH) $(ART_HOST_SUPPORTED_ARCH))
- # We need to handle the fact that some compiler tests mix code from different architectures.
- ART_TARGET_COMPILER_TESTS ?= true
else
- ART_TARGET_COMPILER_TESTS := false
ifeq ($(ART_TARGET_CODEGEN_ARCHS),svelte)
ART_TARGET_CODEGEN_ARCHS := $(sort $(ART_TARGET_ARCH_64) $(ART_TARGET_ARCH_32))
endif
endif
ifeq ($(ART_HOST_CODEGEN_ARCHS),all)
ART_HOST_CODEGEN_ARCHS := $(sort $(ART_TARGET_SUPPORTED_ARCH) $(ART_HOST_SUPPORTED_ARCH))
- ART_HOST_COMPILER_TESTS ?= true
else
- ART_HOST_COMPILER_TESTS := false
ifeq ($(ART_HOST_CODEGEN_ARCHS),svelte)
ART_HOST_CODEGEN_ARCHS := $(sort $(ART_TARGET_CODEGEN_ARCHS) $(ART_HOST_ARCH_64) $(ART_HOST_ARCH_32))
endif
@@ -318,7 +313,7 @@ ifndef LIBART_IMG_HOST_BASE_ADDRESS
$(error LIBART_IMG_HOST_BASE_ADDRESS unset)
endif
ART_HOST_CFLAGS += -DART_BASE_ADDRESS=$(LIBART_IMG_HOST_BASE_ADDRESS)
-ART_HOST_CFLAGS += -DART_DEFAULT_INSTRUCTION_SET_FEATURES=default $(art_host_cflags)
+ART_HOST_CFLAGS += $(art_host_cflags)
ART_HOST_CFLAGS += -DART_FRAME_SIZE_LIMIT=$(ART_HOST_FRAME_SIZE_LIMIT) \
$(ART_COMMON_STACK_OVERFLOW_DEFINES)
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 3d07fc0ca8..c538c4f03a 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -299,13 +299,7 @@ COMPILER_GTEST_COMMON_SRC_FILES := \
COMPILER_GTEST_COMMON_SRC_FILES_all := \
compiler/jni/jni_cfi_test.cc \
compiler/optimizing/codegen_test.cc \
- compiler/optimizing/constant_folding_test.cc \
- compiler/optimizing/dead_code_elimination_test.cc \
- compiler/optimizing/linearize_test.cc \
- compiler/optimizing/liveness_test.cc \
- compiler/optimizing/live_ranges_test.cc \
compiler/optimizing/optimizing_cfi_test.cc \
- compiler/optimizing/register_allocator_test.cc \
COMPILER_GTEST_COMMON_SRC_FILES_arm := \
compiler/linker/arm/relative_patcher_thumb2_test.cc \
@@ -325,6 +319,16 @@ COMPILER_GTEST_COMMON_SRC_FILES_x86 := \
compiler/linker/x86/relative_patcher_x86_test.cc \
compiler/utils/x86/managed_register_x86_test.cc \
+# These tests are testing architecture-independent functionality, but happen
+# to use x86 codegen as part of the test.
+COMPILER_GTEST_COMMON_SRC_FILES_x86 += \
+ compiler/optimizing/constant_folding_test.cc \
+ compiler/optimizing/dead_code_elimination_test.cc \
+ compiler/optimizing/linearize_test.cc \
+ compiler/optimizing/live_ranges_test.cc \
+ compiler/optimizing/liveness_test.cc \
+ compiler/optimizing/register_allocator_test.cc \
+
COMPILER_GTEST_COMMON_SRC_FILES_x86_64 := \
compiler/linker/x86_64/relative_patcher_x86_64_test.cc \
@@ -359,9 +363,7 @@ COMPILER_GTEST_TARGET_SRC_FILES_x86_64 := \
$(COMPILER_GTEST_COMMON_SRC_FILES_x86_64) \
$(foreach arch,$(ART_TARGET_CODEGEN_ARCHS),$(eval COMPILER_GTEST_TARGET_SRC_FILES += $$(COMPILER_GTEST_TARGET_SRC_FILES_$(arch))))
-ifeq (true,$(ART_TARGET_COMPILER_TESTS))
- COMPILER_GTEST_TARGET_SRC_FILES += $(COMPILER_GTEST_TARGET_SRC_FILES_all)
-endif
+COMPILER_GTEST_TARGET_SRC_FILES += $(COMPILER_GTEST_TARGET_SRC_FILES_all)
COMPILER_GTEST_HOST_SRC_FILES := \
$(COMPILER_GTEST_COMMON_SRC_FILES) \
@@ -396,9 +398,7 @@ COMPILER_GTEST_HOST_SRC_FILES_x86_64 := \
compiler/utils/x86_64/assembler_x86_64_test.cc
$(foreach arch,$(ART_HOST_CODEGEN_ARCHS),$(eval COMPILER_GTEST_HOST_SRC_FILES += $$(COMPILER_GTEST_HOST_SRC_FILES_$(arch))))
-ifeq (true,$(ART_HOST_COMPILER_TESTS))
- COMPILER_GTEST_HOST_SRC_FILES += $(COMPILER_GTEST_HOST_SRC_FILES_all)
-endif
+COMPILER_GTEST_HOST_SRC_FILES += $(COMPILER_GTEST_HOST_SRC_FILES_all)
ART_TEST_CFLAGS :=
diff --git a/compiler/Android.mk b/compiler/Android.mk
index 6c6d99f616..410b2d05f2 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -47,7 +47,6 @@ LIBART_COMPILER_SRC_FILES := \
optimizing/code_generator_utils.cc \
optimizing/constant_folding.cc \
optimizing/dead_code_elimination.cc \
- optimizing/dex_cache_array_fixups_arm.cc \
optimizing/graph_checker.cc \
optimizing/graph_visualizer.cc \
optimizing/gvn.cc \
@@ -61,7 +60,6 @@ LIBART_COMPILER_SRC_FILES := \
optimizing/load_store_elimination.cc \
optimizing/locations.cc \
optimizing/nodes.cc \
- optimizing/nodes_arm64.cc \
optimizing/optimization.cc \
optimizing/optimizing_compiler.cc \
optimizing/parallel_move_resolver.cc \
@@ -78,7 +76,6 @@ LIBART_COMPILER_SRC_FILES := \
optimizing/ssa_liveness_analysis.cc \
optimizing/ssa_phi_elimination.cc \
optimizing/stack_map_stream.cc \
- optimizing/x86_memory_gen.cc \
trampolines/trampoline_compiler.cc \
utils/assembler.cc \
utils/jni_macro_assembler.cc \
@@ -94,6 +91,7 @@ LIBART_COMPILER_SRC_FILES_arm := \
linker/arm/relative_patcher_arm_base.cc \
linker/arm/relative_patcher_thumb2.cc \
optimizing/code_generator_arm.cc \
+ optimizing/dex_cache_array_fixups_arm.cc \
optimizing/intrinsics_arm.cc \
utils/arm/assembler_arm.cc \
utils/arm/assembler_arm32.cc \
@@ -109,6 +107,7 @@ LIBART_COMPILER_SRC_FILES_arm64 := \
$(LIBART_COMPILER_SRC_FILES_arm) \
jni/quick/arm64/calling_convention_arm64.cc \
linker/arm64/relative_patcher_arm64.cc \
+ optimizing/nodes_arm64.cc \
optimizing/code_generator_arm64.cc \
optimizing/instruction_simplifier_arm.cc \
optimizing/instruction_simplifier_arm64.cc \
@@ -144,6 +143,7 @@ LIBART_COMPILER_SRC_FILES_x86 := \
optimizing/code_generator_x86.cc \
optimizing/intrinsics_x86.cc \
optimizing/pc_relative_fixups_x86.cc \
+ optimizing/x86_memory_gen.cc \
utils/x86/assembler_x86.cc \
utils/x86/jni_macro_assembler_x86.cc \
utils/x86/managed_register_x86.cc \
diff --git a/compiler/jni/jni_cfi_test.cc b/compiler/jni/jni_cfi_test.cc
index 524ce4d34e..4b056f552a 100644
--- a/compiler/jni/jni_cfi_test.cc
+++ b/compiler/jni/jni_cfi_test.cc
@@ -104,12 +104,24 @@ class JNICFITest : public CFITest {
TestImpl(isa, #isa, expected_asm, expected_cfi); \
}
+#ifdef ART_ENABLE_CODEGEN_arm
TEST_ISA(kThumb2)
+#endif
+#ifdef ART_ENABLE_CODEGEN_arm64
TEST_ISA(kArm64)
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86
TEST_ISA(kX86)
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86_64
TEST_ISA(kX86_64)
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips
TEST_ISA(kMips)
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips64
TEST_ISA(kMips64)
+#endif
#endif // ART_TARGET_ANDROID
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 5152075499..c532e72465 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -1228,7 +1228,8 @@ void CodeGenerator::ValidateInvokeRuntimeWithoutRecordingPcInfo(HInstruction* in
instruction->IsLoadString() ||
instruction->IsInstanceOf() ||
instruction->IsCheckCast() ||
- (instruction->IsInvokeVirtual() && instruction->GetLocations()->Intrinsified()))
+ (instruction->IsInvokeVirtual() && instruction->GetLocations()->Intrinsified()) ||
+ (instruction->IsInvokeStaticOrDirect() && instruction->GetLocations()->Intrinsified()))
<< "instruction->DebugName()=" << instruction->DebugName()
<< " slow_path->GetDescription()=" << slow_path->GetDescription();
}
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 870d1fbd29..404f044cef 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -429,7 +429,8 @@ class ReadBarrierMarkSlowPathARM : public SlowPathCode {
instruction_->IsLoadString() ||
instruction_->IsInstanceOf() ||
instruction_->IsCheckCast() ||
- (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())
+ (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
+ (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
<< "Unexpected instruction in read barrier marking slow path: "
<< instruction_->DebugName();
@@ -441,6 +442,9 @@ class ReadBarrierMarkSlowPathARM : public SlowPathCode {
DCHECK_NE(reg, SP);
DCHECK_NE(reg, LR);
DCHECK_NE(reg, PC);
+ // IP is used internally by the ReadBarrierMarkRegX entry point
+ // as a temporary, it cannot be the entry point's input/output.
+ DCHECK_NE(reg, IP);
DCHECK(0 <= reg && reg < kNumberOfCoreRegisters) << reg;
// "Compact" slow path, saving two moves.
//
@@ -6482,7 +6486,8 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i
// Introduce a dependency on the lock_word including the rb_state,
// which shall prevent load-load reordering without using
// a memory barrier (which would be more expensive).
- // obj is unchanged by this operation, but its value now depends on temp_reg.
+ // `obj` is unchanged by this operation, but its value now depends
+ // on `temp_reg`.
__ add(obj, obj, ShifterOperand(temp_reg, LSR, 32));
// The actual reference load.
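The `add obj, obj, temp_reg, LSR #32` idiom above is easy to misread, so here is a minimal standalone C++ sketch of the arithmetic it relies on (the function name is illustrative, not ART code): a logical shift right by 32 of the 32-bit lock word yields zero on ARM, so the add leaves `obj` unchanged while making its value data-dependent on the just-loaded lock word, which is what prevents the processor from reordering the subsequent reference load before the lock word load.

#include <cstdint>

// Illustrative only: models the value semantics of "add obj, obj, temp_reg, LSR #32"
// on 32-bit registers. The shift amount 32 is valid in an ARM shifter operand and
// produces 0; in C++ we widen to 64 bits first so the shift stays well defined.
static inline uint32_t AddZeroButCreateDependency(uint32_t obj, uint32_t lock_word) {
  uint32_t zero = static_cast<uint32_t>(static_cast<uint64_t>(lock_word) >> 32);
  return obj + zero;  // Same value as `obj`, but now data-dependent on `lock_word`.
}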
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 004d427511..122c174eae 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -595,7 +595,8 @@ class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 {
instruction_->IsLoadString() ||
instruction_->IsInstanceOf() ||
instruction_->IsCheckCast() ||
- (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())
+ (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
+ (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
<< "Unexpected instruction in read barrier marking slow path: "
<< instruction_->DebugName();
@@ -607,7 +608,8 @@ class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 {
DCHECK_NE(obj_.reg(), LR);
DCHECK_NE(obj_.reg(), WSP);
DCHECK_NE(obj_.reg(), WZR);
- // WIP0 is used by the slow path as a temp, it can not be the object register.
+ // IP0 is used internally by the ReadBarrierMarkRegX entry point
+ // as a temporary, it cannot be the entry point's input/output.
DCHECK_NE(obj_.reg(), IP0);
DCHECK(0 <= obj_.reg() && obj_.reg() < kNumberOfWRegisters) << obj_.reg();
// "Compact" slow path, saving two moves.
@@ -5190,7 +5192,8 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction*
// Introduce a dependency on the lock_word including rb_state,
// to prevent load-load reordering, and without using
// a memory barrier (which would be more expensive).
- // obj is unchanged by this operation, but its value now depends on temp.
+ // `obj` is unchanged by this operation, but its value now depends
+ // on `temp`.
__ Add(obj.X(), obj.X(), Operand(temp.X(), LSR, 32));
// The actual reference load.
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 0305d6a030..7aca16f867 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -464,7 +464,8 @@ class ReadBarrierMarkSlowPathX86 : public SlowPathCode {
instruction_->IsLoadString() ||
instruction_->IsInstanceOf() ||
instruction_->IsCheckCast() ||
- (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())
+ (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
+ (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
<< "Unexpected instruction in read barrier marking slow path: "
<< instruction_->DebugName();
@@ -1578,15 +1579,15 @@ void LocationsBuilderX86::VisitSelect(HSelect* select) {
locations->SetOut(Location::SameAsFirstInput());
}
-void InstructionCodeGeneratorX86::GenerateIntCompare(Location lhs, Location rhs) {
+void CodeGeneratorX86::GenerateIntCompare(Location lhs, Location rhs) {
Register lhs_reg = lhs.AsRegister<Register>();
if (rhs.IsConstant()) {
int32_t value = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
- codegen_->Compare32BitValue(lhs_reg, value);
+ Compare32BitValue(lhs_reg, value);
} else if (rhs.IsStackSlot()) {
- __ cmpl(lhs_reg, Address(ESP, rhs.GetStackIndex()));
+ assembler_.cmpl(lhs_reg, Address(ESP, rhs.GetStackIndex()));
} else {
- __ cmpl(lhs_reg, rhs.AsRegister<Register>());
+ assembler_.cmpl(lhs_reg, rhs.AsRegister<Register>());
}
}
@@ -1619,7 +1620,7 @@ void InstructionCodeGeneratorX86::VisitSelect(HSelect* select) {
DCHECK_NE(condition->InputAt(0)->GetType(), Primitive::kPrimLong);
DCHECK(!Primitive::IsFloatingPointType(condition->InputAt(0)->GetType()));
LocationSummary* cond_locations = condition->GetLocations();
- GenerateIntCompare(cond_locations->InAt(0), cond_locations->InAt(1));
+ codegen_->GenerateIntCompare(cond_locations->InAt(0), cond_locations->InAt(1));
cond = X86Condition(condition->GetCondition());
}
} else {
@@ -1728,7 +1729,7 @@ void InstructionCodeGeneratorX86::HandleCondition(HCondition* cond) {
// Clear output register: setb only sets the low byte.
__ xorl(reg, reg);
- GenerateIntCompare(lhs, rhs);
+ codegen_->GenerateIntCompare(lhs, rhs);
__ setb(X86Condition(cond->GetCondition()), reg);
return;
}
@@ -4210,7 +4211,7 @@ void InstructionCodeGeneratorX86::VisitCompare(HCompare* compare) {
case Primitive::kPrimShort:
case Primitive::kPrimChar:
case Primitive::kPrimInt: {
- GenerateIntCompare(left, right);
+ codegen_->GenerateIntCompare(left, right);
break;
}
case Primitive::kPrimLong: {
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index f306b33247..894f2e8f40 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -295,7 +295,6 @@ class InstructionCodeGeneratorX86 : public InstructionCodeGenerator {
HBasicBlock* default_block);
void GenerateFPCompare(Location lhs, Location rhs, HInstruction* insn, bool is_double);
- void GenerateIntCompare(Location lhs, Location rhs);
X86Assembler* const assembler_;
CodeGeneratorX86* const codegen_;
@@ -431,6 +430,8 @@ class CodeGeneratorX86 : public CodeGenerator {
Register value,
bool value_can_be_null);
+ void GenerateIntCompare(Location lhs, Location rhs);
+
void GenerateMemoryBarrier(MemBarrierKind kind);
Label* GetLabelOf(HBasicBlock* block) const {
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 9ecd14ec5b..0c55ae44de 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -485,7 +485,8 @@ class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
instruction_->IsLoadString() ||
instruction_->IsInstanceOf() ||
instruction_->IsCheckCast() ||
- (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())
+ (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
+ (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
<< "Unexpected instruction in read barrier marking slow path: "
<< instruction_->DebugName();
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index 18db507c48..fe6c0a305e 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -29,12 +29,6 @@
#include "arch/x86_64/instruction_set_features_x86_64.h"
#include "base/macros.h"
#include "builder.h"
-#include "code_generator_arm.h"
-#include "code_generator_arm64.h"
-#include "code_generator_mips.h"
-#include "code_generator_mips64.h"
-#include "code_generator_x86.h"
-#include "code_generator_x86_64.h"
#include "code_simulator_container.h"
#include "common_compiler_test.h"
#include "dex_file.h"
@@ -52,10 +46,35 @@
#include "utils/mips64/managed_register_mips64.h"
#include "utils/x86/managed_register_x86.h"
+#ifdef ART_ENABLE_CODEGEN_arm
+#include "code_generator_arm.h"
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_arm64
+#include "code_generator_arm64.h"
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_x86
+#include "code_generator_x86.h"
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_x86_64
+#include "code_generator_x86_64.h"
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_mips
+#include "code_generator_mips.h"
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_mips64
+#include "code_generator_mips64.h"
+#endif
+
#include "gtest/gtest.h"
namespace art {
+#ifdef ART_ENABLE_CODEGEN_arm
// Provide our own codegen, that ensures the C calling conventions
// are preserved. Currently, ART and C do not match as R4 is caller-save
// in ART, and callee-save in C. Alternatively, we could use or write
@@ -80,7 +99,9 @@ class TestCodeGeneratorARM : public arm::CodeGeneratorARM {
blocked_register_pairs_[arm::R6_R7] = false;
}
};
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86
class TestCodeGeneratorX86 : public x86::CodeGeneratorX86 {
public:
TestCodeGeneratorX86(HGraph* graph,
@@ -105,6 +126,7 @@ class TestCodeGeneratorX86 : public x86::CodeGeneratorX86 {
blocked_register_pairs_[x86::ECX_EDI] = false;
}
};
+#endif
class InternalCodeAllocator : public CodeAllocator {
public:
@@ -234,37 +256,54 @@ static void RunCode(InstructionSet target_isa,
bool has_result,
Expected expected) {
CompilerOptions compiler_options;
+#ifdef ART_ENABLE_CODEGEN_arm
if (target_isa == kArm || target_isa == kThumb2) {
std::unique_ptr<const ArmInstructionSetFeatures> features_arm(
ArmInstructionSetFeatures::FromCppDefines());
TestCodeGeneratorARM codegenARM(graph, *features_arm.get(), compiler_options);
RunCode(&codegenARM, graph, hook_before_codegen, has_result, expected);
- } else if (target_isa == kArm64) {
+ }
+#endif
+#ifdef ART_ENABLE_CODEGEN_arm64
+ if (target_isa == kArm64) {
std::unique_ptr<const Arm64InstructionSetFeatures> features_arm64(
Arm64InstructionSetFeatures::FromCppDefines());
arm64::CodeGeneratorARM64 codegenARM64(graph, *features_arm64.get(), compiler_options);
RunCode(&codegenARM64, graph, hook_before_codegen, has_result, expected);
- } else if (target_isa == kX86) {
+ }
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86
+ if (target_isa == kX86) {
std::unique_ptr<const X86InstructionSetFeatures> features_x86(
X86InstructionSetFeatures::FromCppDefines());
TestCodeGeneratorX86 codegenX86(graph, *features_x86.get(), compiler_options);
RunCode(&codegenX86, graph, hook_before_codegen, has_result, expected);
- } else if (target_isa == kX86_64) {
+ }
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86_64
+ if (target_isa == kX86_64) {
std::unique_ptr<const X86_64InstructionSetFeatures> features_x86_64(
X86_64InstructionSetFeatures::FromCppDefines());
x86_64::CodeGeneratorX86_64 codegenX86_64(graph, *features_x86_64.get(), compiler_options);
RunCode(&codegenX86_64, graph, hook_before_codegen, has_result, expected);
- } else if (target_isa == kMips) {
+ }
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips
+ if (target_isa == kMips) {
std::unique_ptr<const MipsInstructionSetFeatures> features_mips(
MipsInstructionSetFeatures::FromCppDefines());
mips::CodeGeneratorMIPS codegenMIPS(graph, *features_mips.get(), compiler_options);
RunCode(&codegenMIPS, graph, hook_before_codegen, has_result, expected);
- } else if (target_isa == kMips64) {
+ }
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips64
+ if (target_isa == kMips64) {
std::unique_ptr<const Mips64InstructionSetFeatures> features_mips64(
Mips64InstructionSetFeatures::FromCppDefines());
mips64::CodeGeneratorMIPS64 codegenMIPS64(graph, *features_mips64.get(), compiler_options);
RunCode(&codegenMIPS64, graph, hook_before_codegen, has_result, expected);
}
+#endif
}
static ::std::vector<InstructionSet> GetTargetISAs() {
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 27d9d48560..0bbc0e54bc 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -41,6 +41,92 @@ ArenaAllocator* IntrinsicCodeGeneratorARM::GetAllocator() {
using IntrinsicSlowPathARM = IntrinsicSlowPath<InvokeDexCallingConventionVisitorARM>;
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<ArmAssembler*>(codegen->GetAssembler())-> // NOLINT
+
+// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
+class ReadBarrierSystemArrayCopySlowPathARM : public SlowPathCode {
+ public:
+ explicit ReadBarrierSystemArrayCopySlowPathARM(HInstruction* instruction)
+ : SlowPathCode(instruction) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+ }
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
+ LocationSummary* locations = instruction_->GetLocations();
+ DCHECK(locations->CanCall());
+ DCHECK(instruction_->IsInvokeStaticOrDirect())
+ << "Unexpected instruction in read barrier arraycopy slow path: "
+ << instruction_->DebugName();
+ DCHECK(instruction_->GetLocations()->Intrinsified());
+ DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
+
+ int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
+ uint32_t element_size_shift = Primitive::ComponentSizeShift(Primitive::kPrimNot);
+ uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
+
+ Register dest = locations->InAt(2).AsRegister<Register>();
+ Location dest_pos = locations->InAt(3);
+ Register src_curr_addr = locations->GetTemp(0).AsRegister<Register>();
+ Register dst_curr_addr = locations->GetTemp(1).AsRegister<Register>();
+ Register src_stop_addr = locations->GetTemp(2).AsRegister<Register>();
+ Register tmp = locations->GetTemp(3).AsRegister<Register>();
+
+ __ Bind(GetEntryLabel());
+ // Compute the base destination address in `dst_curr_addr`.
+ if (dest_pos.IsConstant()) {
+ int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+ __ AddConstant(dst_curr_addr, dest, element_size * constant + offset);
+ } else {
+ __ add(dst_curr_addr,
+ dest,
+ ShifterOperand(dest_pos.AsRegister<Register>(), LSL, element_size_shift));
+ __ AddConstant(dst_curr_addr, offset);
+ }
+
+ Label loop;
+ __ Bind(&loop);
+ __ ldr(tmp, Address(src_curr_addr, element_size, Address::PostIndex));
+ __ MaybeUnpoisonHeapReference(tmp);
+ // TODO: Inline the mark bit check before calling the runtime?
+ // tmp = ReadBarrier::Mark(tmp);
+ // No need to save live registers; it's taken care of by the
+ // entrypoint. Also, there is no need to update the stack mask,
+ // as this runtime call will not trigger a garbage collection.
+ // (See ReadBarrierMarkSlowPathARM::EmitNativeCode for more
+ // explanations.)
+ DCHECK_NE(tmp, SP);
+ DCHECK_NE(tmp, LR);
+ DCHECK_NE(tmp, PC);
+ // IP is used internally by the ReadBarrierMarkRegX entry point
+ // as a temporary (and not preserved). It thus cannot be used by
+ // any live register in this slow path.
+ DCHECK_NE(src_curr_addr, IP);
+ DCHECK_NE(dst_curr_addr, IP);
+ DCHECK_NE(src_stop_addr, IP);
+ DCHECK_NE(tmp, IP);
+ DCHECK(0 <= tmp && tmp < kNumberOfCoreRegisters) << tmp;
+ int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(tmp);
+ // This runtime call does not require a stack map.
+ arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+ __ MaybePoisonHeapReference(tmp);
+ __ str(tmp, Address(dst_curr_addr, element_size, Address::PostIndex));
+ __ cmp(src_curr_addr, ShifterOperand(src_stop_addr));
+ __ b(&loop, NE);
+ __ b(GetExitLabel());
+ }
+
+ const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathARM"; }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathARM);
+};
+
+#undef __
+
bool IntrinsicLocationsBuilderARM::TryDispatch(HInvoke* invoke) {
Dispatch(invoke);
LocationSummary* res = invoke->GetLocations();
@@ -1337,9 +1423,9 @@ void IntrinsicCodeGeneratorARM::VisitStringNewStringFromString(HInvoke* invoke)
}
void IntrinsicLocationsBuilderARM::VisitSystemArrayCopy(HInvoke* invoke) {
- // TODO(rpl): Implement read barriers in the SystemArrayCopy
- // intrinsic and re-enable it (b/29516905).
- if (kEmitCompilerReadBarrier) {
+ // The only read barrier implementation supporting the
+ // SystemArrayCopy intrinsic is the Baker-style read barriers.
+ if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
return;
}
@@ -1362,6 +1448,13 @@ void IntrinsicLocationsBuilderARM::VisitSystemArrayCopy(HInvoke* invoke) {
if (length != nullptr && !assembler_->ShifterOperandCanAlwaysHold(length->GetValue())) {
locations->SetInAt(4, Location::RequiresRegister());
}
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // Temporary register IP cannot be used in
+ // ReadBarrierSystemArrayCopySlowPathARM64 (because that register
+ // is clobbered by ReadBarrierMarkRegX entry points). Get an extra
+ // temporary register from the register allocator.
+ locations->AddTemp(Location::RequiresRegister());
+ }
}
static void CheckPosition(ArmAssembler* assembler,
@@ -1427,9 +1520,9 @@ static void CheckPosition(ArmAssembler* assembler,
}
void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) {
- // TODO(rpl): Implement read barriers in the SystemArrayCopy
- // intrinsic and re-enable it (b/29516905).
- DCHECK(!kEmitCompilerReadBarrier);
+ // The only read barrier implementation supporting the
+ // SystemArrayCopy intrinsic is the Baker-style read barriers.
+ DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
ArmAssembler* assembler = GetAssembler();
LocationSummary* locations = invoke->GetLocations();
@@ -1438,18 +1531,22 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) {
uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+ uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
Register src = locations->InAt(0).AsRegister<Register>();
Location src_pos = locations->InAt(1);
Register dest = locations->InAt(2).AsRegister<Register>();
Location dest_pos = locations->InAt(3);
Location length = locations->InAt(4);
- Register temp1 = locations->GetTemp(0).AsRegister<Register>();
- Register temp2 = locations->GetTemp(1).AsRegister<Register>();
- Register temp3 = locations->GetTemp(2).AsRegister<Register>();
+ Location temp1_loc = locations->GetTemp(0);
+ Register temp1 = temp1_loc.AsRegister<Register>();
+ Location temp2_loc = locations->GetTemp(1);
+ Register temp2 = temp2_loc.AsRegister<Register>();
+ Location temp3_loc = locations->GetTemp(2);
+ Register temp3 = temp3_loc.AsRegister<Register>();
- SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke);
- codegen_->AddSlowPath(slow_path);
+ SlowPathCode* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke);
+ codegen_->AddSlowPath(intrinsic_slow_path);
Label conditions_on_positions_validated;
SystemArrayCopyOptimizations optimizations(invoke);
@@ -1465,7 +1562,7 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) {
DCHECK_GE(src_pos_constant, dest_pos_constant);
} else if (src_pos_constant < dest_pos_constant) {
__ cmp(src, ShifterOperand(dest));
- __ b(slow_path->GetEntryLabel(), EQ);
+ __ b(intrinsic_slow_path->GetEntryLabel(), EQ);
}
// Checked when building locations.
@@ -1477,7 +1574,7 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) {
__ b(&conditions_on_positions_validated, NE);
}
__ cmp(dest_pos.AsRegister<Register>(), ShifterOperand(src_pos_constant));
- __ b(slow_path->GetEntryLabel(), GT);
+ __ b(intrinsic_slow_path->GetEntryLabel(), GT);
}
} else {
if (!optimizations.GetDestinationIsSource()) {
@@ -1490,19 +1587,19 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) {
} else {
__ cmp(src_pos.AsRegister<Register>(), ShifterOperand(dest_pos.AsRegister<Register>()));
}
- __ b(slow_path->GetEntryLabel(), LT);
+ __ b(intrinsic_slow_path->GetEntryLabel(), LT);
}
__ Bind(&conditions_on_positions_validated);
if (!optimizations.GetSourceIsNotNull()) {
// Bail out if the source is null.
- __ CompareAndBranchIfZero(src, slow_path->GetEntryLabel());
+ __ CompareAndBranchIfZero(src, intrinsic_slow_path->GetEntryLabel());
}
if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
// Bail out if the destination is null.
- __ CompareAndBranchIfZero(dest, slow_path->GetEntryLabel());
+ __ CompareAndBranchIfZero(dest, intrinsic_slow_path->GetEntryLabel());
}
// If the length is negative, bail out.
@@ -1511,7 +1608,7 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) {
!optimizations.GetCountIsSourceLength() &&
!optimizations.GetCountIsDestinationLength()) {
__ cmp(length.AsRegister<Register>(), ShifterOperand(0));
- __ b(slow_path->GetEntryLabel(), LT);
+ __ b(intrinsic_slow_path->GetEntryLabel(), LT);
}
// Validity checks: source.
@@ -1519,7 +1616,7 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) {
src_pos,
src,
length,
- slow_path,
+ intrinsic_slow_path,
temp1,
optimizations.GetCountIsSourceLength());
@@ -1528,7 +1625,7 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) {
dest_pos,
dest,
length,
- slow_path,
+ intrinsic_slow_path,
temp1,
optimizations.GetCountIsDestinationLength());
@@ -1537,112 +1634,287 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) {
// type of the destination array. We do two checks: the classes are the same,
// or the destination is Object[]. If none of these checks succeed, we go to the
// slow path.
- __ LoadFromOffset(kLoadWord, temp1, dest, class_offset);
- __ LoadFromOffset(kLoadWord, temp2, src, class_offset);
- bool did_unpoison = false;
- if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
- !optimizations.GetSourceIsNonPrimitiveArray()) {
- // One or two of the references need to be unpoisoned. Unpoison them
- // both to make the identity check valid.
- __ MaybeUnpoisonHeapReference(temp1);
- __ MaybeUnpoisonHeapReference(temp2);
- did_unpoison = true;
- }
- if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
- // Bail out if the destination is not a non primitive array.
- // /* HeapReference<Class> */ temp3 = temp1->component_type_
- __ LoadFromOffset(kLoadWord, temp3, temp1, component_offset);
- __ CompareAndBranchIfZero(temp3, slow_path->GetEntryLabel());
- __ MaybeUnpoisonHeapReference(temp3);
- __ LoadFromOffset(kLoadUnsignedHalfword, temp3, temp3, primitive_offset);
- static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
- __ CompareAndBranchIfNonZero(temp3, slow_path->GetEntryLabel());
- }
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+ // /* HeapReference<Class> */ temp1 = src->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false);
+ // Bail out if the source is not a non primitive array.
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
+ __ CompareAndBranchIfZero(temp1, intrinsic_slow_path->GetEntryLabel());
+ // If heap poisoning is enabled, `temp1` has been unpoisoned
+ // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
+ // /* uint16_t */ temp1 = static_cast<uint16>(temp1->primitive_type_);
+ __ LoadFromOffset(kLoadUnsignedHalfword, temp1, temp1, primitive_offset);
+ static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+ __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
+ }
- if (!optimizations.GetSourceIsNonPrimitiveArray()) {
- // Bail out if the source is not a non primitive array.
- // /* HeapReference<Class> */ temp3 = temp2->component_type_
- __ LoadFromOffset(kLoadWord, temp3, temp2, component_offset);
- __ CompareAndBranchIfZero(temp3, slow_path->GetEntryLabel());
- __ MaybeUnpoisonHeapReference(temp3);
- __ LoadFromOffset(kLoadUnsignedHalfword, temp3, temp3, primitive_offset);
- static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
- __ CompareAndBranchIfNonZero(temp3, slow_path->GetEntryLabel());
- }
+ // /* HeapReference<Class> */ temp1 = dest->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, dest, class_offset, temp2_loc, /* needs_null_check */ false);
+
+ if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
+ // Bail out if the destination is not a non primitive array.
+ //
+ // Register `temp1` is not trashed by the read barrier emitted
+ // by GenerateFieldLoadWithBakerReadBarrier below, as that
+ // method produces a call to a ReadBarrierMarkRegX entry point,
+ // which saves all potentially live registers, including
+ // temporaries such as `temp1`.
+ // /* HeapReference<Class> */ temp2 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp2_loc, temp1, component_offset, temp3_loc, /* needs_null_check */ false);
+ __ CompareAndBranchIfZero(temp2, intrinsic_slow_path->GetEntryLabel());
+ // If heap poisoning is enabled, `temp2` has been unpoisoned
+ // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
+ // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_);
+ __ LoadFromOffset(kLoadUnsignedHalfword, temp2, temp2, primitive_offset);
+ static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+ __ CompareAndBranchIfNonZero(temp2, intrinsic_slow_path->GetEntryLabel());
+ }
+
+ // For the same reason given earlier, `temp1` is not trashed by the
+ // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
+ // /* HeapReference<Class> */ temp2 = src->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp2_loc, src, class_offset, temp3_loc, /* needs_null_check */ false);
+ // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
+ __ cmp(temp1, ShifterOperand(temp2));
+
+ if (optimizations.GetDestinationIsTypedObjectArray()) {
+ Label do_copy;
+ __ b(&do_copy, EQ);
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
+ // /* HeapReference<Class> */ temp1 = temp1->super_class_
+ // We do not need to emit a read barrier for the following
+ // heap reference load, as `temp1` is only used in a
+ // comparison with null below, and this reference is not
+ // kept afterwards.
+ __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset);
+ __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
+ __ Bind(&do_copy);
+ } else {
+ __ b(intrinsic_slow_path->GetEntryLabel(), NE);
+ }
+ } else {
+ // Non read barrier code.
+
+ // /* HeapReference<Class> */ temp1 = dest->klass_
+ __ LoadFromOffset(kLoadWord, temp1, dest, class_offset);
+ // /* HeapReference<Class> */ temp2 = src->klass_
+ __ LoadFromOffset(kLoadWord, temp2, src, class_offset);
+ bool did_unpoison = false;
+ if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
+ !optimizations.GetSourceIsNonPrimitiveArray()) {
+ // One or two of the references need to be unpoisoned. Unpoison them
+ // both to make the identity check valid.
+ __ MaybeUnpoisonHeapReference(temp1);
+ __ MaybeUnpoisonHeapReference(temp2);
+ did_unpoison = true;
+ }
- __ cmp(temp1, ShifterOperand(temp2));
+ if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
+ // Bail out if the destination is not a non primitive array.
+ // /* HeapReference<Class> */ temp3 = temp1->component_type_
+ __ LoadFromOffset(kLoadWord, temp3, temp1, component_offset);
+ __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
+ __ MaybeUnpoisonHeapReference(temp3);
+ // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
+ __ LoadFromOffset(kLoadUnsignedHalfword, temp3, temp3, primitive_offset);
+ static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+ __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
+ }
+
+ if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+ // Bail out if the source is not a non primitive array.
+ // /* HeapReference<Class> */ temp3 = temp2->component_type_
+ __ LoadFromOffset(kLoadWord, temp3, temp2, component_offset);
+ __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
+ __ MaybeUnpoisonHeapReference(temp3);
+ // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
+ __ LoadFromOffset(kLoadUnsignedHalfword, temp3, temp3, primitive_offset);
+ static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+ __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
+ }
- if (optimizations.GetDestinationIsTypedObjectArray()) {
- Label do_copy;
- __ b(&do_copy, EQ);
- if (!did_unpoison) {
+ __ cmp(temp1, ShifterOperand(temp2));
+
+ if (optimizations.GetDestinationIsTypedObjectArray()) {
+ Label do_copy;
+ __ b(&do_copy, EQ);
+ if (!did_unpoison) {
+ __ MaybeUnpoisonHeapReference(temp1);
+ }
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset);
__ MaybeUnpoisonHeapReference(temp1);
+ // /* HeapReference<Class> */ temp1 = temp1->super_class_
+ __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset);
+ // No need to unpoison the result, we're comparing against null.
+ __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
+ __ Bind(&do_copy);
+ } else {
+ __ b(intrinsic_slow_path->GetEntryLabel(), NE);
}
- // /* HeapReference<Class> */ temp1 = temp1->component_type_
- __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset);
- __ MaybeUnpoisonHeapReference(temp1);
- // /* HeapReference<Class> */ temp1 = temp1->super_class_
- __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset);
- // No need to unpoison the result, we're comparing against null.
- __ CompareAndBranchIfNonZero(temp1, slow_path->GetEntryLabel());
- __ Bind(&do_copy);
- } else {
- __ b(slow_path->GetEntryLabel(), NE);
}
} else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
// Bail out if the source is not a non primitive array.
- // /* HeapReference<Class> */ temp1 = src->klass_
- __ LoadFromOffset(kLoadWord, temp1, src, class_offset);
- __ MaybeUnpoisonHeapReference(temp1);
- // /* HeapReference<Class> */ temp3 = temp1->component_type_
- __ LoadFromOffset(kLoadWord, temp3, temp1, component_offset);
- __ CompareAndBranchIfZero(temp3, slow_path->GetEntryLabel());
- __ MaybeUnpoisonHeapReference(temp3);
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // /* HeapReference<Class> */ temp1 = src->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false);
+ // /* HeapReference<Class> */ temp3 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp3_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
+ __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
+ // If heap poisoning is enabled, `temp3` has been unpoisoned
+ // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
+ } else {
+ // /* HeapReference<Class> */ temp1 = src->klass_
+ __ LoadFromOffset(kLoadWord, temp1, src, class_offset);
+ __ MaybeUnpoisonHeapReference(temp1);
+ // /* HeapReference<Class> */ temp3 = temp1->component_type_
+ __ LoadFromOffset(kLoadWord, temp3, temp1, component_offset);
+ __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
+ __ MaybeUnpoisonHeapReference(temp3);
+ }
+ // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
__ LoadFromOffset(kLoadUnsignedHalfword, temp3, temp3, primitive_offset);
static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
- __ CompareAndBranchIfNonZero(temp3, slow_path->GetEntryLabel());
+ __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
}
- // Compute base source address, base destination address, and end source address.
-
int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
+ uint32_t element_size_shift = Primitive::ComponentSizeShift(Primitive::kPrimNot);
uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
+
+ // Compute the base source address in `temp1`.
if (src_pos.IsConstant()) {
int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
__ AddConstant(temp1, src, element_size * constant + offset);
} else {
- __ add(temp1, src, ShifterOperand(src_pos.AsRegister<Register>(), LSL, 2));
+ __ add(temp1, src, ShifterOperand(src_pos.AsRegister<Register>(), LSL, element_size_shift));
__ AddConstant(temp1, offset);
}
- if (dest_pos.IsConstant()) {
- int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
- __ AddConstant(temp2, dest, element_size * constant + offset);
- } else {
- __ add(temp2, dest, ShifterOperand(dest_pos.AsRegister<Register>(), LSL, 2));
- __ AddConstant(temp2, offset);
- }
-
+ // Compute the end source address in `temp3`.
if (length.IsConstant()) {
int32_t constant = length.GetConstant()->AsIntConstant()->GetValue();
__ AddConstant(temp3, temp1, element_size * constant);
} else {
- __ add(temp3, temp1, ShifterOperand(length.AsRegister<Register>(), LSL, 2));
+ __ add(temp3, temp1, ShifterOperand(length.AsRegister<Register>(), LSL, element_size_shift));
}
- // Iterate over the arrays and do a raw copy of the objects. We don't need to
- // poison/unpoison.
- Label loop, done;
- __ cmp(temp1, ShifterOperand(temp3));
- __ b(&done, EQ);
- __ Bind(&loop);
- __ ldr(IP, Address(temp1, element_size, Address::PostIndex));
- __ str(IP, Address(temp2, element_size, Address::PostIndex));
- __ cmp(temp1, ShifterOperand(temp3));
- __ b(&loop, NE);
- __ Bind(&done);
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // The base destination address is computed later, as `temp2` is
+ // used for intermediate computations.
+
+ // SystemArrayCopy implementation for Baker read barriers (see
+ // also CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier):
+ //
+ // if (src_ptr != end_ptr) {
+ // uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
+ // lfence; // Load fence or artificial data dependency to prevent load-load reordering
+ // bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+ // if (is_gray) {
+ // // Slow-path copy.
+ // do {
+ // *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
+ // } while (src_ptr != end_ptr)
+ // } else {
+ // // Fast-path copy.
+ // do {
+ // *dest_ptr++ = *src_ptr++;
+ // } while (src_ptr != end_ptr)
+ // }
+ // }
+
+ Label loop, done;
+
+ // Don't enter copy loop if `length == 0`.
+ __ cmp(temp1, ShifterOperand(temp3));
+ __ b(&done, EQ);
+
+ // /* int32_t */ monitor = src->monitor_
+ __ LoadFromOffset(kLoadWord, temp2, src, monitor_offset);
+ // /* LockWord */ lock_word = LockWord(monitor)
+ static_assert(sizeof(LockWord) == sizeof(int32_t),
+ "art::LockWord and int32_t have different sizes.");
+
+ // Introduce a dependency on the lock_word including the rb_state,
+ // which shall prevent load-load reordering without using
+ // a memory barrier (which would be more expensive).
+ // `src` is unchanged by this operation, but its value now depends
+ // on `temp2`.
+ __ add(src, src, ShifterOperand(temp2, LSR, 32));
+
+ // Slow path used to copy array when `src` is gray.
+ SlowPathCode* read_barrier_slow_path =
+ new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathARM(invoke);
+ codegen_->AddSlowPath(read_barrier_slow_path);
+
+ // Given the numeric representation, it's enough to check the low bit of the
+ // rb_state. We do that by shifting the bit out of the lock word with LSRS
+ // which can be a 16-bit instruction unlike the TST immediate.
+ static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+ static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+ __ Lsrs(temp2, temp2, LockWord::kReadBarrierStateShift + 1);
+ // Carry flag is the last bit shifted out by LSRS.
+ __ b(read_barrier_slow_path->GetEntryLabel(), CS);
+
+ // Fast-path copy.
+
+ // Compute the base destination address in `temp2`.
+ if (dest_pos.IsConstant()) {
+ int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+ __ AddConstant(temp2, dest, element_size * constant + offset);
+ } else {
+ __ add(temp2, dest, ShifterOperand(dest_pos.AsRegister<Register>(), LSL, element_size_shift));
+ __ AddConstant(temp2, offset);
+ }
+
+ // Iterate over the arrays and do a raw copy of the objects. We don't need to
+ // poison/unpoison.
+ __ Bind(&loop);
+ __ ldr(IP, Address(temp1, element_size, Address::PostIndex));
+ __ str(IP, Address(temp2, element_size, Address::PostIndex));
+ __ cmp(temp1, ShifterOperand(temp3));
+ __ b(&loop, NE);
+
+ __ Bind(read_barrier_slow_path->GetExitLabel());
+ __ Bind(&done);
+ } else {
+ // Non read barrier code.
+
+ // Compute the base destination address in `temp2`.
+ if (dest_pos.IsConstant()) {
+ int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+ __ AddConstant(temp2, dest, element_size * constant + offset);
+ } else {
+ __ add(temp2, dest, ShifterOperand(dest_pos.AsRegister<Register>(), LSL, element_size_shift));
+ __ AddConstant(temp2, offset);
+ }
+
+ // Iterate over the arrays and do a raw copy of the objects. We don't need to
+ // poison/unpoison.
+ Label loop, done;
+ __ cmp(temp1, ShifterOperand(temp3));
+ __ b(&done, EQ);
+ __ Bind(&loop);
+ __ ldr(IP, Address(temp1, element_size, Address::PostIndex));
+ __ str(IP, Address(temp2, element_size, Address::PostIndex));
+ __ cmp(temp1, ShifterOperand(temp3));
+ __ b(&loop, NE);
+ __ Bind(&done);
+ }
// We only need one card marking on the destination array.
codegen_->MarkGCCard(temp1,
@@ -1651,7 +1923,7 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) {
Register(kNoRegister),
/* value_can_be_null */ false);
- __ Bind(slow_path->GetExitLabel());
+ __ Bind(intrinsic_slow_path->GetExitLabel());
}
static void CreateFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) {
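As a sanity check on the LSRS trick in the fast path above: the carry bit produced by shifting the lock word right by `kReadBarrierStateShift + 1` is bit `kReadBarrierStateShift` of the original word, i.e. the low bit of the read barrier state, so only gray (value 1) objects take the read-barrier slow path while white (0) and black (2) do not. The following self-contained C++ sketch makes that equivalence explicit; the shift position and color values are assumed stand-ins for illustration, not ART's actual constants.

#include <cassert>
#include <cstdint>
#include <initializer_list>

// Minimal sketch, not ART code. The predicate below is the software equivalent
// of "LSRS temp2, temp2, #(kReadBarrierStateShift + 1); b(slow_path, CS)": the
// last bit shifted out (the carry) is the low bit of the read barrier state.
constexpr uint32_t kReadBarrierStateShift = 28;  // assumed bit position for this sketch
constexpr uint32_t kWhitePtr = 0;
constexpr uint32_t kGrayPtr = 1;
constexpr uint32_t kBlackPtr = 2;

static bool IsGray(uint32_t lock_word) {
  return ((lock_word >> kReadBarrierStateShift) & 1u) != 0;
}

int main() {
  for (uint32_t state : {kWhitePtr, kGrayPtr, kBlackPtr}) {
    uint32_t lock_word = state << kReadBarrierStateShift;
    assert(IsGray(lock_word) == (state == kGrayPtr));  // only gray takes the slow path
  }
  return 0;
}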
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 9cfe3ce569..91374b3108 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -144,6 +144,73 @@ class IntrinsicSlowPathARM64 : public SlowPathCodeARM64 {
DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathARM64);
};
+// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
+class ReadBarrierSystemArrayCopySlowPathARM64 : public SlowPathCodeARM64 {
+ public:
+ ReadBarrierSystemArrayCopySlowPathARM64(HInstruction* instruction, Location tmp)
+ : SlowPathCodeARM64(instruction), tmp_(tmp) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+ }
+
+ void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE {
+ CodeGeneratorARM64* codegen = down_cast<CodeGeneratorARM64*>(codegen_in);
+ LocationSummary* locations = instruction_->GetLocations();
+ DCHECK(locations->CanCall());
+ DCHECK(instruction_->IsInvokeStaticOrDirect())
+ << "Unexpected instruction in read barrier arraycopy slow path: "
+ << instruction_->DebugName();
+ DCHECK(instruction_->GetLocations()->Intrinsified());
+ DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
+
+ const int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
+
+ Register src_curr_addr = XRegisterFrom(locations->GetTemp(0));
+ Register dst_curr_addr = XRegisterFrom(locations->GetTemp(1));
+ Register src_stop_addr = XRegisterFrom(locations->GetTemp(2));
+ Register tmp_reg = WRegisterFrom(tmp_);
+
+ __ Bind(GetEntryLabel());
+ vixl::aarch64::Label slow_copy_loop;
+ __ Bind(&slow_copy_loop);
+ __ Ldr(tmp_reg, MemOperand(src_curr_addr, element_size, PostIndex));
+ codegen->GetAssembler()->MaybeUnpoisonHeapReference(tmp_reg);
+ // TODO: Inline the mark bit check before calling the runtime?
+ // tmp_reg = ReadBarrier::Mark(tmp_reg);
+ // No need to save live registers; it's taken care of by the
+ // entrypoint. Also, there is no need to update the stack mask,
+ // as this runtime call will not trigger a garbage collection.
+ // (See ReadBarrierMarkSlowPathARM64::EmitNativeCode for more
+ // explanations.)
+ DCHECK_NE(tmp_.reg(), LR);
+ DCHECK_NE(tmp_.reg(), WSP);
+ DCHECK_NE(tmp_.reg(), WZR);
+ // IP0 is used internally by the ReadBarrierMarkRegX entry point
+ // as a temporary (and not preserved). It thus cannot be used by
+ // any live register in this slow path.
+ DCHECK_NE(LocationFrom(src_curr_addr).reg(), IP0);
+ DCHECK_NE(LocationFrom(dst_curr_addr).reg(), IP0);
+ DCHECK_NE(LocationFrom(src_stop_addr).reg(), IP0);
+ DCHECK_NE(tmp_.reg(), IP0);
+ DCHECK(0 <= tmp_.reg() && tmp_.reg() < kNumberOfWRegisters) << tmp_.reg();
+ int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(tmp_.reg());
+ // This runtime call does not require a stack map.
+ codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+ codegen->GetAssembler()->MaybePoisonHeapReference(tmp_reg);
+ __ Str(tmp_reg, MemOperand(dst_curr_addr, element_size, PostIndex));
+ __ Cmp(src_curr_addr, src_stop_addr);
+ __ B(&slow_copy_loop, ne);
+ __ B(GetExitLabel());
+ }
+
+ const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathARM64"; }
+
+ private:
+ Location tmp_;
+
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathARM64);
+};
#undef __
bool IntrinsicLocationsBuilderARM64::TryDispatch(HInvoke* invoke) {
@@ -2035,9 +2102,9 @@ static constexpr int32_t kSystemArrayCopyThreshold = 128;
// We want to use two temporary registers in order to reduce the register pressure in arm64.
// So we don't use the CodeGenerator::CreateSystemArrayCopyLocationSummary.
void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) {
- // TODO(rpl): Implement read barriers in the SystemArrayCopy
- // intrinsic and re-enable it (b/29516905).
- if (kEmitCompilerReadBarrier) {
+ // The only read barrier implementation supporting the
+ // SystemArrayCopy intrinsic is the Baker-style read barriers.
+ if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
return;
}
@@ -2090,12 +2157,20 @@ void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) {
locations->AddTemp(Location::RequiresRegister());
locations->AddTemp(Location::RequiresRegister());
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // Temporary register IP0, obtained from the VIXL scratch register
+ // pool, cannot be used in ReadBarrierSystemArrayCopySlowPathARM64
+ // (because that register is clobbered by ReadBarrierMarkRegX
+ // entry points). Get an extra temporary register from the
+ // register allocator.
+ locations->AddTemp(Location::RequiresRegister());
+ }
}
void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
- // TODO(rpl): Implement read barriers in the SystemArrayCopy
- // intrinsic and re-enable it (b/29516905).
- DCHECK(!kEmitCompilerReadBarrier);
+ // The only read barrier implementation supporting the
+ // SystemArrayCopy intrinsic is the Baker-style read barriers.
+ DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
MacroAssembler* masm = GetVIXLAssembler();
LocationSummary* locations = invoke->GetLocations();
@@ -2104,6 +2179,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+ uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
Register src = XRegisterFrom(locations->InAt(0));
Location src_pos = locations->InAt(1);
@@ -2111,10 +2187,12 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
Location dest_pos = locations->InAt(3);
Location length = locations->InAt(4);
Register temp1 = WRegisterFrom(locations->GetTemp(0));
+ Location temp1_loc = LocationFrom(temp1);
Register temp2 = WRegisterFrom(locations->GetTemp(1));
+ Location temp2_loc = LocationFrom(temp2);
- SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke);
- codegen_->AddSlowPath(slow_path);
+ SlowPathCodeARM64* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke);
+ codegen_->AddSlowPath(intrinsic_slow_path);
vixl::aarch64::Label conditions_on_positions_validated;
SystemArrayCopyOptimizations optimizations(invoke);
@@ -2130,7 +2208,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
DCHECK_GE(src_pos_constant, dest_pos_constant);
} else if (src_pos_constant < dest_pos_constant) {
__ Cmp(src, dest);
- __ B(slow_path->GetEntryLabel(), eq);
+ __ B(intrinsic_slow_path->GetEntryLabel(), eq);
}
// Checked when building locations.
DCHECK(!optimizations.GetDestinationIsSource()
@@ -2141,7 +2219,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
__ B(&conditions_on_positions_validated, ne);
}
__ Cmp(WRegisterFrom(dest_pos), src_pos_constant);
- __ B(slow_path->GetEntryLabel(), gt);
+ __ B(intrinsic_slow_path->GetEntryLabel(), gt);
}
} else {
if (!optimizations.GetDestinationIsSource()) {
@@ -2150,19 +2228,19 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
}
__ Cmp(RegisterFrom(src_pos, invoke->InputAt(1)->GetType()),
OperandFrom(dest_pos, invoke->InputAt(3)->GetType()));
- __ B(slow_path->GetEntryLabel(), lt);
+ __ B(intrinsic_slow_path->GetEntryLabel(), lt);
}
__ Bind(&conditions_on_positions_validated);
if (!optimizations.GetSourceIsNotNull()) {
// Bail out if the source is null.
- __ Cbz(src, slow_path->GetEntryLabel());
+ __ Cbz(src, intrinsic_slow_path->GetEntryLabel());
}
if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
// Bail out if the destination is null.
- __ Cbz(dest, slow_path->GetEntryLabel());
+ __ Cbz(dest, intrinsic_slow_path->GetEntryLabel());
}
// We have already checked in the LocationsBuilder for the constant case.
@@ -2170,17 +2248,17 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
!optimizations.GetCountIsSourceLength() &&
!optimizations.GetCountIsDestinationLength()) {
// If the length is negative, bail out.
- __ Tbnz(WRegisterFrom(length), kWRegSize - 1, slow_path->GetEntryLabel());
+ __ Tbnz(WRegisterFrom(length), kWRegSize - 1, intrinsic_slow_path->GetEntryLabel());
// If the length >= 128 then (currently) prefer native implementation.
__ Cmp(WRegisterFrom(length), kSystemArrayCopyThreshold);
- __ B(slow_path->GetEntryLabel(), ge);
+ __ B(intrinsic_slow_path->GetEntryLabel(), ge);
}
// Validity checks: source.
CheckSystemArrayCopyPosition(masm,
src_pos,
src,
length,
- slow_path,
+ intrinsic_slow_path,
temp1,
optimizations.GetCountIsSourceLength());
@@ -2189,90 +2267,236 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
dest_pos,
dest,
length,
- slow_path,
+ intrinsic_slow_path,
temp1,
optimizations.GetCountIsDestinationLength());
{
// We use a block to end the scratch scope before the write barrier, thus
// freeing the temporary registers so they can be used in `MarkGCCard`.
UseScratchRegisterScope temps(masm);
+ // Note: Because it is acquired from VIXL's scratch register pool,
+ // `temp3` might be IP0, and thus cannot be used as `ref` argument
+ // of CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier
+ // calls below (see ReadBarrierMarkSlowPathARM64 for more details).
Register temp3 = temps.AcquireW();
+
if (!optimizations.GetDoesNotNeedTypeCheck()) {
// Check whether all elements of the source array are assignable to the component
// type of the destination array. We do two checks: the classes are the same,
// or the destination is Object[]. If none of these checks succeed, we go to the
// slow path.
- __ Ldr(temp1, MemOperand(dest, class_offset));
- __ Ldr(temp2, MemOperand(src, class_offset));
- bool did_unpoison = false;
- if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
- !optimizations.GetSourceIsNonPrimitiveArray()) {
- // One or two of the references need to be unpoisoned. Unpoison them
- // both to make the identity check valid.
- codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
- codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
- did_unpoison = true;
- }
- if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
- // Bail out if the destination is not a non primitive array.
- // /* HeapReference<Class> */ temp3 = temp1->component_type_
- __ Ldr(temp3, HeapOperand(temp1, component_offset));
- __ Cbz(temp3, slow_path->GetEntryLabel());
- codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3);
- __ Ldrh(temp3, HeapOperand(temp3, primitive_offset));
- static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
- __ Cbnz(temp3, slow_path->GetEntryLabel());
- }
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+ // /* HeapReference<Class> */ temp1 = src->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
+ temp1_loc,
+ src.W(),
+ class_offset,
+ temp2,
+ /* needs_null_check */ false,
+ /* use_load_acquire */ false);
+ // Bail out if the source is not a non primitive array.
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
+ temp1_loc,
+ temp1,
+ component_offset,
+ temp2,
+ /* needs_null_check */ false,
+ /* use_load_acquire */ false);
+ __ Cbz(temp1, intrinsic_slow_path->GetEntryLabel());
+ // If heap poisoning is enabled, `temp1` has been unpoisoned
+ // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
+ // /* uint16_t */ temp1 = static_cast<uint16>(temp1->primitive_type_);
+ __ Ldrh(temp1, HeapOperand(temp1, primitive_offset));
+ static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+ __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
+ }
- if (!optimizations.GetSourceIsNonPrimitiveArray()) {
- // Bail out if the source is not a non primitive array.
- // /* HeapReference<Class> */ temp3 = temp2->component_type_
- __ Ldr(temp3, HeapOperand(temp2, component_offset));
- __ Cbz(temp3, slow_path->GetEntryLabel());
- codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3);
- __ Ldrh(temp3, HeapOperand(temp3, primitive_offset));
- static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
- __ Cbnz(temp3, slow_path->GetEntryLabel());
- }
+ // /* HeapReference<Class> */ temp1 = dest->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
+ temp1_loc,
+ dest.W(),
+ class_offset,
+ temp2,
+ /* needs_null_check */ false,
+ /* use_load_acquire */ false);
+
+ if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
+ // Bail out if the destination is not a non primitive array.
+ //
+ // Register `temp1` is not trashed by the read barrier emitted
+ // by GenerateFieldLoadWithBakerReadBarrier below, as that
+ // method produces a call to a ReadBarrierMarkRegX entry point,
+ // which saves all potentially live registers, including
+ // temporaries such as `temp1`.
+ // /* HeapReference<Class> */ temp2 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
+ temp2_loc,
+ temp1,
+ component_offset,
+ temp3,
+ /* needs_null_check */ false,
+ /* use_load_acquire */ false);
+ __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
+ // If heap poisoning is enabled, `temp2` has been unpoisoned
+ // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
+ // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_);
+ __ Ldrh(temp2, HeapOperand(temp2, primitive_offset));
+ static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+ __ Cbnz(temp2, intrinsic_slow_path->GetEntryLabel());
+ }
- __ Cmp(temp1, temp2);
+ // For the same reason given earlier, `temp1` is not trashed by the
+ // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
+ // /* HeapReference<Class> */ temp2 = src->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
+ temp2_loc,
+ src.W(),
+ class_offset,
+ temp3,
+ /* needs_null_check */ false,
+ /* use_load_acquire */ false);
+ // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
+ __ Cmp(temp1, temp2);
+
+ if (optimizations.GetDestinationIsTypedObjectArray()) {
+ vixl::aarch64::Label do_copy;
+ __ B(&do_copy, eq);
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
+ temp1_loc,
+ temp1,
+ component_offset,
+ temp2,
+ /* needs_null_check */ false,
+ /* use_load_acquire */ false);
+ // /* HeapReference<Class> */ temp1 = temp1->super_class_
+ // We do not need to emit a read barrier for the following
+ // heap reference load, as `temp1` is only used in a
+ // comparison with null below, and this reference is not
+ // kept afterwards.
+ __ Ldr(temp1, HeapOperand(temp1, super_offset));
+ __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
+ __ Bind(&do_copy);
+ } else {
+ __ B(intrinsic_slow_path->GetEntryLabel(), ne);
+ }
+ } else {
+ // Non read barrier code.
+
+ // /* HeapReference<Class> */ temp1 = dest->klass_
+ __ Ldr(temp1, MemOperand(dest, class_offset));
+ // /* HeapReference<Class> */ temp2 = src->klass_
+ __ Ldr(temp2, MemOperand(src, class_offset));
+ bool did_unpoison = false;
+ if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
+ !optimizations.GetSourceIsNonPrimitiveArray()) {
+ // One or two of the references need to be unpoisoned. Unpoison them
+ // both to make the identity check valid.
+ codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
+ codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
+ did_unpoison = true;
+ }
+
+ if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
+ // Bail out if the destination is not a non primitive array.
+ // /* HeapReference<Class> */ temp3 = temp1->component_type_
+ __ Ldr(temp3, HeapOperand(temp1, component_offset));
+ __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel());
+ codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3);
+ // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
+ __ Ldrh(temp3, HeapOperand(temp3, primitive_offset));
+ static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+ __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel());
+ }
- if (optimizations.GetDestinationIsTypedObjectArray()) {
- vixl::aarch64::Label do_copy;
- __ B(&do_copy, eq);
- if (!did_unpoison) {
+ if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+ // Bail out if the source is not a non primitive array.
+ // /* HeapReference<Class> */ temp3 = temp2->component_type_
+ __ Ldr(temp3, HeapOperand(temp2, component_offset));
+ __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel());
+ codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3);
+ // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
+ __ Ldrh(temp3, HeapOperand(temp3, primitive_offset));
+ static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+ __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel());
+ }
+
+ __ Cmp(temp1, temp2);
+
+ if (optimizations.GetDestinationIsTypedObjectArray()) {
+ vixl::aarch64::Label do_copy;
+ __ B(&do_copy, eq);
+ if (!did_unpoison) {
+ codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
+ }
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ __ Ldr(temp1, HeapOperand(temp1, component_offset));
codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
+ // /* HeapReference<Class> */ temp1 = temp1->super_class_
+ __ Ldr(temp1, HeapOperand(temp1, super_offset));
+ // No need to unpoison the result, we're comparing against null.
+ __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
+ __ Bind(&do_copy);
+ } else {
+ __ B(intrinsic_slow_path->GetEntryLabel(), ne);
}
- // /* HeapReference<Class> */ temp1 = temp1->component_type_
- __ Ldr(temp1, HeapOperand(temp1, component_offset));
- codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
- // /* HeapReference<Class> */ temp1 = temp1->super_class_
- __ Ldr(temp1, HeapOperand(temp1, super_offset));
- // No need to unpoison the result, we're comparing against null.
- __ Cbnz(temp1, slow_path->GetEntryLabel());
- __ Bind(&do_copy);
- } else {
- __ B(slow_path->GetEntryLabel(), ne);
}
} else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
// Bail out if the source is not a non primitive array.
- // /* HeapReference<Class> */ temp1 = src->klass_
- __ Ldr(temp1, HeapOperand(src.W(), class_offset));
- codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
- // /* HeapReference<Class> */ temp3 = temp1->component_type_
- __ Ldr(temp3, HeapOperand(temp1, component_offset));
- __ Cbz(temp3, slow_path->GetEntryLabel());
- codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3);
- __ Ldrh(temp3, HeapOperand(temp3, primitive_offset));
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // /* HeapReference<Class> */ temp1 = src->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
+ temp1_loc,
+ src.W(),
+ class_offset,
+ temp2,
+ /* needs_null_check */ false,
+ /* use_load_acquire */ false);
+ // /* HeapReference<Class> */ temp2 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
+ temp2_loc,
+ temp1,
+ component_offset,
+ temp3,
+ /* needs_null_check */ false,
+ /* use_load_acquire */ false);
+ __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
+ // If heap poisoning is enabled, `temp2` has been unpoisoned
+ // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
+ } else {
+ // /* HeapReference<Class> */ temp1 = src->klass_
+ __ Ldr(temp1, HeapOperand(src.W(), class_offset));
+ codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1);
+ // /* HeapReference<Class> */ temp2 = temp1->component_type_
+ __ Ldr(temp2, HeapOperand(temp1, component_offset));
+ __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
+ codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
+ }
+ // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_);
+ __ Ldrh(temp2, HeapOperand(temp2, primitive_offset));
static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
- __ Cbnz(temp3, slow_path->GetEntryLabel());
+ __ Cbnz(temp2, intrinsic_slow_path->GetEntryLabel());
}
Register src_curr_addr = temp1.X();
Register dst_curr_addr = temp2.X();
- Register src_stop_addr = temp3.X();
+ Register src_stop_addr;
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // Temporary register IP0, obtained from the VIXL scratch
+ // register pool as `temp3`, cannot be used in
+ // ReadBarrierSystemArrayCopySlowPathARM64 (because that
+ // register is clobbered by ReadBarrierMarkRegX entry points).
+ // So another temporary register, allocated by the register
+ // allocator, is used instead.
+ DCHECK_EQ(LocationFrom(temp3).reg(), IP0);
+ src_stop_addr = XRegisterFrom(locations->GetTemp(2));
+ } else {
+ src_stop_addr = temp3.X();
+ }
GenSystemArrayCopyAddresses(masm,
Primitive::kPrimNot,
@@ -2285,25 +2509,98 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
dst_curr_addr,
src_stop_addr);
- // Iterate over the arrays and do a raw copy of the objects. We don't need to
- // poison/unpoison.
- vixl::aarch64::Label loop, done;
const int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
- __ Bind(&loop);
- __ Cmp(src_curr_addr, src_stop_addr);
- __ B(&done, eq);
- {
+
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // SystemArrayCopy implementation for Baker read barriers (see
+ // also CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier):
+ //
+ // if (src_ptr != end_ptr) {
+ // uint32_t rb_state = LockWord(src->monitor_).ReadBarrierState();
+ // lfence; // Load fence or artificial data dependency to prevent load-load reordering
+ // bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+ // if (is_gray) {
+ // // Slow-path copy.
+ // do {
+ // *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
+ // } while (src_ptr != end_ptr)
+ // } else {
+ // // Fast-path copy.
+ // do {
+ // *dest_ptr++ = *src_ptr++;
+ // } while (src_ptr != end_ptr)
+ // }
+ // }
+
+ vixl::aarch64::Label loop, done;
+
+ // Don't enter copy loop if `length == 0`.
+ __ Cmp(src_curr_addr, src_stop_addr);
+ __ B(&done, eq);
+
Register tmp = temps.AcquireW();
+ // Make sure `tmp` is not IP0, as it is clobbered by
+ // ReadBarrierMarkRegX entry points in
+ // ReadBarrierSystemArrayCopySlowPathARM64.
+ DCHECK_NE(LocationFrom(tmp).reg(), IP0);
+
+ // /* int32_t */ monitor = src->monitor_
+ __ Ldr(tmp, HeapOperand(src.W(), monitor_offset));
+ // /* LockWord */ lock_word = LockWord(monitor)
+ static_assert(sizeof(LockWord) == sizeof(int32_t),
+ "art::LockWord and int32_t have different sizes.");
+
+ // Introduce a dependency on the lock_word including rb_state,
+ // to prevent load-load reordering, and without using
+ // a memory barrier (which would be more expensive).
+ // `src` is unchanged by this operation, but its value now depends
+ // on `tmp`.
+ __ Add(src.X(), src.X(), Operand(tmp.X(), LSR, 32));
+
+ // Slow path used to copy array when `src` is gray.
+ SlowPathCodeARM64* read_barrier_slow_path =
+ new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathARM64(invoke, LocationFrom(tmp));
+ codegen_->AddSlowPath(read_barrier_slow_path);
+
+ // Given the numeric representation, it's enough to check the low bit of the rb_state.
+ static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+ static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+ __ Tbnz(tmp, LockWord::kReadBarrierStateShift, read_barrier_slow_path->GetEntryLabel());
+
+ // Fast-path copy.
+ // Iterate over the arrays and do a raw copy of the objects. We don't need to
+ // poison/unpoison.
+ __ Bind(&loop);
__ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
__ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
+ __ Cmp(src_curr_addr, src_stop_addr);
+ __ B(&loop, ne);
+
+ __ Bind(read_barrier_slow_path->GetExitLabel());
+ __ Bind(&done);
+ } else {
+ // Non read barrier code.
+
+ // Iterate over the arrays and do a raw copy of the objects. We don't need to
+ // poison/unpoison.
+ vixl::aarch64::Label loop, done;
+ __ Bind(&loop);
+ __ Cmp(src_curr_addr, src_stop_addr);
+ __ B(&done, eq);
+ {
+ Register tmp = temps.AcquireW();
+ __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
+ __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
+ }
+ __ B(&loop);
+ __ Bind(&done);
}
- __ B(&loop);
- __ Bind(&done);
}
// We only need one card marking on the destination array.
codegen_->MarkGCCard(dest.W(), Register(), /* value_can_be_null */ false);
- __ Bind(slow_path->GetExitLabel());
+ __ Bind(intrinsic_slow_path->GetExitLabel());
}
static void GenIsInfinite(LocationSummary* locations,
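The ARM64 hunk above boils down to a single gray check on the source object followed by one of two copy loops. Below is a minimal C++ sketch of that control flow; `ObjRef`, `Mark`, and `is_gray` are illustrative stand-ins for ART's compressed heap references, the ReadBarrierMarkRegX runtime entry point, and the lock-word read-barrier-state test, and are not names taken from this patch.

    #include <cstdint>

    using ObjRef = uint32_t;                 // stand-in for a compressed heap reference
    ObjRef Mark(ObjRef ref) { return ref; }  // placeholder for the runtime mark entry point

    // Mirrors the structure of the code emitted under Baker read barriers:
    // one gray check selects either a marking copy loop (slow path) or a
    // raw copy loop (fast path).
    void ArrayCopySketch(const ObjRef* src_ptr, const ObjRef* end_ptr,
                         ObjRef* dest_ptr, bool is_gray) {
      if (src_ptr == end_ptr) {
        return;  // length == 0: skip both loops, as the emitted code does
      }
      if (is_gray) {
        do {  // slow-path copy: mark every loaded reference
          *dest_ptr++ = Mark(*src_ptr++);
        } while (src_ptr != end_ptr);
      } else {
        do {  // fast-path copy: raw element-by-element copy
          *dest_ptr++ = *src_ptr++;
        } while (src_ptr != end_ptr);
      }
    }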
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 22f4181b92..49d6c1952c 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -70,6 +70,105 @@ static void MoveArguments(HInvoke* invoke, CodeGeneratorX86* codegen) {
using IntrinsicSlowPathX86 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorX86>;
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<X86Assembler*>(codegen->GetAssembler())-> // NOLINT
+
+// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
+class ReadBarrierSystemArrayCopySlowPathX86 : public SlowPathCode {
+ public:
+ explicit ReadBarrierSystemArrayCopySlowPathX86(HInstruction* instruction)
+ : SlowPathCode(instruction) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+ }
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
+ LocationSummary* locations = instruction_->GetLocations();
+ DCHECK(locations->CanCall());
+ DCHECK(instruction_->IsInvokeStaticOrDirect())
+ << "Unexpected instruction in read barrier arraycopy slow path: "
+ << instruction_->DebugName();
+ DCHECK(instruction_->GetLocations()->Intrinsified());
+ DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
+
+ int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
+ uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
+
+ Register src = locations->InAt(0).AsRegister<Register>();
+ Location src_pos = locations->InAt(1);
+ Register dest = locations->InAt(2).AsRegister<Register>();
+ Location dest_pos = locations->InAt(3);
+ Location length = locations->InAt(4);
+ Location temp1_loc = locations->GetTemp(0);
+ Register temp1 = temp1_loc.AsRegister<Register>();
+ Register temp2 = locations->GetTemp(1).AsRegister<Register>();
+ Register temp3 = locations->GetTemp(2).AsRegister<Register>();
+
+ __ Bind(GetEntryLabel());
+ // In this code path, registers `temp1`, `temp2`, and `temp3`
+ // are not used for the base source address, the base
+ // destination address, and the end source address, as they are in
+ // other SystemArrayCopy intrinsic code paths. Instead they are
+ // used, respectively, for:
+ // - the loop index (`i`);
+ // - the source index (`src_index`) and the loaded (source)
+ // reference (`value`); and
+ // - the destination index (`dest_index`).
+
+ // i = 0
+ __ xorl(temp1, temp1);
+ NearLabel loop;
+ __ Bind(&loop);
+ // value = src_array[i + src_pos]
+ if (src_pos.IsConstant()) {
+ int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
+ int32_t adjusted_offset = offset + constant * element_size;
+ __ movl(temp2, Address(src, temp1, ScaleFactor::TIMES_4, adjusted_offset));
+ } else {
+ __ leal(temp2, Address(src_pos.AsRegister<Register>(), temp1, ScaleFactor::TIMES_1, 0));
+ __ movl(temp2, Address(src, temp2, ScaleFactor::TIMES_4, offset));
+ }
+ __ MaybeUnpoisonHeapReference(temp2);
+ // TODO: Inline the mark bit check before calling the runtime?
+ // value = ReadBarrier::Mark(value)
+ // No need to save live registers; it's taken care of by the
+ // entrypoint. Also, there is no need to update the stack mask,
+ // as this runtime call will not trigger a garbage collection.
+ // (See ReadBarrierMarkSlowPathX86::EmitNativeCode for more
+ // explanations.)
+ DCHECK_NE(temp2, ESP);
+ DCHECK(0 <= temp2 && temp2 < kNumberOfCpuRegisters) << temp2;
+ int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86PointerSize>(temp2);
+ // This runtime call does not require a stack map.
+ x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+ __ MaybePoisonHeapReference(temp2);
+ // dest_array[i + dest_pos] = value
+ if (dest_pos.IsConstant()) {
+ int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+ int32_t adjusted_offset = offset + constant * element_size;
+ __ movl(Address(dest, temp1, ScaleFactor::TIMES_4, adjusted_offset), temp2);
+ } else {
+ __ leal(temp3, Address(dest_pos.AsRegister<Register>(), temp1, ScaleFactor::TIMES_1, 0));
+ __ movl(Address(dest, temp3, ScaleFactor::TIMES_4, offset), temp2);
+ }
+ // ++i
+ __ addl(temp1, Immediate(1));
+ // if (i != length) goto loop
+ x86_codegen->GenerateIntCompare(temp1_loc, length);
+ __ j(kNotEqual, &loop);
+ __ jmp(GetExitLabel());
+ }
+
+ const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathX86"; }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathX86);
+};
+
+#undef __
+
#define __ assembler->
static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke, bool is64bit) {
@@ -2678,9 +2777,9 @@ static bool IsSameInput(HInstruction* instruction, size_t input0, size_t input1)
}
void IntrinsicLocationsBuilderX86::VisitSystemArrayCopy(HInvoke* invoke) {
- // TODO(rpl): Implement read barriers in the SystemArrayCopy
- // intrinsic and re-enable it (b/29516905).
- if (kEmitCompilerReadBarrier) {
+ // The only read barrier implementation supporting the
+ // SystemArrayCopy intrinsic is the Baker-style read barrier.
+ if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
return;
}
@@ -2710,9 +2809,9 @@ void IntrinsicLocationsBuilderX86::VisitSystemArrayCopy(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
- // TODO(rpl): Implement read barriers in the SystemArrayCopy
- // intrinsic and re-enable it (b/29516905).
- DCHECK(!kEmitCompilerReadBarrier);
+ // The only read barrier implementation supporting the
+ // SystemArrayCopy intrinsic is the Baker-style read barrier.
+ DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
X86Assembler* assembler = GetAssembler();
LocationSummary* locations = invoke->GetLocations();
@@ -2721,17 +2820,21 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+ uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
Register src = locations->InAt(0).AsRegister<Register>();
Location src_pos = locations->InAt(1);
Register dest = locations->InAt(2).AsRegister<Register>();
Location dest_pos = locations->InAt(3);
- Location length = locations->InAt(4);
- Register temp1 = locations->GetTemp(0).AsRegister<Register>();
- Register temp2 = locations->GetTemp(1).AsRegister<Register>();
+ Location length_arg = locations->InAt(4);
+ Location length = length_arg;
+ Location temp1_loc = locations->GetTemp(0);
+ Register temp1 = temp1_loc.AsRegister<Register>();
+ Location temp2_loc = locations->GetTemp(1);
+ Register temp2 = temp2_loc.AsRegister<Register>();
- SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
- codegen_->AddSlowPath(slow_path);
+ SlowPathCode* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
+ codegen_->AddSlowPath(intrinsic_slow_path);
NearLabel conditions_on_positions_validated;
SystemArrayCopyOptimizations optimizations(invoke);
@@ -2747,7 +2850,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
DCHECK_GE(src_pos_constant, dest_pos_constant);
} else if (src_pos_constant < dest_pos_constant) {
__ cmpl(src, dest);
- __ j(kEqual, slow_path->GetEntryLabel());
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
}
} else {
if (!optimizations.GetDestinationIsSource()) {
@@ -2755,7 +2858,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
__ j(kNotEqual, &conditions_on_positions_validated);
}
__ cmpl(dest_pos.AsRegister<Register>(), Immediate(src_pos_constant));
- __ j(kGreater, slow_path->GetEntryLabel());
+ __ j(kGreater, intrinsic_slow_path->GetEntryLabel());
}
} else {
if (!optimizations.GetDestinationIsSource()) {
@@ -2765,10 +2868,10 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
if (dest_pos.IsConstant()) {
int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
__ cmpl(src_pos.AsRegister<Register>(), Immediate(dest_pos_constant));
- __ j(kLess, slow_path->GetEntryLabel());
+ __ j(kLess, intrinsic_slow_path->GetEntryLabel());
} else {
__ cmpl(src_pos.AsRegister<Register>(), dest_pos.AsRegister<Register>());
- __ j(kLess, slow_path->GetEntryLabel());
+ __ j(kLess, intrinsic_slow_path->GetEntryLabel());
}
}
@@ -2777,16 +2880,17 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
if (!optimizations.GetSourceIsNotNull()) {
// Bail out if the source is null.
__ testl(src, src);
- __ j(kEqual, slow_path->GetEntryLabel());
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
}
if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
// Bail out if the destination is null.
__ testl(dest, dest);
- __ j(kEqual, slow_path->GetEntryLabel());
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
}
- Register temp3 = locations->GetTemp(2).AsRegister<Register>();
+ Location temp3_loc = locations->GetTemp(2);
+ Register temp3 = temp3_loc.AsRegister<Register>();
if (length.IsStackSlot()) {
__ movl(temp3, Address(ESP, length.GetStackIndex()));
length = Location::RegisterLocation(temp3);
@@ -2798,7 +2902,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
!optimizations.GetCountIsSourceLength() &&
!optimizations.GetCountIsDestinationLength()) {
__ testl(length.AsRegister<Register>(), length.AsRegister<Register>());
- __ j(kLess, slow_path->GetEntryLabel());
+ __ j(kLess, intrinsic_slow_path->GetEntryLabel());
}
// Validity checks: source.
@@ -2806,7 +2910,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
src_pos,
src,
length,
- slow_path,
+ intrinsic_slow_path,
temp1,
optimizations.GetCountIsSourceLength());
@@ -2815,7 +2919,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
dest_pos,
dest,
length,
- slow_path,
+ intrinsic_slow_path,
temp1,
optimizations.GetCountIsDestinationLength());
@@ -2824,72 +2928,159 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
// type of the destination array. We do two checks: the classes are the same,
// or the destination is Object[]. If none of these checks succeed, we go to the
// slow path.
+
if (!optimizations.GetSourceIsNonPrimitiveArray()) {
- // /* HeapReference<Class> */ temp1 = temp1->klass_
- __ movl(temp1, Address(src, class_offset));
- __ MaybeUnpoisonHeapReference(temp1);
- // Bail out if the source is not a non primitive array.
- // /* HeapReference<Class> */ temp1 = temp1->component_type_
- __ movl(temp1, Address(temp1, component_offset));
- __ testl(temp1, temp1);
- __ j(kEqual, slow_path->GetEntryLabel());
- __ MaybeUnpoisonHeapReference(temp1);
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // /* HeapReference<Class> */ temp1 = src->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false);
+ // Bail out if the source is not a non primitive array.
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
+ __ testl(temp1, temp1);
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+ // If heap poisoning is enabled, `temp1` has been unpoisoned
+ // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
+ } else {
+ // /* HeapReference<Class> */ temp1 = src->klass_
+ __ movl(temp1, Address(src, class_offset));
+ __ MaybeUnpoisonHeapReference(temp1);
+ // Bail out if the source is not a non primitive array.
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ __ movl(temp1, Address(temp1, component_offset));
+ __ testl(temp1, temp1);
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+ __ MaybeUnpoisonHeapReference(temp1);
+ }
__ cmpw(Address(temp1, primitive_offset), Immediate(Primitive::kPrimNot));
- __ j(kNotEqual, slow_path->GetEntryLabel());
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
}
- if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
- // /* HeapReference<Class> */ temp1 = temp1->klass_
- __ movl(temp1, Address(dest, class_offset));
- __ MaybeUnpoisonHeapReference(temp1);
- // Bail out if the destination is not a non primitive array.
- // /* HeapReference<Class> */ temp2 = temp1->component_type_
- __ movl(temp2, Address(temp1, component_offset));
- __ testl(temp2, temp2);
- __ j(kEqual, slow_path->GetEntryLabel());
- __ MaybeUnpoisonHeapReference(temp2);
- __ cmpw(Address(temp2, primitive_offset), Immediate(Primitive::kPrimNot));
- __ j(kNotEqual, slow_path->GetEntryLabel());
- // Re-poison the heap reference to make the compare instruction below
- // compare two poisoned references.
- __ PoisonHeapReference(temp1);
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ if (length.Equals(Location::RegisterLocation(temp3))) {
+ // When Baker read barriers are enabled, register `temp3`,
+ // which in the present case contains the `length` parameter,
+ // will be overwritten below. Make the `length` location
+ // reference the original stack location; it will be moved
+ // back to `temp3` later if necessary.
+ DCHECK(length_arg.IsStackSlot());
+ length = length_arg;
+ }
+
+ // /* HeapReference<Class> */ temp1 = dest->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, dest, class_offset, temp2_loc, /* needs_null_check */ false);
+
+ if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
+ // Bail out if the destination is not a non primitive array.
+ //
+ // Register `temp1` is not trashed by the read barrier emitted
+ // by GenerateFieldLoadWithBakerReadBarrier below, as that
+ // method produces a call to a ReadBarrierMarkRegX entry point,
+ // which saves all potentially live registers, including
+ // temporaries such as `temp1`.
+ // /* HeapReference<Class> */ temp2 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp2_loc, temp1, component_offset, temp3_loc, /* needs_null_check */ false);
+ __ testl(temp2, temp2);
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+ // If heap poisoning is enabled, `temp2` has been unpoisoned
+ // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
+ __ cmpw(Address(temp2, primitive_offset), Immediate(Primitive::kPrimNot));
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
+ }
+
+ // For the same reason given earlier, `temp1` is not trashed by the
+ // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
+ // /* HeapReference<Class> */ temp2 = src->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp2_loc, src, class_offset, temp3_loc, /* needs_null_check */ false);
+ // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
+ __ cmpl(temp1, temp2);
+
+ if (optimizations.GetDestinationIsTypedObjectArray()) {
+ NearLabel do_copy;
+ __ j(kEqual, &do_copy);
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
+ // We do not need to emit a read barrier for the following
+ // heap reference load, as `temp1` is only used in a
+ // comparison with null below, and this reference is not
+ // kept afterwards.
+ __ cmpl(Address(temp1, super_offset), Immediate(0));
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
+ __ Bind(&do_copy);
+ } else {
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
+ }
} else {
- // /* HeapReference<Class> */ temp1 = temp1->klass_
- __ movl(temp1, Address(dest, class_offset));
- }
+ // Non read barrier code.
- // Note: if poisoning is on, we are here comparing two poisoned references.
- __ cmpl(temp1, Address(src, class_offset));
+ // /* HeapReference<Class> */ temp1 = dest->klass_
+ __ movl(temp1, Address(dest, class_offset));
+ if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
+ __ MaybeUnpoisonHeapReference(temp1);
+ // Bail out if the destination is not a non primitive array.
+ // /* HeapReference<Class> */ temp2 = temp1->component_type_
+ __ movl(temp2, Address(temp1, component_offset));
+ __ testl(temp2, temp2);
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+ __ MaybeUnpoisonHeapReference(temp2);
+ __ cmpw(Address(temp2, primitive_offset), Immediate(Primitive::kPrimNot));
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
+ // Re-poison the heap reference to make the compare instruction below
+ // compare two poisoned references.
+ __ PoisonHeapReference(temp1);
+ }
- if (optimizations.GetDestinationIsTypedObjectArray()) {
- NearLabel do_copy;
- __ j(kEqual, &do_copy);
+ // Note: if heap poisoning is on, we are comparing two poisoned references here.
+ __ cmpl(temp1, Address(src, class_offset));
+
+ if (optimizations.GetDestinationIsTypedObjectArray()) {
+ NearLabel do_copy;
+ __ j(kEqual, &do_copy);
+ __ MaybeUnpoisonHeapReference(temp1);
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ __ movl(temp1, Address(temp1, component_offset));
+ __ MaybeUnpoisonHeapReference(temp1);
+ __ cmpl(Address(temp1, super_offset), Immediate(0));
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
+ __ Bind(&do_copy);
+ } else {
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
+ }
+ }
+ } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+ DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
+ // Bail out if the source is not a non primitive array.
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // /* HeapReference<Class> */ temp1 = src->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false);
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
+ __ testl(temp1, temp1);
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+ // If heap poisoning is enabled, `temp1` has been unpoisoned
+ // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
+ } else {
+ // /* HeapReference<Class> */ temp1 = src->klass_
+ __ movl(temp1, Address(src, class_offset));
__ MaybeUnpoisonHeapReference(temp1);
// /* HeapReference<Class> */ temp1 = temp1->component_type_
__ movl(temp1, Address(temp1, component_offset));
+ __ testl(temp1, temp1);
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
__ MaybeUnpoisonHeapReference(temp1);
- __ cmpl(Address(temp1, super_offset), Immediate(0));
- __ j(kNotEqual, slow_path->GetEntryLabel());
- __ Bind(&do_copy);
- } else {
- __ j(kNotEqual, slow_path->GetEntryLabel());
}
- } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
- DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
- // Bail out if the source is not a non primitive array.
- // /* HeapReference<Class> */ temp1 = src->klass_
- __ movl(temp1, Address(src, class_offset));
- __ MaybeUnpoisonHeapReference(temp1);
- // /* HeapReference<Class> */ temp1 = temp1->component_type_
- __ movl(temp1, Address(temp1, component_offset));
- __ testl(temp1, temp1);
- __ j(kEqual, slow_path->GetEntryLabel());
- __ MaybeUnpoisonHeapReference(temp1);
__ cmpw(Address(temp1, primitive_offset), Immediate(Primitive::kPrimNot));
- __ j(kNotEqual, slow_path->GetEntryLabel());
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
}
- // Compute base source address, base destination address, and end source address.
+ // Compute the base source address in `temp1`.
int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
DCHECK_EQ(element_size, 4);
uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
@@ -2900,35 +3091,136 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
__ leal(temp1, Address(src, src_pos.AsRegister<Register>(), ScaleFactor::TIMES_4, offset));
}
- if (dest_pos.IsConstant()) {
- int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
- __ leal(temp2, Address(dest, element_size * constant + offset));
- } else {
- __ leal(temp2, Address(dest, dest_pos.AsRegister<Register>(), ScaleFactor::TIMES_4, offset));
- }
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // If it is needed (in the case of the fast-path loop), the base
+ // destination address is computed later, as `temp2` is used for
+ // intermediate computations.
- if (length.IsConstant()) {
- int32_t constant = length.GetConstant()->AsIntConstant()->GetValue();
- __ leal(temp3, Address(temp1, element_size * constant));
+ // Compute the end source address in `temp3`.
+ if (length.IsConstant()) {
+ int32_t constant = length.GetConstant()->AsIntConstant()->GetValue();
+ __ leal(temp3, Address(temp1, element_size * constant));
+ } else {
+ if (length.IsStackSlot()) {
+ // Location `length` is again pointing at a stack slot, as
+ // register `temp3` (which contained the length parameter
+ // earlier) has been overwritten; restore it now.
+ DCHECK(length.Equals(length_arg));
+ __ movl(temp3, Address(ESP, length.GetStackIndex()));
+ length = Location::RegisterLocation(temp3);
+ }
+ __ leal(temp3, Address(temp1, length.AsRegister<Register>(), ScaleFactor::TIMES_4, 0));
+ }
+
+ // SystemArrayCopy implementation for Baker read barriers (see
+ // also CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier):
+ //
+ // if (src_ptr != end_ptr) {
+ // uint32_t rb_state = LockWord(src->monitor_).ReadBarrierState();
+ // lfence; // Load fence or artificial data dependency to prevent load-load reordering
+ // bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+ // if (is_gray) {
+ // // Slow-path copy.
+ // for (size_t i = 0; i != length; ++i) {
+ // dest_array[dest_pos + i] =
+ // MaybePoison(ReadBarrier::Mark(MaybeUnpoison(src_array[src_pos + i])));
+ // }
+ // } else {
+ // // Fast-path copy.
+ // do {
+ // *dest_ptr++ = *src_ptr++;
+ // } while (src_ptr != end_ptr)
+ // }
+ // }
+
+ NearLabel loop, done;
+
+ // Don't enter copy loop if `length == 0`.
+ __ cmpl(temp1, temp3);
+ __ j(kEqual, &done);
+
+ // /* int32_t */ monitor = src->monitor_
+ __ movl(temp2, Address(src, monitor_offset));
+ // /* LockWord */ lock_word = LockWord(monitor)
+ static_assert(sizeof(LockWord) == sizeof(int32_t),
+ "art::LockWord and int32_t have different sizes.");
+
+ // Load fence to prevent load-load reordering.
+ // Note that this is a no-op, thanks to the x86 memory model.
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+
+ // Slow path used to copy array when `src` is gray.
+ SlowPathCode* read_barrier_slow_path =
+ new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathX86(invoke);
+ codegen_->AddSlowPath(read_barrier_slow_path);
+
+ // Given the numeric representation, it's enough to check the low bit of the
+ // rb_state. We do that by shifting the bit out of the lock word with SHR.
+ static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+ static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+ __ shrl(temp2, Immediate(LockWord::kReadBarrierStateShift + 1));
+ __ j(kCarrySet, read_barrier_slow_path->GetEntryLabel());
+
+ // Fast-path copy.
+
+ // Set the base destination address in `temp2`.
+ if (dest_pos.IsConstant()) {
+ int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+ __ leal(temp2, Address(dest, element_size * constant + offset));
+ } else {
+ __ leal(temp2, Address(dest, dest_pos.AsRegister<Register>(), ScaleFactor::TIMES_4, offset));
+ }
+
+ // Iterate over the arrays and do a raw copy of the objects. We don't need to
+ // poison/unpoison.
+ __ Bind(&loop);
+ __ pushl(Address(temp1, 0));
+ __ cfi().AdjustCFAOffset(4);
+ __ popl(Address(temp2, 0));
+ __ cfi().AdjustCFAOffset(-4);
+ __ addl(temp1, Immediate(element_size));
+ __ addl(temp2, Immediate(element_size));
+ __ cmpl(temp1, temp3);
+ __ j(kNotEqual, &loop);
+
+ __ Bind(read_barrier_slow_path->GetExitLabel());
+ __ Bind(&done);
} else {
- __ leal(temp3, Address(temp1, length.AsRegister<Register>(), ScaleFactor::TIMES_4, 0));
- }
-
- // Iterate over the arrays and do a raw copy of the objects. We don't need to
- // poison/unpoison.
- NearLabel loop, done;
- __ cmpl(temp1, temp3);
- __ j(kEqual, &done);
- __ Bind(&loop);
- __ pushl(Address(temp1, 0));
- __ cfi().AdjustCFAOffset(4);
- __ popl(Address(temp2, 0));
- __ cfi().AdjustCFAOffset(-4);
- __ addl(temp1, Immediate(element_size));
- __ addl(temp2, Immediate(element_size));
- __ cmpl(temp1, temp3);
- __ j(kNotEqual, &loop);
- __ Bind(&done);
+ // Non read barrier code.
+
+ // Compute the base destination address in `temp2`.
+ if (dest_pos.IsConstant()) {
+ int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+ __ leal(temp2, Address(dest, element_size * constant + offset));
+ } else {
+ __ leal(temp2, Address(dest, dest_pos.AsRegister<Register>(), ScaleFactor::TIMES_4, offset));
+ }
+
+ // Compute the end source address in `temp3`.
+ if (length.IsConstant()) {
+ int32_t constant = length.GetConstant()->AsIntConstant()->GetValue();
+ __ leal(temp3, Address(temp1, element_size * constant));
+ } else {
+ __ leal(temp3, Address(temp1, length.AsRegister<Register>(), ScaleFactor::TIMES_4, 0));
+ }
+
+ // Iterate over the arrays and do a raw copy of the objects. We don't need to
+ // poison/unpoison.
+ NearLabel loop, done;
+ __ cmpl(temp1, temp3);
+ __ j(kEqual, &done);
+ __ Bind(&loop);
+ __ pushl(Address(temp1, 0));
+ __ cfi().AdjustCFAOffset(4);
+ __ popl(Address(temp2, 0));
+ __ cfi().AdjustCFAOffset(-4);
+ __ addl(temp1, Immediate(element_size));
+ __ addl(temp2, Immediate(element_size));
+ __ cmpl(temp1, temp3);
+ __ j(kNotEqual, &loop);
+ __ Bind(&done);
+ }
// We only need one card marking on the destination array.
codegen_->MarkGCCard(temp1,
@@ -2937,7 +3229,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
Register(kNoRegister),
/* value_can_be_null */ false);
- __ Bind(slow_path->GetExitLabel());
+ __ Bind(intrinsic_slow_path->GetExitLabel());
}
UNIMPLEMENTED_INTRINSIC(X86, MathRoundDouble)
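The gray check added in the x86 hunk above shifts the read-barrier state bit of the lock word into the carry flag. The following C++ sketch expresses the same predicate; the value of `kReadBarrierStateShift` is an assumption for illustration only, the real position is defined by art::LockWord rather than by this sketch.

    #include <cstdint>

    // Assumed bit position of the read-barrier state inside the 32-bit lock word.
    constexpr uint32_t kReadBarrierStateShift = 28;

    // white == 0 and gray == 1 (per the static_asserts in the patch), so testing
    // the low bit of rb_state is enough to decide whether to take the slow path.
    bool IsGray(uint32_t monitor_word) {
      uint32_t rb_state = monitor_word >> kReadBarrierStateShift;
      return (rb_state & 1u) != 0;
    }

The emitted x86 code reaches the same answer with a single shift: shifting right by `kReadBarrierStateShift + 1` moves that bit into CF, and `j(kCarrySet, ...)` branches to the read barrier slow path.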
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index ab8b05c3d4..311e1cd6eb 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -64,6 +64,65 @@ static void MoveArguments(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
using IntrinsicSlowPathX86_64 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorX86_64>;
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())-> // NOLINT
+
+// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
+class ReadBarrierSystemArrayCopySlowPathX86_64 : public SlowPathCode {
+ public:
+ explicit ReadBarrierSystemArrayCopySlowPathX86_64(HInstruction* instruction)
+ : SlowPathCode(instruction) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+ }
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
+ LocationSummary* locations = instruction_->GetLocations();
+ DCHECK(locations->CanCall());
+ DCHECK(instruction_->IsInvokeStaticOrDirect())
+ << "Unexpected instruction in read barrier arraycopy slow path: "
+ << instruction_->DebugName();
+ DCHECK(instruction_->GetLocations()->Intrinsified());
+ DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
+
+ int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
+
+ CpuRegister src_curr_addr = locations->GetTemp(0).AsRegister<CpuRegister>();
+ CpuRegister dst_curr_addr = locations->GetTemp(1).AsRegister<CpuRegister>();
+ CpuRegister src_stop_addr = locations->GetTemp(2).AsRegister<CpuRegister>();
+
+ __ Bind(GetEntryLabel());
+ NearLabel loop;
+ __ Bind(&loop);
+ __ movl(CpuRegister(TMP), Address(src_curr_addr, 0));
+ __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
+ // TODO: Inline the mark bit check before calling the runtime?
+ // TMP = ReadBarrier::Mark(TMP);
+ // No need to save live registers; it's taken care of by the
+ // entrypoint. Also, there is no need to update the stack mask,
+ // as this runtime call will not trigger a garbage collection.
+ int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(TMP);
+ // This runtime call does not require a stack map.
+ x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+ __ MaybePoisonHeapReference(CpuRegister(TMP));
+ __ movl(Address(dst_curr_addr, 0), CpuRegister(TMP));
+ __ addl(src_curr_addr, Immediate(element_size));
+ __ addl(dst_curr_addr, Immediate(element_size));
+ __ cmpl(src_curr_addr, src_stop_addr);
+ __ j(kNotEqual, &loop);
+ __ jmp(GetExitLabel());
+ }
+
+ const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathX86_64"; }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathX86_64);
+};
+
+#undef __
+
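As a rough model of the per-element work done by ReadBarrierSystemArrayCopySlowPathX86_64 above, here is a hedged C++ sketch; `Mark`, `MaybeUnpoison`, and `MaybePoison` are placeholders for the runtime mark entry point and ART's heap-reference poisoning helpers, not the patch's actual APIs. The slow path is entered only when the source object is gray, and the intrinsic has already skipped the copy for a zero length, so a do/while loop models it.

    #include <cstdint>

    uint32_t Mark(uint32_t ref) { return ref; }           // placeholder mark entry point
    uint32_t MaybeUnpoison(uint32_t ref) { return ref; }  // no-op unless heap poisoning is on
    uint32_t MaybePoison(uint32_t ref) { return ref; }    // no-op unless heap poisoning is on

    // Copy references one at a time, routing each through the mark routine.
    void SlowPathCopy(const uint32_t* src_curr, const uint32_t* src_stop,
                      uint32_t* dst_curr) {
      do {
        uint32_t value = MaybeUnpoison(*src_curr++);  // load and unpoison the reference
        *dst_curr++ = MaybePoison(Mark(value));       // mark it, re-poison, store
      } while (src_curr != src_stop);
    }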
#define __ assembler->
static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
@@ -1053,9 +1112,9 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopyChar(HInvoke* invoke) {
void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
- // TODO(rpl): Implement read barriers in the SystemArrayCopy
- // intrinsic and re-enable it (b/29516905).
- if (kEmitCompilerReadBarrier) {
+ // The only read barrier implementation supporting the
+ // SystemArrayCopy intrinsic is the Baker-style read barrier.
+ if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
return;
}
@@ -1063,9 +1122,9 @@ void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
- // TODO(rpl): Implement read barriers in the SystemArrayCopy
- // intrinsic and re-enable it (b/29516905).
- DCHECK(!kEmitCompilerReadBarrier);
+ // The only read barrier implementation supporting the
+ // SystemArrayCopy intrinsic is the Baker-style read barrier.
+ DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
X86_64Assembler* assembler = GetAssembler();
LocationSummary* locations = invoke->GetLocations();
@@ -1074,18 +1133,23 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+ uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
CpuRegister src = locations->InAt(0).AsRegister<CpuRegister>();
Location src_pos = locations->InAt(1);
CpuRegister dest = locations->InAt(2).AsRegister<CpuRegister>();
Location dest_pos = locations->InAt(3);
Location length = locations->InAt(4);
- CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>();
- CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>();
- CpuRegister temp3 = locations->GetTemp(2).AsRegister<CpuRegister>();
+ Location temp1_loc = locations->GetTemp(0);
+ CpuRegister temp1 = temp1_loc.AsRegister<CpuRegister>();
+ Location temp2_loc = locations->GetTemp(1);
+ CpuRegister temp2 = temp2_loc.AsRegister<CpuRegister>();
+ Location temp3_loc = locations->GetTemp(2);
+ CpuRegister temp3 = temp3_loc.AsRegister<CpuRegister>();
+ Location TMP_loc = Location::RegisterLocation(TMP);
- SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
- codegen_->AddSlowPath(slow_path);
+ SlowPathCode* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
+ codegen_->AddSlowPath(intrinsic_slow_path);
NearLabel conditions_on_positions_validated;
SystemArrayCopyOptimizations optimizations(invoke);
@@ -1101,7 +1165,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
DCHECK_GE(src_pos_constant, dest_pos_constant);
} else if (src_pos_constant < dest_pos_constant) {
__ cmpl(src, dest);
- __ j(kEqual, slow_path->GetEntryLabel());
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
}
} else {
if (!optimizations.GetDestinationIsSource()) {
@@ -1109,7 +1173,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
__ j(kNotEqual, &conditions_on_positions_validated);
}
__ cmpl(dest_pos.AsRegister<CpuRegister>(), Immediate(src_pos_constant));
- __ j(kGreater, slow_path->GetEntryLabel());
+ __ j(kGreater, intrinsic_slow_path->GetEntryLabel());
}
} else {
if (!optimizations.GetDestinationIsSource()) {
@@ -1119,10 +1183,10 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
if (dest_pos.IsConstant()) {
int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
__ cmpl(src_pos.AsRegister<CpuRegister>(), Immediate(dest_pos_constant));
- __ j(kLess, slow_path->GetEntryLabel());
+ __ j(kLess, intrinsic_slow_path->GetEntryLabel());
} else {
__ cmpl(src_pos.AsRegister<CpuRegister>(), dest_pos.AsRegister<CpuRegister>());
- __ j(kLess, slow_path->GetEntryLabel());
+ __ j(kLess, intrinsic_slow_path->GetEntryLabel());
}
}
@@ -1131,13 +1195,13 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
if (!optimizations.GetSourceIsNotNull()) {
// Bail out if the source is null.
__ testl(src, src);
- __ j(kEqual, slow_path->GetEntryLabel());
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
}
if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
// Bail out if the destination is null.
__ testl(dest, dest);
- __ j(kEqual, slow_path->GetEntryLabel());
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
}
// If the length is negative, bail out.
@@ -1146,7 +1210,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
!optimizations.GetCountIsSourceLength() &&
!optimizations.GetCountIsDestinationLength()) {
__ testl(length.AsRegister<CpuRegister>(), length.AsRegister<CpuRegister>());
- __ j(kLess, slow_path->GetEntryLabel());
+ __ j(kLess, intrinsic_slow_path->GetEntryLabel());
}
// Validity checks: source.
@@ -1154,7 +1218,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
src_pos,
src,
length,
- slow_path,
+ intrinsic_slow_path,
temp1,
optimizations.GetCountIsSourceLength());
@@ -1163,7 +1227,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
dest_pos,
dest,
length,
- slow_path,
+ intrinsic_slow_path,
temp1,
optimizations.GetCountIsDestinationLength());
@@ -1172,38 +1236,80 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
// type of the destination array. We do two checks: the classes are the same,
// or the destination is Object[]. If none of these checks succeed, we go to the
// slow path.
- __ movl(temp1, Address(dest, class_offset));
- __ movl(temp2, Address(src, class_offset));
+
bool did_unpoison = false;
- if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
- !optimizations.GetSourceIsNonPrimitiveArray()) {
- // One or two of the references need to be unpoisoned. Unpoison them
- // both to make the identity check valid.
- __ MaybeUnpoisonHeapReference(temp1);
- __ MaybeUnpoisonHeapReference(temp2);
- did_unpoison = true;
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // /* HeapReference<Class> */ temp1 = dest->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, dest, class_offset, temp3_loc, /* needs_null_check */ false);
+ // Register `temp1` is not trashed by the read barrier emitted
+ // by GenerateFieldLoadWithBakerReadBarrier below, as that
+ // method produces a call to a ReadBarrierMarkRegX entry point,
+ // which saves all potentially live registers, including
+ // temporaries such as `temp1`.
+ // /* HeapReference<Class> */ temp2 = src->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp2_loc, src, class_offset, temp3_loc, /* needs_null_check */ false);
+ // If heap poisoning is enabled, `temp1` and `temp2` have been
+ // unpoisoned by the previous calls to
+ // GenerateFieldLoadWithBakerReadBarrier.
+ } else {
+ // /* HeapReference<Class> */ temp1 = dest->klass_
+ __ movl(temp1, Address(dest, class_offset));
+ // /* HeapReference<Class> */ temp2 = src->klass_
+ __ movl(temp2, Address(src, class_offset));
+ if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
+ !optimizations.GetSourceIsNonPrimitiveArray()) {
+ // One or two of the references need to be unpoisoned. Unpoison them
+ // both to make the identity check valid.
+ __ MaybeUnpoisonHeapReference(temp1);
+ __ MaybeUnpoisonHeapReference(temp2);
+ did_unpoison = true;
+ }
}
if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
// Bail out if the destination is not a non primitive array.
- // /* HeapReference<Class> */ TMP = temp1->component_type_
- __ movl(CpuRegister(TMP), Address(temp1, component_offset));
- __ testl(CpuRegister(TMP), CpuRegister(TMP));
- __ j(kEqual, slow_path->GetEntryLabel());
- __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // /* HeapReference<Class> */ TMP = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, TMP_loc, temp1, component_offset, temp3_loc, /* needs_null_check */ false);
+ __ testl(CpuRegister(TMP), CpuRegister(TMP));
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+ // If heap poisoning is enabled, `TMP` has been unpoisoned by
+ // the previous call to GenerateFieldLoadWithBakerReadBarrier.
+ } else {
+ // /* HeapReference<Class> */ TMP = temp1->component_type_
+ __ movl(CpuRegister(TMP), Address(temp1, component_offset));
+ __ testl(CpuRegister(TMP), CpuRegister(TMP));
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+ __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
+ }
__ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
- __ j(kNotEqual, slow_path->GetEntryLabel());
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
}
if (!optimizations.GetSourceIsNonPrimitiveArray()) {
// Bail out if the source is not a non primitive array.
- // /* HeapReference<Class> */ TMP = temp2->component_type_
- __ movl(CpuRegister(TMP), Address(temp2, component_offset));
- __ testl(CpuRegister(TMP), CpuRegister(TMP));
- __ j(kEqual, slow_path->GetEntryLabel());
- __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // For the same reason given earlier, `temp1` is not trashed by the
+ // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
+ // /* HeapReference<Class> */ TMP = temp2->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, TMP_loc, temp2, component_offset, temp3_loc, /* needs_null_check */ false);
+ __ testl(CpuRegister(TMP), CpuRegister(TMP));
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+ // If heap poisoning is enabled, `TMP` has been unpoisoned by
+ // the previous call to GenerateFieldLoadWithBakerReadBarrier.
+ } else {
+ // /* HeapReference<Class> */ TMP = temp2->component_type_
+ __ movl(CpuRegister(TMP), Address(temp2, component_offset));
+ __ testl(CpuRegister(TMP), CpuRegister(TMP));
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+ __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
+ }
__ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
- __ j(kNotEqual, slow_path->GetEntryLabel());
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
}
__ cmpl(temp1, temp2);
@@ -1211,34 +1317,56 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
if (optimizations.GetDestinationIsTypedObjectArray()) {
NearLabel do_copy;
__ j(kEqual, &do_copy);
- if (!did_unpoison) {
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, temp1, component_offset, temp3_loc, /* needs_null_check */ false);
+ // We do not need to emit a read barrier for the following
+ // heap reference load, as `temp1` is only used in a
+ // comparison with null below, and this reference is not
+ // kept afterwards.
+ __ cmpl(Address(temp1, super_offset), Immediate(0));
+ } else {
+ if (!did_unpoison) {
+ __ MaybeUnpoisonHeapReference(temp1);
+ }
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ __ movl(temp1, Address(temp1, component_offset));
__ MaybeUnpoisonHeapReference(temp1);
+ // No need to unpoison the following heap reference load, as
+ // we're comparing against null.
+ __ cmpl(Address(temp1, super_offset), Immediate(0));
}
- // /* HeapReference<Class> */ temp1 = temp1->component_type_
- __ movl(temp1, Address(temp1, component_offset));
- __ MaybeUnpoisonHeapReference(temp1);
- // /* HeapReference<Class> */ temp1 = temp1->super_class_
- __ movl(temp1, Address(temp1, super_offset));
- // No need to unpoison the result, we're comparing against null.
- __ testl(temp1, temp1);
- __ j(kNotEqual, slow_path->GetEntryLabel());
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
__ Bind(&do_copy);
} else {
- __ j(kNotEqual, slow_path->GetEntryLabel());
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
}
} else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
// Bail out if the source is not a non primitive array.
- // /* HeapReference<Class> */ temp1 = src->klass_
- __ movl(temp1, Address(src, class_offset));
- __ MaybeUnpoisonHeapReference(temp1);
- // /* HeapReference<Class> */ TMP = temp1->component_type_
- __ movl(CpuRegister(TMP), Address(temp1, component_offset));
- __ testl(CpuRegister(TMP), CpuRegister(TMP));
- __ j(kEqual, slow_path->GetEntryLabel());
- __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // /* HeapReference<Class> */ temp1 = src->klass_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, temp1_loc, src, class_offset, temp3_loc, /* needs_null_check */ false);
+ // /* HeapReference<Class> */ TMP = temp1->component_type_
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, TMP_loc, temp1, component_offset, temp3_loc, /* needs_null_check */ false);
+ __ testl(CpuRegister(TMP), CpuRegister(TMP));
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+ } else {
+ // /* HeapReference<Class> */ temp1 = src->klass_
+ __ movl(temp1, Address(src, class_offset));
+ __ MaybeUnpoisonHeapReference(temp1);
+ // /* HeapReference<Class> */ TMP = temp1->component_type_
+ __ movl(CpuRegister(TMP), Address(temp1, component_offset));
+ // No need to unpoison `TMP` now, as we're comparing against null.
+ __ testl(CpuRegister(TMP), CpuRegister(TMP));
+ __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
+ __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
+ }
__ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
- __ j(kNotEqual, slow_path->GetEntryLabel());
+ __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
}
// Compute base source address, base destination address, and end source address.
@@ -1266,19 +1394,86 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
__ leal(temp3, Address(temp1, length.AsRegister<CpuRegister>(), ScaleFactor::TIMES_4, 0));
}
- // Iterate over the arrays and do a raw copy of the objects. We don't need to
- // poison/unpoison.
- NearLabel loop, done;
- __ cmpl(temp1, temp3);
- __ j(kEqual, &done);
- __ Bind(&loop);
- __ movl(CpuRegister(TMP), Address(temp1, 0));
- __ movl(Address(temp2, 0), CpuRegister(TMP));
- __ addl(temp1, Immediate(element_size));
- __ addl(temp2, Immediate(element_size));
- __ cmpl(temp1, temp3);
- __ j(kNotEqual, &loop);
- __ Bind(&done);
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // SystemArrayCopy implementation for Baker read barriers (see
+ // also CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier):
+ //
+ // if (src_ptr != end_ptr) {
+ // uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
+ // lfence; // Load fence or artificial data dependency to prevent load-load reordering
+ // bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+ // if (is_gray) {
+ // // Slow-path copy.
+ // do {
+ // *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
+ // } while (src_ptr != end_ptr)
+ // } else {
+ // // Fast-path copy.
+ // do {
+ // *dest_ptr++ = *src_ptr++;
+ // } while (src_ptr != end_ptr)
+ // }
+ // }
+
+ NearLabel loop, done;
+
+ // Don't enter copy loop if `length == 0`.
+ __ cmpl(temp1, temp3);
+ __ j(kEqual, &done);
+
+ // /* int32_t */ monitor = src->monitor_
+ __ movl(CpuRegister(TMP), Address(src, monitor_offset));
+ // /* LockWord */ lock_word = LockWord(monitor)
+ static_assert(sizeof(LockWord) == sizeof(int32_t),
+ "art::LockWord and int32_t have different sizes.");
+
+ // Load fence to prevent load-load reordering.
+ // Note that this is a no-op, thanks to the x86-64 memory model.
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+
+ // Slow path used to copy array when `src` is gray.
+ SlowPathCode* read_barrier_slow_path =
+ new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathX86_64(invoke);
+ codegen_->AddSlowPath(read_barrier_slow_path);
+
+ // Given the numeric representation, it's enough to check the low bit of the
+ // rb_state. We do that by shifting the bit out of the lock word with SHR.
+ static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
+ static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+ __ shrl(CpuRegister(TMP), Immediate(LockWord::kReadBarrierStateShift + 1));
+ __ j(kCarrySet, read_barrier_slow_path->GetEntryLabel());
+
+ // Fast-path copy.
+ // Iterate over the arrays and do a raw copy of the objects. We don't need to
+ // poison/unpoison.
+ __ Bind(&loop);
+ __ movl(CpuRegister(TMP), Address(temp1, 0));
+ __ movl(Address(temp2, 0), CpuRegister(TMP));
+ __ addl(temp1, Immediate(element_size));
+ __ addl(temp2, Immediate(element_size));
+ __ cmpl(temp1, temp3);
+ __ j(kNotEqual, &loop);
+
+ __ Bind(read_barrier_slow_path->GetExitLabel());
+ __ Bind(&done);
+ } else {
+ // Non read barrier code.
+
+ // Iterate over the arrays and do a raw copy of the objects. We don't need to
+ // poison/unpoison.
+ NearLabel loop, done;
+ __ cmpl(temp1, temp3);
+ __ j(kEqual, &done);
+ __ Bind(&loop);
+ __ movl(CpuRegister(TMP), Address(temp1, 0));
+ __ movl(Address(temp2, 0), CpuRegister(TMP));
+ __ addl(temp1, Immediate(element_size));
+ __ addl(temp2, Immediate(element_size));
+ __ cmpl(temp1, temp3);
+ __ j(kNotEqual, &loop);
+ __ Bind(&done);
+ }
// We only need one card marking on the destination array.
codegen_->MarkGCCard(temp1,
@@ -1287,7 +1482,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
CpuRegister(kNoRegister),
/* value_can_be_null */ false);
- __ Bind(slow_path->GetExitLabel());
+ __ Bind(intrinsic_slow_path->GetExitLabel());
}
void IntrinsicLocationsBuilderX86_64::VisitStringCompareTo(HInvoke* invoke) {
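
The SHR + kCarrySet sequence above is just a fast test of the read barrier state
bit in the source object's lock word. A minimal standalone sketch of the check it
implements, assuming the white/gray values from the static_asserts in the hunk
(white == 0, gray == 1) and a placeholder value standing in for
LockWord::kReadBarrierStateShift:

    #include <cassert>
    #include <cstdint>

    constexpr uint32_t kWhitePtr = 0;                // mirrors the static_asserts above
    constexpr uint32_t kGrayPtr = 1;
    constexpr uint32_t kReadBarrierStateShift = 28;  // placeholder value, assumption only

    // What `shrl TMP, Immediate(shift + 1)` followed by `j(kCarrySet, ...)` computes:
    // the last bit shifted out (into the carry flag) is the low bit of rb_state,
    // so carry set <=> the source object is gray and the slow-path copy is taken.
    inline bool IsGray(uint32_t monitor) {
      uint32_t rb_state = (monitor >> kReadBarrierStateShift) & 1u;
      return rb_state == kGrayPtr;
    }

    int main() {
      assert(!IsGray(kWhitePtr << kReadBarrierStateShift));
      assert(IsGray(kGrayPtr << kReadBarrierStateShift));
      return 0;
    }
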
diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc
index a6d234d739..8c0231e1aa 100644
--- a/compiler/optimizing/optimizing_cfi_test.cc
+++ b/compiler/optimizing/optimizing_cfi_test.cc
@@ -157,13 +157,26 @@ class OptimizingCFITest : public CFITest {
TestImpl(isa, #isa, expected_asm, expected_cfi); \
}
+#ifdef ART_ENABLE_CODEGEN_arm
TEST_ISA(kThumb2)
+#endif
+#ifdef ART_ENABLE_CODEGEN_arm64
TEST_ISA(kArm64)
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86
TEST_ISA(kX86)
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86_64
TEST_ISA(kX86_64)
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips
TEST_ISA(kMips)
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips64
TEST_ISA(kMips64)
+#endif
+#ifdef ART_ENABLE_CODEGEN_arm
TEST_F(OptimizingCFITest, kThumb2Adjust) {
std::vector<uint8_t> expected_asm(
expected_asm_kThumb2_adjust,
@@ -184,7 +197,9 @@ TEST_F(OptimizingCFITest, kThumb2Adjust) {
Finish();
Check(kThumb2, "kThumb2_adjust", expected_asm, expected_cfi);
}
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips
TEST_F(OptimizingCFITest, kMipsAdjust) {
// One NOP in delay slot, 1 << 15 NOPS have size 1 << 17 which exceeds 18-bit signed maximum.
static constexpr size_t kNumNops = 1u + (1u << 15);
@@ -212,7 +227,9 @@ TEST_F(OptimizingCFITest, kMipsAdjust) {
Finish();
Check(kMips, "kMips_adjust", expected_asm, expected_cfi);
}
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips64
TEST_F(OptimizingCFITest, kMips64Adjust) {
// One NOP in forbidden slot, 1 << 15 NOPS have size 1 << 17 which exceeds 18-bit signed maximum.
static constexpr size_t kNumNops = 1u + (1u << 15);
@@ -240,6 +257,7 @@ TEST_F(OptimizingCFITest, kMips64Adjust) {
Finish();
Check(kMips64, "kMips64_adjust", expected_asm, expected_cfi);
}
+#endif
#endif // ART_TARGET_ANDROID
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 6aaa15fa02..a1da20bae4 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -180,6 +180,7 @@ class PassObserver : public ValueObject {
private:
void StartPass(const char* pass_name) {
+ VLOG(compiler) << "Starting pass: " << pass_name;
// Dump graph first, then start timer.
if (visualizer_enabled_) {
visualizer_.DumpGraph(pass_name, /* is_after_pass */ false, graph_in_bad_state_);
@@ -438,11 +439,7 @@ static HOptimization* BuildOptimization(
StackHandleScopeCollection* handles,
SideEffectsAnalysis* most_recent_side_effects,
HInductionVarAnalysis* most_recent_induction) {
- if (opt_name == arm::InstructionSimplifierArm::kInstructionSimplifierArmPassName) {
- return new (arena) arm::InstructionSimplifierArm(graph, stats);
- } else if (opt_name == arm64::InstructionSimplifierArm64::kInstructionSimplifierArm64PassName) {
- return new (arena) arm64::InstructionSimplifierArm64(graph, stats);
- } else if (opt_name == BoundsCheckElimination::kBoundsCheckEliminationPassName) {
+ if (opt_name == BoundsCheckElimination::kBoundsCheckEliminationPassName) {
CHECK(most_recent_side_effects != nullptr && most_recent_induction != nullptr);
return new (arena) BoundsCheckElimination(graph,
*most_recent_side_effects,
@@ -482,16 +479,30 @@ static HOptimization* BuildOptimization(
} else if (opt_name == LoadStoreElimination::kLoadStoreEliminationPassName) {
CHECK(most_recent_side_effects != nullptr);
return new (arena) LoadStoreElimination(graph, *most_recent_side_effects);
+ } else if (opt_name == SideEffectsAnalysis::kSideEffectsAnalysisPassName) {
+ return new (arena) SideEffectsAnalysis(graph);
+#ifdef ART_ENABLE_CODEGEN_arm
+ } else if (opt_name == arm::DexCacheArrayFixups::kDexCacheArrayFixupsArmPassName) {
+ return new (arena) arm::DexCacheArrayFixups(graph, stats);
+ } else if (opt_name == arm::InstructionSimplifierArm::kInstructionSimplifierArmPassName) {
+ return new (arena) arm::InstructionSimplifierArm(graph, stats);
+#endif
+#ifdef ART_ENABLE_CODEGEN_arm64
+ } else if (opt_name == arm64::InstructionSimplifierArm64::kInstructionSimplifierArm64PassName) {
+ return new (arena) arm64::InstructionSimplifierArm64(graph, stats);
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips
} else if (opt_name == mips::DexCacheArrayFixups::kDexCacheArrayFixupsMipsPassName) {
return new (arena) mips::DexCacheArrayFixups(graph, codegen, stats);
} else if (opt_name == mips::PcRelativeFixups::kPcRelativeFixupsMipsPassName) {
return new (arena) mips::PcRelativeFixups(graph, codegen, stats);
- } else if (opt_name == SideEffectsAnalysis::kSideEffectsAnalysisPassName) {
- return new (arena) SideEffectsAnalysis(graph);
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86
} else if (opt_name == x86::PcRelativeFixups::kPcRelativeFixupsX86PassName) {
return new (arena) x86::PcRelativeFixups(graph, codegen, stats);
} else if (opt_name == x86::X86MemoryOperandGeneration::kX86MemoryOperandGenerationPassName) {
return new (arena) x86::X86MemoryOperandGeneration(graph, codegen, stats);
+#endif
}
return nullptr;
}
@@ -581,6 +592,7 @@ void OptimizingCompiler::RunArchOptimizations(InstructionSet instruction_set,
HGraph* graph,
CodeGenerator* codegen,
PassObserver* pass_observer) const {
+ UNUSED(codegen); // To avoid compilation error when compiling for svelte
OptimizingCompilerStats* stats = compilation_stats_.get();
ArenaAllocator* arena = graph->GetArena();
switch (instruction_set) {
diff --git a/compiler/optimizing/register_allocator_graph_color.cc b/compiler/optimizing/register_allocator_graph_color.cc
index cfdb41ab62..a21595fe03 100644
--- a/compiler/optimizing/register_allocator_graph_color.cc
+++ b/compiler/optimizing/register_allocator_graph_color.cc
@@ -227,7 +227,8 @@ class InterferenceNode : public ArenaObject<kArenaAllocRegisterAllocator> {
out_degree_(interval->HasRegister() ? std::numeric_limits<size_t>::max() : 0),
alias_(this),
spill_weight_(ComputeSpillWeight(interval, liveness)),
- requires_color_(interval->RequiresRegister()) {
+ requires_color_(interval->RequiresRegister()),
+ needs_spill_slot_(false) {
DCHECK(!interval->IsHighInterval()) << "Pair nodes should be represented by the low interval";
}
@@ -342,6 +343,14 @@ class InterferenceNode : public ArenaObject<kArenaAllocRegisterAllocator> {
return (IsPair() || other->IsPair()) ? 2 : 1;
}
+ bool NeedsSpillSlot() const {
+ return needs_spill_slot_;
+ }
+
+ void SetNeedsSpillSlot() {
+ needs_spill_slot_ = true;
+ }
+
// The current stage of this node, indicating which worklist it belongs to.
NodeStage stage;
@@ -376,6 +385,8 @@ class InterferenceNode : public ArenaObject<kArenaAllocRegisterAllocator> {
const bool requires_color_;
+ bool needs_spill_slot_;
+
DISALLOW_COPY_AND_ASSIGN(InterferenceNode);
};
@@ -549,10 +560,10 @@ RegisterAllocatorGraphColor::RegisterAllocatorGraphColor(ArenaAllocator* allocat
safepoints_(allocator->Adapter(kArenaAllocRegisterAllocator)),
physical_core_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)),
physical_fp_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)),
- int_spill_slot_counter_(0),
- double_spill_slot_counter_(0),
- float_spill_slot_counter_(0),
- long_spill_slot_counter_(0),
+ num_int_spill_slots_(0),
+ num_double_spill_slots_(0),
+ num_float_spill_slots_(0),
+ num_long_spill_slots_(0),
catch_phi_spill_slot_counter_(0),
reserved_art_method_slots_(ComputeReservedArtMethodSlots(*codegen)),
reserved_out_slots_(codegen->GetGraph()->GetMaximumNumberOfOutVRegs()),
@@ -653,6 +664,9 @@ void RegisterAllocatorGraphColor::AllocateRegisters() {
}
if (successful) {
+ // Assign spill slots.
+ AllocateSpillSlots(iteration.GetPrunableNodes());
+
// Compute the maximum number of live registers across safepoints.
// Notice that we do not count globally blocked registers, such as the stack pointer.
if (safepoints.size() > 0) {
@@ -700,10 +714,10 @@ void RegisterAllocatorGraphColor::AllocateRegisters() {
.Resolve(max_safepoint_live_core_regs_,
max_safepoint_live_fp_regs_,
reserved_art_method_slots_ + reserved_out_slots_,
- int_spill_slot_counter_,
- long_spill_slot_counter_,
- float_spill_slot_counter_,
- double_spill_slot_counter_,
+ num_int_spill_slots_,
+ num_long_spill_slots_,
+ num_float_spill_slots_,
+ num_double_spill_slots_,
catch_phi_spill_slot_counter_,
temp_intervals_);
@@ -743,10 +757,10 @@ bool RegisterAllocatorGraphColor::Validate(bool log_fatal_on_failure) {
}
}
- size_t spill_slots = int_spill_slot_counter_
- + long_spill_slot_counter_
- + float_spill_slot_counter_
- + double_spill_slot_counter_
+ size_t spill_slots = num_int_spill_slots_
+ + num_long_spill_slots_
+ + num_float_spill_slots_
+ + num_double_spill_slots_
+ catch_phi_spill_slot_counter_;
bool ok = ValidateIntervals(intervals,
spill_slots,
@@ -1910,7 +1924,7 @@ bool ColoringIteration::ColorInterferenceGraph() {
// be colored, and that we should split.
} else {
// Spill.
- register_allocator_->AllocateSpillSlotFor(interval);
+ node->SetNeedsSpillSlot();
}
}
@@ -1936,52 +1950,156 @@ size_t RegisterAllocatorGraphColor::ComputeMaxSafepointLiveRegisters(
return max_safepoint_live_regs;
}
-void RegisterAllocatorGraphColor::AllocateSpillSlotFor(LiveInterval* interval) {
- LiveInterval* parent = interval->GetParent();
- HInstruction* defined_by = parent->GetDefinedBy();
- if (parent->HasSpillSlot()) {
- // We already have a spill slot for this value that we can reuse.
- } else if (defined_by->IsParameterValue()) {
- // Parameters already have a stack slot.
- parent->SetSpillSlot(codegen_->GetStackSlotOfParameter(defined_by->AsParameterValue()));
- } else if (defined_by->IsCurrentMethod()) {
- // The current method is always at spill slot 0.
- parent->SetSpillSlot(0);
- } else if (defined_by->IsConstant()) {
- // Constants don't need a spill slot.
- } else {
- // Allocate a spill slot based on type.
- size_t* spill_slot_counter;
- switch (interval->GetType()) {
- case Primitive::kPrimDouble:
- spill_slot_counter = &double_spill_slot_counter_;
- break;
- case Primitive::kPrimLong:
- spill_slot_counter = &long_spill_slot_counter_;
- break;
- case Primitive::kPrimFloat:
- spill_slot_counter = &float_spill_slot_counter_;
- break;
- case Primitive::kPrimNot:
- case Primitive::kPrimInt:
- case Primitive::kPrimChar:
- case Primitive::kPrimByte:
- case Primitive::kPrimBoolean:
- case Primitive::kPrimShort:
- spill_slot_counter = &int_spill_slot_counter_;
- break;
- case Primitive::kPrimVoid:
- LOG(FATAL) << "Unexpected type for interval " << interval->GetType();
- UNREACHABLE();
+void RegisterAllocatorGraphColor::AllocateSpillSlots(const ArenaVector<InterferenceNode*>& nodes) {
+ // The register allocation resolver will organize the stack based on value type,
+ // so we assign stack slots for each value type separately.
+ ArenaVector<LiveInterval*> double_intervals(allocator_->Adapter(kArenaAllocRegisterAllocator));
+ ArenaVector<LiveInterval*> long_intervals(allocator_->Adapter(kArenaAllocRegisterAllocator));
+ ArenaVector<LiveInterval*> float_intervals(allocator_->Adapter(kArenaAllocRegisterAllocator));
+ ArenaVector<LiveInterval*> int_intervals(allocator_->Adapter(kArenaAllocRegisterAllocator));
+
+ // The set of parent intervals already handled.
+ ArenaSet<LiveInterval*> seen(allocator_->Adapter(kArenaAllocRegisterAllocator));
+
+ // Find nodes that need spill slots.
+ for (InterferenceNode* node : nodes) {
+ if (!node->NeedsSpillSlot()) {
+ continue;
}
- parent->SetSpillSlot(*spill_slot_counter);
- *spill_slot_counter += parent->NeedsTwoSpillSlots() ? 2 : 1;
- // TODO: Could color stack slots if we wanted to, even if
- // it's just a trivial coloring. See the linear scan implementation,
- // which simply reuses spill slots for values whose live intervals
- // have already ended.
+ LiveInterval* parent = node->GetInterval()->GetParent();
+ if (seen.find(parent) != seen.end()) {
+ // We've already handled this interval.
+ // This can happen if multiple siblings of the same interval request a stack slot.
+ continue;
+ }
+ seen.insert(parent);
+
+ HInstruction* defined_by = parent->GetDefinedBy();
+ if (parent->HasSpillSlot()) {
+ // We already have a spill slot for this value that we can reuse.
+ } else if (defined_by->IsParameterValue()) {
+ // Parameters already have a stack slot.
+ parent->SetSpillSlot(codegen_->GetStackSlotOfParameter(defined_by->AsParameterValue()));
+ } else if (defined_by->IsCurrentMethod()) {
+ // The current method is always at stack slot 0.
+ parent->SetSpillSlot(0);
+ } else if (defined_by->IsConstant()) {
+ // Constants don't need a spill slot.
+ } else {
+ // We need to find a spill slot for this interval. Place it in the correct
+ // worklist to be processed later.
+ switch (node->GetInterval()->GetType()) {
+ case Primitive::kPrimDouble:
+ double_intervals.push_back(parent);
+ break;
+ case Primitive::kPrimLong:
+ long_intervals.push_back(parent);
+ break;
+ case Primitive::kPrimFloat:
+ float_intervals.push_back(parent);
+ break;
+ case Primitive::kPrimNot:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimShort:
+ int_intervals.push_back(parent);
+ break;
+ case Primitive::kPrimVoid:
+ LOG(FATAL) << "Unexpected type for interval " << node->GetInterval()->GetType();
+ UNREACHABLE();
+ }
+ }
+ }
+
+ // Color spill slots for each value type.
+ ColorSpillSlots(&double_intervals, &num_double_spill_slots_);
+ ColorSpillSlots(&long_intervals, &num_long_spill_slots_);
+ ColorSpillSlots(&float_intervals, &num_float_spill_slots_);
+ ColorSpillSlots(&int_intervals, &num_int_spill_slots_);
+}
+
+void RegisterAllocatorGraphColor::ColorSpillSlots(ArenaVector<LiveInterval*>* intervals,
+ size_t* num_stack_slots_used) {
+ // We cannot use the original interference graph here because spill slots are assigned to
+ // all of the siblings of an interval, whereas an interference node represents only a single
+ // sibling. So, we assign spill slots linear-scan-style by sorting all the interval endpoints
+ // by position, and assigning the lowest spill slot available when we encounter an interval
+ // beginning. We ignore lifetime holes for simplicity.
+ ArenaVector<std::tuple<size_t, bool, LiveInterval*>> interval_endpoints(
+ allocator_->Adapter(kArenaAllocRegisterAllocator));
+
+ for (auto it = intervals->begin(), e = intervals->end(); it != e; ++it) {
+ LiveInterval* parent_interval = *it;
+ DCHECK(parent_interval->IsParent());
+ DCHECK(!parent_interval->HasSpillSlot());
+ size_t start = parent_interval->GetStart();
+ size_t end = parent_interval->GetLastSibling()->GetEnd();
+ DCHECK_LT(start, end);
+ interval_endpoints.push_back(std::make_tuple(start, true, parent_interval));
+ interval_endpoints.push_back(std::make_tuple(end, false, parent_interval));
+ }
+
+ // Sort by position.
+ // We explicitly ignore the third entry of each tuple (the interval pointer) in order
+ // to maintain determinism.
+ std::sort(interval_endpoints.begin(), interval_endpoints.end(),
+ [] (const std::tuple<size_t, bool, LiveInterval*>& lhs,
+ const std::tuple<size_t, bool, LiveInterval*>& rhs) {
+ return std::tie(std::get<0>(lhs), std::get<1>(lhs))
+ < std::tie(std::get<0>(rhs), std::get<1>(rhs));
+ });
+
+ ArenaBitVector taken(allocator_, 0, true);
+ for (auto it = interval_endpoints.begin(), end = interval_endpoints.end(); it != end; ++it) {
+ // Extract information from the current tuple.
+ LiveInterval* parent_interval;
+ bool is_interval_beginning;
+ size_t position;
+ std::tie(position, is_interval_beginning, parent_interval) = *it;
+
+ bool needs_two_slots = parent_interval->NeedsTwoSpillSlots();
+
+ if (is_interval_beginning) {
+ DCHECK(!parent_interval->HasSpillSlot());
+ DCHECK_EQ(position, parent_interval->GetStart());
+
+ // Find a free stack slot.
+ size_t slot = 0;
+ for (; taken.IsBitSet(slot) || (needs_two_slots && taken.IsBitSet(slot + 1)); ++slot) {
+ // Skip taken slots.
+ }
+ parent_interval->SetSpillSlot(slot);
+
+ *num_stack_slots_used = std::max(*num_stack_slots_used,
+                                       needs_two_slots ? slot + 2 : slot + 1);
+ if (needs_two_slots && *num_stack_slots_used % 2 != 0) {
+ // The parallel move resolver requires that there be an even number of spill slots
+ // allocated for pair value types.
+ ++(*num_stack_slots_used);
+ }
+
+ taken.SetBit(slot);
+ if (needs_two_slots) {
+ taken.SetBit(slot + 1);
+ }
+ } else {
+ DCHECK_EQ(position, parent_interval->GetLastSibling()->GetEnd());
+ DCHECK(parent_interval->HasSpillSlot());
+
+ // Free up the stack slot used by this interval.
+ size_t slot = parent_interval->GetSpillSlot();
+ DCHECK(taken.IsBitSet(slot));
+ DCHECK(!needs_two_slots || taken.IsBitSet(slot + 1));
+ taken.ClearBit(slot);
+ if (needs_two_slots) {
+ taken.ClearBit(slot + 1);
+ }
+ }
}
+ DCHECK_EQ(taken.NumSetBits(), 0u);
}
} // namespace art
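
The spill-slot assignment introduced above is a small interval coloring done
linear-scan style: sort interval endpoints, hand out the lowest free slot when an
interval begins, and release it when the interval ends. A self-contained sketch of
the same idea follows; Interval and std::vector<bool> are simplified stand-ins for
ART's LiveInterval and ArenaBitVector, and all names here are assumptions made for
illustration only:

    #include <algorithm>
    #include <cassert>
    #include <cstddef>
    #include <tuple>
    #include <vector>

    struct Interval {            // stand-in for LiveInterval (assumption)
      size_t start;
      size_t end;                // end position of the last sibling
      bool needs_two_slots;
      size_t slot = 0;           // filled in by ColorSpillSlots
    };

    // Assign slots so that intervals which overlap in time never share a slot.
    size_t ColorSpillSlots(std::vector<Interval*>& intervals) {
      // (position, is_begin, interval); like the code above, sort on the first
      // two fields only so the order stays deterministic.
      std::vector<std::tuple<size_t, bool, Interval*>> endpoints;
      for (Interval* it : intervals) {
        endpoints.emplace_back(it->start, true, it);
        endpoints.emplace_back(it->end, false, it);
      }
      std::sort(endpoints.begin(), endpoints.end(),
                [](const std::tuple<size_t, bool, Interval*>& lhs,
                   const std::tuple<size_t, bool, Interval*>& rhs) {
                  return std::tie(std::get<0>(lhs), std::get<1>(lhs)) <
                         std::tie(std::get<0>(rhs), std::get<1>(rhs));
                });

      std::vector<bool> taken;   // stand-in for the `taken` ArenaBitVector
      size_t num_slots_used = 0;
      for (const auto& endpoint : endpoints) {
        Interval* it = std::get<2>(endpoint);
        const size_t width = it->needs_two_slots ? 2 : 1;
        auto is_free = [&](size_t s) { return s >= taken.size() || !taken[s]; };
        if (std::get<1>(endpoint)) {
          // Interval begins: take the lowest free slot (or free adjacent pair).
          size_t slot = 0;
          while (!is_free(slot) || (width == 2 && !is_free(slot + 1))) ++slot;
          if (taken.size() < slot + width) taken.resize(slot + width, false);
          for (size_t i = 0; i < width; ++i) taken[slot + i] = true;
          it->slot = slot;
          num_slots_used = std::max(num_slots_used, slot + width);
        } else {
          // Interval ends: release its slot(s) for reuse, linear-scan style.
          for (size_t i = 0; i < width; ++i) taken[it->slot + i] = false;
        }
      }
      return num_slots_used;
    }

    int main() {
      Interval a{0, 10, false}, b{5, 15, true}, c{12, 20, false};
      std::vector<Interval*> intervals = {&a, &b, &c};
      size_t used = ColorSpillSlots(intervals);
      assert(a.slot != b.slot);  // a and b overlap, so they must not share a slot
      assert(used == 3);         // one slot for a, an adjacent pair for b; c reuses a's slot
      return 0;
    }
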
diff --git a/compiler/optimizing/register_allocator_graph_color.h b/compiler/optimizing/register_allocator_graph_color.h
index 9dddcea685..ed12561d2c 100644
--- a/compiler/optimizing/register_allocator_graph_color.h
+++ b/compiler/optimizing/register_allocator_graph_color.h
@@ -144,9 +144,13 @@ class RegisterAllocatorGraphColor : public RegisterAllocator {
// based on the outgoing interference edges of safepoint nodes.
size_t ComputeMaxSafepointLiveRegisters(const ArenaVector<InterferenceNode*>& safepoints);
- // If necessary, add the given interval to the list of spilled intervals,
- // and make sure it's ready to be spilled to the stack.
- void AllocateSpillSlotFor(LiveInterval* interval);
+ // Assigns stack slots to a list of intervals, ensuring that interfering intervals are not
+ // assigned the same stack slot.
+  void ColorSpillSlots(ArenaVector<LiveInterval*>* intervals,
+                       size_t* num_stack_slots_used);
+
+ // Provide stack slots to nodes that need them.
+ void AllocateSpillSlots(const ArenaVector<InterferenceNode*>& nodes);
// Whether iterative move coalescing should be performed. Iterative move coalescing
// improves code quality, but increases compile time.
@@ -170,10 +174,10 @@ class RegisterAllocatorGraphColor : public RegisterAllocator {
ArenaVector<InterferenceNode*> physical_fp_nodes_;
// Allocated stack slot counters.
- size_t int_spill_slot_counter_;
- size_t double_spill_slot_counter_;
- size_t float_spill_slot_counter_;
- size_t long_spill_slot_counter_;
+ size_t num_int_spill_slots_;
+ size_t num_double_spill_slots_;
+ size_t num_float_spill_slots_;
+ size_t num_long_spill_slots_;
size_t catch_phi_spill_slot_counter_;
// Number of stack slots needed for the pointer to the current method.
diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h
index 86548e153b..c52a5a94f4 100644
--- a/compiler/utils/arm/assembler_arm.h
+++ b/compiler/utils/arm/assembler_arm.h
@@ -908,6 +908,12 @@ class ArmAssembler : public Assembler {
// reg = -reg.
rsb(reg, reg, ShifterOperand(0));
}
+ // Poison a heap reference contained in `reg` if heap poisoning is enabled.
+ void MaybePoisonHeapReference(Register reg) {
+ if (kPoisonHeapReferences) {
+ PoisonHeapReference(reg);
+ }
+ }
// Unpoison a heap reference contained in `reg` if heap poisoning is enabled.
void MaybeUnpoisonHeapReference(Register reg) {
if (kPoisonHeapReferences) {
diff --git a/compiler/utils/arm64/assembler_arm64.cc b/compiler/utils/arm64/assembler_arm64.cc
index 19450b3a32..f91bcfa92e 100644
--- a/compiler/utils/arm64/assembler_arm64.cc
+++ b/compiler/utils/arm64/assembler_arm64.cc
@@ -146,6 +146,12 @@ void Arm64Assembler::UnpoisonHeapReference(Register reg) {
___ Neg(reg, Operand(reg));
}
+void Arm64Assembler::MaybePoisonHeapReference(Register reg) {
+ if (kPoisonHeapReferences) {
+ PoisonHeapReference(reg);
+ }
+}
+
void Arm64Assembler::MaybeUnpoisonHeapReference(Register reg) {
if (kPoisonHeapReferences) {
UnpoisonHeapReference(reg);
diff --git a/compiler/utils/arm64/assembler_arm64.h b/compiler/utils/arm64/assembler_arm64.h
index 2847cb86a8..66a7fed804 100644
--- a/compiler/utils/arm64/assembler_arm64.h
+++ b/compiler/utils/arm64/assembler_arm64.h
@@ -93,6 +93,8 @@ class Arm64Assembler FINAL : public Assembler {
void PoisonHeapReference(vixl::aarch64::Register reg);
// Unpoison a heap reference contained in `reg`.
void UnpoisonHeapReference(vixl::aarch64::Register reg);
+ // Poison a heap reference contained in `reg` if heap poisoning is enabled.
+ void MaybePoisonHeapReference(vixl::aarch64::Register reg);
// Unpoison a heap reference contained in `reg` if heap poisoning is enabled.
void MaybeUnpoisonHeapReference(vixl::aarch64::Register reg);
diff --git a/compiler/utils/jni_macro_assembler.cc b/compiler/utils/jni_macro_assembler.cc
index 797a98cfd5..1b743134ed 100644
--- a/compiler/utils/jni_macro_assembler.cc
+++ b/compiler/utils/jni_macro_assembler.cc
@@ -99,6 +99,7 @@ MacroAsm64UniquePtr JNIMacroAssembler<PointerSize::k64>::Create(
return MacroAsm64UniquePtr(new (arena) x86_64::X86_64JNIMacroAssembler(arena));
#endif
default:
+ UNUSED(arena);
LOG(FATAL) << "Unknown/unsupported 8B InstructionSet: " << instruction_set;
UNREACHABLE();
}
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index 92a92a58b9..63aa4a4b8f 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -639,6 +639,12 @@ class X86Assembler FINAL : public Assembler {
void PoisonHeapReference(Register reg) { negl(reg); }
// Unpoison a heap reference contained in `reg`.
void UnpoisonHeapReference(Register reg) { negl(reg); }
+ // Poison a heap reference contained in `reg` if heap poisoning is enabled.
+ void MaybePoisonHeapReference(Register reg) {
+ if (kPoisonHeapReferences) {
+ PoisonHeapReference(reg);
+ }
+ }
// Unpoison a heap reference contained in `reg` if heap poisoning is enabled.
void MaybeUnpoisonHeapReference(Register reg) {
if (kPoisonHeapReferences) {
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 370f49cb05..a4166f965d 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -741,6 +741,12 @@ class X86_64Assembler FINAL : public Assembler {
void PoisonHeapReference(CpuRegister reg) { negl(reg); }
// Unpoison a heap reference contained in `reg`.
void UnpoisonHeapReference(CpuRegister reg) { negl(reg); }
+ // Poison a heap reference contained in `reg` if heap poisoning is enabled.
+ void MaybePoisonHeapReference(CpuRegister reg) {
+ if (kPoisonHeapReferences) {
+ PoisonHeapReference(reg);
+ }
+ }
// Unpoison a heap reference contained in `reg` if heap poisoning is enabled.
void MaybeUnpoisonHeapReference(CpuRegister reg) {
if (kPoisonHeapReferences) {
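
The Maybe{Poison,Unpoison}HeapReference helpers added to the four assemblers above
all share the same contract: poisoning a reference is a 32-bit negation (negl / Neg),
which is its own inverse, and the Maybe variants do nothing unless
kPoisonHeapReferences is set. A minimal sketch of that contract in plain C++;
modelling references as raw uint32_t values is an assumption made only for this
illustration:

    #include <cassert>
    #include <cstdint>

    constexpr bool kPoisonHeapReferences = true;  // build-time switch, as in ART

    // Poisoning is a 32-bit two's-complement negation, so it is its own inverse.
    inline uint32_t PoisonHeapReference(uint32_t ref) { return 0u - ref; }
    inline uint32_t UnpoisonHeapReference(uint32_t ref) { return 0u - ref; }

    inline uint32_t MaybePoisonHeapReference(uint32_t ref) {
      return kPoisonHeapReferences ? PoisonHeapReference(ref) : ref;
    }
    inline uint32_t MaybeUnpoisonHeapReference(uint32_t ref) {
      return kPoisonHeapReferences ? UnpoisonHeapReference(ref) : ref;
    }

    int main() {
      const uint32_t ref = 0x12345678u;
      // Round-trip: unpoisoning a poisoned reference gives the original back,
      // whether or not poisoning is enabled.
      assert(MaybeUnpoisonHeapReference(MaybePoisonHeapReference(ref)) == ref);
      assert(PoisonHeapReference(ref) != ref);  // poisoned values are distinguishable
      return 0;
    }
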
diff --git a/runtime/Android.mk b/runtime/Android.mk
index c0a0951693..0e50eeb156 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -383,29 +383,6 @@ LIBART_CFLAGS := -DBUILDING_LIBART=1
LIBART_TARGET_CFLAGS :=
LIBART_HOST_CFLAGS :=
-# Default dex2oat instruction set features.
-LIBART_HOST_DEFAULT_INSTRUCTION_SET_FEATURES := default
-LIBART_TARGET_DEFAULT_INSTRUCTION_SET_FEATURES := default
-2ND_LIBART_TARGET_DEFAULT_INSTRUCTION_SET_FEATURES := default
-ifeq ($(DEX2OAT_TARGET_ARCH),arm)
- ifneq (,$(filter $(DEX2OAT_TARGET_CPU_VARIANT),cortex-a15 krait denver))
- LIBART_TARGET_DEFAULT_INSTRUCTION_SET_FEATURES := atomic_ldrd_strd,div
- else
- ifneq (,$(filter $(DEX2OAT_TARGET_CPU_VARIANT),cortex-a7))
- LIBART_TARGET_DEFAULT_INSTRUCTION_SET_FEATURES := div
- endif
- endif
-endif
-ifeq ($(2ND_DEX2OAT_TARGET_ARCH),arm)
- ifneq (,$(filter $(DEX2OAT_TARGET_CPU_VARIANT),cortex-a15 krait denver))
- 2ND_LIBART_TARGET_DEFAULT_INSTRUCTION_SET_FEATURES := atomic_ldrd_strd,div
- else
- ifneq (,$(filter $(DEX2OAT_TARGET_CPU_VARIANT),cortex-a7))
- 2ND_LIBART_TARGET_DEFAULT_INSTRUCTION_SET_FEATURES := div
- endif
- endif
-endif
-
# $(1): target or host
# $(2): ndebug or debug
# $(3): static or shared (note that static only applies for host)
@@ -507,8 +484,6 @@ endif
$$(eval LOCAL_CLANG := $$(ART_TARGET_CLANG))
$$(eval $$(call set-target-local-cflags-vars,$(2)))
LOCAL_ASFLAGS_arm += -no-integrated-as
- LOCAL_CFLAGS_$(DEX2OAT_TARGET_ARCH) += -DART_DEFAULT_INSTRUCTION_SET_FEATURES="$(LIBART_TARGET_DEFAULT_INSTRUCTION_SET_FEATURES)"
- LOCAL_CFLAGS_$(2ND_DEX2OAT_TARGET_ARCH) += -DART_DEFAULT_INSTRUCTION_SET_FEATURES="$(2ND_LIBART_TARGET_DEFAULT_INSTRUCTION_SET_FEATURES)"
else # host
LOCAL_CLANG := $$(ART_HOST_CLANG)
LOCAL_LDLIBS += -ldl -lpthread
@@ -516,7 +491,6 @@ endif
LOCAL_LDLIBS += -lrt
endif
LOCAL_CFLAGS += $$(ART_HOST_CFLAGS)
- LOCAL_CFLAGS += -DART_DEFAULT_INSTRUCTION_SET_FEATURES="$(LIBART_HOST_DEFAULT_INSTRUCTION_SET_FEATURES)"
LOCAL_ASFLAGS += $$(ART_HOST_ASFLAGS)
ifeq ($$(art_ndebug_or_debug),debug)
@@ -658,9 +632,6 @@ endif
# Clear locally defined variables.
LOCAL_PATH :=
LIBART_COMMON_SRC_FILES :=
-LIBART_HOST_DEFAULT_INSTRUCTION_SET_FEATURES :=
-LIBART_TARGET_DEFAULT_INSTRUCTION_SET_FEATURES :=
-2ND_LIBART_TARGET_DEFAULT_INSTRUCTION_SET_FEATURES :=
LIBART_HOST_LDFLAGS :=
LIBART_TARGET_LDFLAGS :=
LIBART_TARGET_LDFLAGS_arm :=
diff --git a/runtime/art_method.cc b/runtime/art_method.cc
index d812590cc7..f9bc249a42 100644
--- a/runtime/art_method.cc
+++ b/runtime/art_method.cc
@@ -477,7 +477,7 @@ const OatQuickMethodHeader* ArtMethod::GetOatQuickMethodHeader(uintptr_t pc) {
DCHECK(method_header->Contains(pc))
<< PrettyMethod(this)
- << std::hex << pc << " " << oat_entry_point
+ << " " << std::hex << pc << " " << oat_entry_point
<< " " << (uintptr_t)(method_header->code_ + method_header->code_size_);
return method_header;
}
diff --git a/runtime/dex_instruction-inl.h b/runtime/dex_instruction-inl.h
index 3d0fea07ad..f6ed1f03b7 100644
--- a/runtime/dex_instruction-inl.h
+++ b/runtime/dex_instruction-inl.h
@@ -505,11 +505,11 @@ inline uint16_t Instruction::VRegH_4rcc() const {
}
inline bool Instruction::HasVarArgs() const {
- return FormatOf(Opcode()) == k35c;
+ return (FormatOf(Opcode()) == k35c) || (FormatOf(Opcode()) == k45cc);
}
inline void Instruction::GetVarArgs(uint32_t arg[kMaxVarArgRegs], uint16_t inst_data) const {
- DCHECK_EQ(FormatOf(Opcode()), k35c);
+ DCHECK(HasVarArgs());
/*
* Note that the fields mentioned in the spec don't appear in
diff --git a/runtime/dex_instruction_test.cc b/runtime/dex_instruction_test.cc
index 00c8e07a72..95e4181a8e 100644
--- a/runtime/dex_instruction_test.cc
+++ b/runtime/dex_instruction_test.cc
@@ -92,6 +92,15 @@ TEST(Instruction, PropertiesOf45cc) {
ASSERT_TRUE(ins->HasVRegH());
ASSERT_EQ(32, ins->VRegH());
ASSERT_EQ(32, ins->VRegH_45cc());
+
+ ASSERT_TRUE(ins->HasVarArgs());
+
+ uint32_t arg_regs[Instruction::kMaxVarArgRegs];
+ ins->GetVarArgs(arg_regs);
+ ASSERT_EQ(0xeu, arg_regs[0]);
+ ASSERT_EQ(0xfu, arg_regs[1]);
+ ASSERT_EQ(0xau, arg_regs[2]);
+ ASSERT_EQ(0xcu, arg_regs[3]);
}
TEST(Instruction, PropertiesOf4rcc) {
@@ -118,6 +127,8 @@ TEST(Instruction, PropertiesOf4rcc) {
ASSERT_TRUE(ins->HasVRegH());
ASSERT_EQ(32, ins->VRegH());
ASSERT_EQ(32, ins->VRegH_4rcc());
+
+ ASSERT_FALSE(ins->HasVarArgs());
}
} // namespace art
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index 7afe6f9ab4..42816a04f1 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -435,10 +435,8 @@ void ConcurrentCopying::FlipThreadRoots() {
gc_barrier_->Init(self, 0);
ThreadFlipVisitor thread_flip_visitor(this, heap_->use_tlab_);
FlipCallback flip_callback(this);
- heap_->ThreadFlipBegin(self); // Sync with JNI critical calls.
size_t barrier_count = Runtime::Current()->FlipThreadRoots(
&thread_flip_visitor, &flip_callback, this);
- heap_->ThreadFlipEnd(self);
{
ScopedThreadStateChange tsc(self, kWaitingForCheckPointsToRun);
gc_barrier_->Increment(self, barrier_count);
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index 2335ccbaad..3904160fd4 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -413,7 +413,7 @@ class MarkSweep::MarkObjectSlowPath {
if (UNLIKELY(obj == nullptr || !IsAligned<kPageSize>(obj) ||
(kIsDebugBuild && large_object_space != nullptr &&
!large_object_space->Contains(obj)))) {
- LOG(INTERNAL_FATAL) << "Tried to mark " << obj << " not contained by any spaces\n";
+ LOG(INTERNAL_FATAL) << "Tried to mark " << obj << " not contained by any spaces";
if (holder_ != nullptr) {
size_t holder_size = holder_->SizeOf();
ArtField* field = holder_->FindFieldByOffset(offset_);
@@ -441,7 +441,7 @@ class MarkSweep::MarkObjectSlowPath {
for (size_t i = 0; i < holder_size / sizeof(uint32_t); ++i) {
uint32_t* p = reinterpret_cast<uint32_t*>(holder_);
LOG(INTERNAL_FATAL) << &p[i] << ": " << "holder+" << (i * sizeof(uint32_t)) << " = "
- << std::hex << p[i] << "\n";
+ << std::hex << p[i];
}
}
PrintFileToLog("/proc/self/maps", LogSeverity::INTERNAL_FATAL);
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 39f26e7fe2..638c1d841a 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -878,9 +878,13 @@ void Heap::IncrementDisableThreadFlip(Thread* self) {
MutexLock mu(self, *thread_flip_lock_);
bool has_waited = false;
uint64_t wait_start = NanoTime();
- while (thread_flip_running_) {
- has_waited = true;
- thread_flip_cond_->Wait(self);
+ if (thread_flip_running_) {
+ TimingLogger::ScopedTiming split("IncrementDisableThreadFlip",
+ GetCurrentGcIteration()->GetTimings());
+ while (thread_flip_running_) {
+ has_waited = true;
+ thread_flip_cond_->Wait(self);
+ }
}
++disable_thread_flip_count_;
if (has_waited) {
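
The heap.cc change above keeps the fast path out of the GC timings by only opening
the timing scope when the caller is actually going to block on the thread flip. The
same shape in isolation, as a rough sketch; ScopedTimer here is a stand-in, not the
ART TimingLogger::ScopedTiming API:

    #include <chrono>
    #include <condition_variable>
    #include <cstdio>
    #include <mutex>

    // Stand-in for TimingLogger::ScopedTiming (assumption, not the ART API).
    class ScopedTimer {
      using Clock = std::chrono::steady_clock;
     public:
      explicit ScopedTimer(const char* name) : name_(name), start_(Clock::now()) {}
      ~ScopedTimer() {
        auto us = std::chrono::duration_cast<std::chrono::microseconds>(Clock::now() - start_);
        std::printf("%s: %lld us\n", name_, static_cast<long long>(us.count()));
      }
     private:
      const char* name_;
      Clock::time_point start_;
    };

    std::mutex mu;
    std::condition_variable cv;
    bool thread_flip_running = false;

    void IncrementDisableThreadFlip() {
      std::unique_lock<std::mutex> lock(mu);
      if (thread_flip_running) {
        // Only pay for the timing record when we actually have to wait.
        ScopedTimer timing("IncrementDisableThreadFlip");
        cv.wait(lock, [] { return !thread_flip_running; });
      }
    }

    int main() {
      IncrementDisableThreadFlip();  // fast path: no timer is ever constructed
      return 0;
    }
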
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index 1cf9dd1612..f948be79c9 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -136,7 +136,7 @@ void Class::SetDexCache(DexCache* new_dex_cache) {
void Class::SetClassSize(uint32_t new_class_size) {
if (kIsDebugBuild && new_class_size < GetClassSize()) {
DumpClass(LOG(INTERNAL_FATAL), kDumpClassFullDetail);
- LOG(INTERNAL_FATAL) << new_class_size << " vs " << GetClassSize() << "\n";
+ LOG(INTERNAL_FATAL) << new_class_size << " vs " << GetClassSize();
LOG(FATAL) << " class=" << PrettyTypeOf(this);
}
// Not called within a transaction.
diff --git a/runtime/native/dalvik_system_ZygoteHooks.cc b/runtime/native/dalvik_system_ZygoteHooks.cc
index 198a52e70d..fe3cbe74f5 100644
--- a/runtime/native/dalvik_system_ZygoteHooks.cc
+++ b/runtime/native/dalvik_system_ZygoteHooks.cc
@@ -18,8 +18,6 @@
#include <stdlib.h>
-#include <cutils/process_name.h>
-
#include "arch/instruction_set.h"
#include "debugger.h"
#include "java_vm_ext.h"
diff --git a/runtime/thread-inl.h b/runtime/thread-inl.h
index 3aa1fc256d..216d8a7194 100644
--- a/runtime/thread-inl.h
+++ b/runtime/thread-inl.h
@@ -224,6 +224,7 @@ inline ThreadState Thread::TransitionFromSuspendedToRunnable() {
thread_to_pass = this;
}
MutexLock mu(thread_to_pass, *Locks::thread_suspend_count_lock_);
+ ScopedTransitioningToRunnable scoped_transitioning_to_runnable(this);
old_state_and_flags.as_int = tls32_.state_and_flags.as_int;
DCHECK_EQ(old_state_and_flags.as_struct.state, old_state);
while ((old_state_and_flags.as_struct.flags & kSuspendRequest) != 0) {
diff --git a/runtime/thread.cc b/runtime/thread.cc
index b35a614e99..0457ba0d26 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -1217,10 +1217,8 @@ void Thread::FullSuspendCheck() {
ScopedTrace trace(__FUNCTION__);
VLOG(threads) << this << " self-suspending";
// Make thread appear suspended to other threads, release mutator_lock_.
- tls32_.suspended_at_suspend_check = true;
// Transition to suspended and back to runnable, re-acquire share on mutator_lock_.
ScopedThreadSuspension(this, kSuspended);
- tls32_.suspended_at_suspend_check = false;
VLOG(threads) << this << " self-reviving";
}
@@ -1635,7 +1633,7 @@ Thread::Thread(bool daemon) : tls32_(daemon), wait_monitor_(nullptr), interrupte
}
tlsPtr_.flip_function = nullptr;
tlsPtr_.thread_local_mark_stack = nullptr;
- tls32_.suspended_at_suspend_check = false;
+ tls32_.is_transitioning_to_runnable = false;
}
bool Thread::IsStillStarting() const {
@@ -1773,7 +1771,7 @@ Thread::~Thread() {
CHECK(tlsPtr_.checkpoint_function == nullptr);
CHECK_EQ(checkpoint_overflow_.size(), 0u);
CHECK(tlsPtr_.flip_function == nullptr);
- CHECK_EQ(tls32_.suspended_at_suspend_check, false);
+ CHECK_EQ(tls32_.is_transitioning_to_runnable, false);
// Make sure we processed all deoptimization requests.
CHECK(tlsPtr_.deoptimization_context_stack == nullptr) << "Missed deoptimization";
diff --git a/runtime/thread.h b/runtime/thread.h
index 840b7817f8..1c2d4ab533 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -1085,8 +1085,12 @@ class Thread {
return tlsPtr_.nested_signal_state;
}
- bool IsSuspendedAtSuspendCheck() const {
- return tls32_.suspended_at_suspend_check;
+ bool IsTransitioningToRunnable() const {
+ return tls32_.is_transitioning_to_runnable;
+ }
+
+ void SetIsTransitioningToRunnable(bool value) {
+ tls32_.is_transitioning_to_runnable = value;
}
void PushVerifier(verifier::MethodVerifier* verifier);
@@ -1264,7 +1268,7 @@ class Thread {
suspend_count(0), debug_suspend_count(0), thin_lock_thread_id(0), tid(0),
daemon(is_daemon), throwing_OutOfMemoryError(false), no_thread_suspension(0),
thread_exit_check_count(0), handling_signal_(false),
- suspended_at_suspend_check(false), ready_for_debug_invoke(false),
+ is_transitioning_to_runnable(false), ready_for_debug_invoke(false),
debug_method_entry_(false), is_gc_marking(false), weak_ref_access_enabled(true),
disable_thread_flip_count(0) {
}
@@ -1306,10 +1310,10 @@ class Thread {
// True if signal is being handled by this thread.
bool32_t handling_signal_;
- // True if the thread is suspended in FullSuspendCheck(). This is
- // used to distinguish runnable threads that are suspended due to
- // a normal suspend check from other threads.
- bool32_t suspended_at_suspend_check;
+ // True if the thread is in TransitionFromSuspendedToRunnable(). This is used to distinguish the
+  // non-runnable threads (e.g. kNative, kWaiting) that are about to transition to runnable from
+ // the rest of them.
+ bool32_t is_transitioning_to_runnable;
// True if the thread has been suspended by a debugger event. This is
// used to invoke method from the debugger which is only allowed when
@@ -1588,6 +1592,26 @@ class ScopedDebugDisallowReadBarriers {
Thread* const self_;
};
+class ScopedTransitioningToRunnable : public ValueObject {
+ public:
+ explicit ScopedTransitioningToRunnable(Thread* self)
+ : self_(self) {
+ DCHECK_EQ(self, Thread::Current());
+ if (kUseReadBarrier) {
+ self_->SetIsTransitioningToRunnable(true);
+ }
+ }
+
+ ~ScopedTransitioningToRunnable() {
+ if (kUseReadBarrier) {
+ self_->SetIsTransitioningToRunnable(false);
+ }
+ }
+
+ private:
+ Thread* const self_;
+};
+
std::ostream& operator<<(std::ostream& os, const Thread& thread);
std::ostream& operator<<(std::ostream& os, const StackedShadowFrameType& thread);
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index 419ecec696..688514cd76 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -405,6 +405,8 @@ size_t ThreadList::FlipThreadRoots(Closure* thread_flip_visitor,
Locks::thread_suspend_count_lock_->AssertNotHeld(self);
CHECK_NE(self->GetState(), kRunnable);
+ collector->GetHeap()->ThreadFlipBegin(self); // Sync with JNI critical calls.
+
SuspendAllInternal(self, self, nullptr);
// Run the flip callback for the collector.
@@ -414,26 +416,31 @@ size_t ThreadList::FlipThreadRoots(Closure* thread_flip_visitor,
collector->RegisterPause(NanoTime() - start_time);
// Resume runnable threads.
- std::vector<Thread*> runnable_threads;
+ size_t runnable_thread_count = 0;
std::vector<Thread*> other_threads;
{
+ TimingLogger::ScopedTiming split2("ResumeRunnableThreads", collector->GetTimings());
MutexLock mu(self, *Locks::thread_list_lock_);
MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
--suspend_all_count_;
for (const auto& thread : list_) {
+ // Set the flip function for all threads because Thread::DumpState/DumpJavaStack() (invoked by
+ // a checkpoint) may cause the flip function to be run for a runnable/suspended thread before
+ // a runnable thread runs it for itself or we run it for a suspended thread below.
+ thread->SetFlipFunction(thread_flip_visitor);
if (thread == self) {
continue;
}
- // Set the flip function for both runnable and suspended threads
- // because Thread::DumpState/DumpJavaStack() (invoked by a
- // checkpoint) may cause the flip function to be run for a
- // runnable/suspended thread before a runnable threads runs it
- // for itself or we run it for a suspended thread below.
- thread->SetFlipFunction(thread_flip_visitor);
- if (thread->IsSuspendedAtSuspendCheck()) {
+      // Resume early the threads that were runnable but are suspended just for this thread flip or
+      // about to transition from non-runnable (e.g. kNative at the SOA entry in a JNI function) to
+      // runnable (both cases waiting inside Thread::TransitionFromSuspendedToRunnable), or waiting
+      // for the thread flip to end at the JNI critical section entry (kWaitingForGcThreadFlip).
+ ThreadState state = thread->GetState();
+ if (state == kWaitingForGcThreadFlip ||
+ thread->IsTransitioningToRunnable()) {
// The thread will resume right after the broadcast.
thread->ModifySuspendCount(self, -1, nullptr, false);
- runnable_threads.push_back(thread);
+ ++runnable_thread_count;
} else {
other_threads.push_back(thread);
}
@@ -441,8 +448,11 @@ size_t ThreadList::FlipThreadRoots(Closure* thread_flip_visitor,
Thread::resume_cond_->Broadcast(self);
}
+ collector->GetHeap()->ThreadFlipEnd(self);
+
// Run the closure on the other threads and let them resume.
{
+ TimingLogger::ScopedTiming split3("FlipOtherThreads", collector->GetTimings());
ReaderMutexLock mu(self, *Locks::mutator_lock_);
for (const auto& thread : other_threads) {
Closure* flip_func = thread->GetFlipFunction();
@@ -451,11 +461,15 @@ size_t ThreadList::FlipThreadRoots(Closure* thread_flip_visitor,
}
}
// Run it for self.
- thread_flip_visitor->Run(self);
+ Closure* flip_func = self->GetFlipFunction();
+ if (flip_func != nullptr) {
+ flip_func->Run(self);
+ }
}
// Resume other threads.
{
+ TimingLogger::ScopedTiming split4("ResumeOtherThreads", collector->GetTimings());
MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
for (const auto& thread : other_threads) {
thread->ModifySuspendCount(self, -1, nullptr, false);
@@ -463,7 +477,7 @@ size_t ThreadList::FlipThreadRoots(Closure* thread_flip_visitor,
Thread::resume_cond_->Broadcast(self);
}
- return runnable_threads.size() + other_threads.size() + 1; // +1 for self.
+ return runnable_thread_count + other_threads.size() + 1; // +1 for self.
}
void ThreadList::SuspendAll(const char* cause, bool long_suspend) {
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index 09f969e85e..9e18750396 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -559,12 +559,9 @@ TEST_ART_BROKEN_INTERPRETER_READ_BARRIER_RUN_TESTS :=
# more parallel moves on x86, thus some Checker assertions may fail.
# 527: On ARM64 and ARM, the read barrier instrumentation does not support the HIntermediateAddress
# instruction yet (b/26601270).
-# 537: Expects an array copy to be intrinsified on x86-64, but calling-on-slowpath intrinsics are
-# not yet handled in the read barrier configuration.
TEST_ART_BROKEN_OPTIMIZING_READ_BARRIER_RUN_TESTS := \
484-checker-register-hints \
- 527-checker-array-access-split \
- 537-checker-arraycopy
+ 527-checker-array-access-split
# Tests that should fail in the read barrier configuration with JIT (Optimizing compiler).
TEST_ART_BROKEN_JIT_READ_BARRIER_RUN_TESTS :=
diff --git a/tools/javafuzz/README.md b/tools/javafuzz/README.md
index ca8532ae72..35c057c5bb 100644
--- a/tools/javafuzz/README.md
+++ b/tools/javafuzz/README.md
@@ -1,12 +1,12 @@
JavaFuzz
========
-JavaFuzz is tool for generating random Java programs with the objective of
-fuzz testing the ART infrastructure. Each randomly generated Java program
+JavaFuzz is a tool for generating random Java programs with the objective
+of fuzz testing the ART infrastructure. Each randomly generated Java program
can be run under various modes of execution, such as using the interpreter,
using the optimizing compiler, using an external reference implementation,
or using various target architectures. Any difference between the outputs
-(a divergence) may indicate a bug in one of the execution modes.
+(**divergence**) may indicate a bug in one of the execution modes.
JavaFuzz can be combined with dexfuzz to get multilayered fuzz testing.
@@ -36,6 +36,24 @@ a fixed testing class named Test. So a typical test run looks as follows.
jack -cp ${JACK_CLASSPATH} --output-dex . Test.java
art -classpath classes.dex Test
+How to start the JavaFuzz tests
+===============================
+
+    run_java_fuzz_test.py [--num_tests=num]
+                          [--mode1=mode] [--mode2=mode]
+
+where
+
+ --num_tests: number of tests to run (10000 by default)
+  --mode1=m1: first execution mode
+  --mode2=m2: second execution mode
+  with m1 != m2, and each mode one of
+ ri : reference implementation on host (default for m1)
+ hint : Art interpreter on host
+ hopt : Art optimizing on host (default for m2)
+ tint : Art interpreter on target
+ topt : Art optimizing on target
+
Background
==========
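
For reference, a typical invocation of the new runner with the flags documented in
the hunk above (the values here are only an example; any mode pair with m1 != m2
works):

    run_java_fuzz_test.py --num_tests=100 --mode1=ri --mode2=hopt
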
diff --git a/tools/javafuzz/run_java_fuzz_test.py b/tools/javafuzz/run_java_fuzz_test.py
new file mode 100755
index 0000000000..4f192e7c44
--- /dev/null
+++ b/tools/javafuzz/run_java_fuzz_test.py
@@ -0,0 +1,406 @@
+#!/usr/bin/env python2
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import abc
+import argparse
+import subprocess
+import sys
+import os
+
+from tempfile import mkdtemp
+from threading import Timer
+
+# Normalized return codes.
+EXIT_SUCCESS = 0
+EXIT_TIMEOUT = 1
+EXIT_NOTCOMPILED = 2
+EXIT_NOTRUN = 3
+
+#
+# Utility methods.
+#
+
+def RunCommand(cmd, args, out, err, timeout = 5):
+ """Executes a command, and returns its return code.
+
+ Args:
+ cmd: string, a command to execute
+ args: string, arguments to pass to command (or None)
+ out: string, file name to open for stdout (or None)
+ err: string, file name to open for stderr (or None)
+ timeout: int, time out in seconds
+ Returns:
+ return code of running command (forced EXIT_TIMEOUT on timeout)
+ """
+ cmd = 'exec ' + cmd # preserve pid
+ if args != None:
+ cmd = cmd + ' ' + args
+ outf = None
+ if out != None:
+ outf = open(out, mode='w')
+ errf = None
+ if err != None:
+ errf = open(err, mode='w')
+ proc = subprocess.Popen(cmd, stdout=outf, stderr=errf, shell=True)
+ timer = Timer(timeout, proc.kill) # enforces timeout
+ timer.start()
+ proc.communicate()
+ if timer.is_alive():
+ timer.cancel()
+ returncode = proc.returncode
+ else:
+ returncode = EXIT_TIMEOUT
+ if outf != None:
+ outf.close()
+ if errf != None:
+ errf.close()
+ return returncode
+
+def GetJackClassPath():
+ """Returns Jack's classpath."""
+ top = os.environ.get('ANDROID_BUILD_TOP')
+ if top == None:
+ raise FatalError('Cannot find AOSP build top')
+ libdir = top + '/out/host/common/obj/JAVA_LIBRARIES'
+ return libdir + '/core-libart-hostdex_intermediates/classes.jack:' \
+ + libdir + '/core-oj-hostdex_intermediates/classes.jack'
+
+def GetExecutionModeRunner(mode):
+ """Returns a runner for the given execution mode.
+
+ Args:
+ mode: string, execution mode
+ Returns:
+ TestRunner with given execution mode
+ Raises:
+ FatalError: error for unknown execution mode
+ """
+ if mode == 'ri':
+ return TestRunnerRIOnHost()
+ if mode == 'hint':
+ return TestRunnerArtOnHost(True)
+ if mode == 'hopt':
+ return TestRunnerArtOnHost(False)
+ if mode == 'tint':
+ return TestRunnerArtOnTarget(True)
+ if mode == 'topt':
+ return TestRunnerArtOnTarget(False)
+ raise FatalError('Unknown execution mode')
+
+def GetReturnCode(retc):
+ """Returns a string representation of the given normalized return code.
+ Args:
+ retc: int, normalized return code
+ Returns:
+ string representation of normalized return code
+ Raises:
+ FatalError: error for unknown normalized return code
+ """
+ if retc == EXIT_SUCCESS:
+ return 'SUCCESS'
+ if retc == EXIT_TIMEOUT:
+ return 'TIMED-OUT'
+ if retc == EXIT_NOTCOMPILED:
+ return 'NOT-COMPILED'
+ if retc == EXIT_NOTRUN:
+ return 'NOT-RUN'
+ raise FatalError('Unknown normalized return code')
+
+#
+# Execution mode classes.
+#
+
+class TestRunner(object):
+ """Abstraction for running a test in a particular execution mode."""
+  __metaclass__ = abc.ABCMeta
+
+ def GetDescription(self):
+ """Returns a description string of the execution mode."""
+ return self._description
+
+ def GetId(self):
+ """Returns a short string that uniquely identifies the execution mode."""
+ return self._id
+
+ @abc.abstractmethod
+ def CompileAndRunTest(self):
+ """Compile and run the generated test.
+
+ Ensures that the current Test.java in the temporary directory is compiled
+ and executed under the current execution mode. On success, transfers the
+ generated output to the file GetId()_out.txt in the temporary directory.
+ Cleans up after itself.
+
+ Most nonzero return codes are assumed non-divergent, since systems may
+ exit in different ways. This is enforced by normalizing return codes.
+
+ Returns:
+ normalized return code
+ """
+ pass
+
+class TestRunnerRIOnHost(TestRunner):
+ """Concrete test runner of the reference implementation on host."""
+
+ def __init__(self):
+ """Constructor for the RI tester."""
+ self._description = 'RI on host'
+ self._id = 'RI'
+
+ def CompileAndRunTest(self):
+ if RunCommand('javac', 'Test.java',
+ out=None, err=None, timeout=30) == EXIT_SUCCESS:
+ retc = RunCommand('java', 'Test', 'RI_run_out.txt', err=None)
+ if retc != EXIT_SUCCESS and retc != EXIT_TIMEOUT:
+ retc = EXIT_NOTRUN
+ else:
+ retc = EXIT_NOTCOMPILED
+ # Cleanup and return.
+ RunCommand('rm', '-f Test.class', out=None, err=None)
+ return retc
+
+class TestRunnerArtOnHost(TestRunner):
+ """Concrete test runner of Art on host (interpreter or optimizing)."""
+
+ def __init__(self, interpreter):
+ """Constructor for the Art on host tester.
+
+ Args:
+ interpreter: boolean, selects between interpreter or optimizing
+ """
+ self._art_args = '-cp classes.dex Test'
+ if interpreter:
+ self._description = 'Art interpreter on host'
+ self._id = 'HInt'
+ self._art_args = '-Xint ' + self._art_args
+ else:
+ self._description = 'Art optimizing on host'
+ self._id = 'HOpt'
+ self._jack_args = '-cp ' + GetJackClassPath() + ' --output-dex . Test.java'
+
+ def CompileAndRunTest(self):
+ if RunCommand('jack', self._jack_args,
+ out=None, err='jackerr.txt', timeout=30) == EXIT_SUCCESS:
+ out = self.GetId() + '_run_out.txt'
+ retc = RunCommand('art', self._art_args, out, 'arterr.txt')
+ if retc != EXIT_SUCCESS and retc != EXIT_TIMEOUT:
+ retc = EXIT_NOTRUN
+ else:
+ retc = EXIT_NOTCOMPILED
+ # Cleanup and return.
+ RunCommand('rm', '-rf classes.dex jackerr.txt arterr.txt android-data*',
+ out=None, err=None)
+ return retc
+
+# TODO: very rough first version without proper cache,
+# reuse staszkiewicz' module for properly setting up dalvikvm on target.
+class TestRunnerArtOnTarget(TestRunner):
+ """Concrete test runner of Art on target (interpreter or optimizing)."""
+
+ def __init__(self, interpreter):
+ """Constructor for the Art on target tester.
+
+ Args:
+ interpreter: boolean, selects between interpreter or optimizing
+ """
+ self._dalvik_args = '-cp /data/local/tmp/classes.dex Test'
+ if interpreter:
+ self._description = 'Art interpreter on target'
+ self._id = 'TInt'
+ self._dalvik_args = '-Xint ' + self._dalvik_args
+ else:
+ self._description = 'Art optimizing on target'
+ self._id = 'TOpt'
+ self._jack_args = '-cp ' + GetJackClassPath() + ' --output-dex . Test.java'
+
+ def CompileAndRunTest(self):
+ if RunCommand('jack', self._jack_args,
+ out=None, err='jackerr.txt', timeout=30) == EXIT_SUCCESS:
+ if RunCommand('adb push', 'classes.dex /data/local/tmp/',
+ 'adb.txt', err=None) != EXIT_SUCCESS:
+ raise FatalError('Cannot push to target device')
+ out = self.GetId() + '_run_out.txt'
+ retc = RunCommand('adb shell dalvikvm', self._dalvik_args, out, err=None)
+ if retc != EXIT_SUCCESS and retc != EXIT_TIMEOUT:
+ retc = EXIT_NOTRUN
+ else:
+ retc = EXIT_NOTCOMPILED
+ # Cleanup and return.
+ RunCommand('rm', '-f classes.dex jackerr.txt adb.txt',
+ out=None, err=None)
+ RunCommand('adb shell', 'rm -f /data/local/tmp/classes.dex',
+ out=None, err=None)
+ return retc
+
+#
+# Tester classes.
+#
+
+class FatalError(Exception):
+ """Fatal error in the tester."""
+ pass
+
+class JavaFuzzTester(object):
+  """Tester that runs JavaFuzz many times and reports divergences."""
+
+ def __init__(self, num_tests, mode1, mode2):
+ """Constructor for the tester.
+
+ Args:
+ num_tests: int, number of tests to run
+ mode1: string, execution mode for first runner
+ mode2: string, execution mode for second runner
+ """
+ self._num_tests = num_tests
+ self._runner1 = GetExecutionModeRunner(mode1)
+ self._runner2 = GetExecutionModeRunner(mode2)
+ self._save_dir = None
+ self._tmp_dir = None
+ # Statistics.
+ self._test = 0
+ self._num_success = 0
+ self._num_not_compiled = 0
+ self._num_not_run = 0
+ self._num_timed_out = 0
+ self._num_divergences = 0
+
+ def __enter__(self):
+ """On entry, enters new temp directory after saving current directory.
+
+ Raises:
+ FatalError: error when temp directory cannot be constructed
+ """
+ self._save_dir = os.getcwd()
+ self._tmp_dir = mkdtemp(dir="/tmp/")
+ if self._tmp_dir == None:
+ raise FatalError('Cannot obtain temp directory')
+ os.chdir(self._tmp_dir)
+ return self
+
+ def __exit__(self, etype, evalue, etraceback):
+ """On exit, re-enters previously saved current directory and cleans up."""
+ os.chdir(self._save_dir)
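+ # Keep the temp directory when divergences were found, so that the saved
+ # divergence subdirectories can be inspected afterwards.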
+ if self._num_divergences == 0:
+ RunCommand('rm', '-rf ' + self._tmp_dir, out=None, err=None)
+
+ def Run(self):
+ """Runs JavaFuzz many times and report divergences."""
+ print
+ print '**\n**** JavaFuzz Testing\n**'
+ print
+ print '#Tests :', self._num_tests
+ print 'Directory :', self._tmp_dir
+ print 'Exec-mode1:', self._runner1.GetDescription()
+ print 'Exec-mode2:', self._runner2.GetDescription()
+ print
+ self.ShowStats()
+ for self._test in range(1, self._num_tests + 1):
+ self.RunJavaFuzzTest()
+ self.ShowStats()
+ if self._num_divergences == 0:
+ print '\n\nsuccess (no divergences)\n'
+ else:
+ print '\n\nfailure (divergences)\n'
+
+ def ShowStats(self):
+ """Shows current statistics (on same line) while tester is running."""
+ print '\rTests:', self._test, \
+ 'Success:', self._num_success, \
+ 'Not-compiled:', self._num_not_compiled, \
+ 'Not-run:', self._num_not_run, \
+ 'Timed-out:', self._num_timed_out, \
+ 'Divergences:', self._num_divergences,
+ sys.stdout.flush()
+
+ def RunJavaFuzzTest(self):
+ """Runs a single JavaFuzz test, comparing two execution modes."""
+ self.ConstructTest()
+ retc1 = self._runner1.CompileAndRunTest()
+ retc2 = self._runner2.CompileAndRunTest()
+ self.CheckForDivergence(retc1, retc2)
+ self.CleanupTest()
+
+ def ConstructTest(self):
+ """Use JavaFuzz to generate next Test.java test.
+
+ Raises:
+ FatalError: error when javafuzz fails
+ """
+ if RunCommand('javafuzz', args=None,
+ out='Test.java', err=None) != EXIT_SUCCESS:
+ raise FatalError('Unexpected error while running JavaFuzz')
+
+ def CheckForDivergence(self, retc1, retc2):
+ """Checks for divergences and updates statistics.
+
+ Args:
+ retc1: int, normalized return code of first runner
+ retc2: int, normalized return code of second runner
+ """
+ if retc1 == retc2:
+ # Non-divergent in return code.
+ if retc1 == EXIT_SUCCESS:
+ # Both compilations and runs were successful, inspect generated output.
+ args = self._runner1.GetId() + '_run_out.txt ' \
+ + self._runner2.GetId() + '_run_out.txt'
+ if RunCommand('diff', args, out=None, err=None) != EXIT_SUCCESS:
+ self.ReportDivergence('divergence in output')
+ else:
+ self._num_success += 1
+ elif retc1 == EXIT_TIMEOUT:
+ self._num_timed_out += 1
+ elif retc1 == EXIT_NOTCOMPILED:
+ self._num_not_compiled += 1
+ else:
+ self._num_not_run += 1
+ else:
+ # Divergent in return code.
+ self.ReportDivergence('divergence in return code: ' +
+ GetReturnCode(retc1) + ' vs. ' +
+ GetReturnCode(retc2))
+
+ def ReportDivergence(self, reason):
+ """Reports and saves a divergence."""
+ self._num_divergences += 1
+ print '\n', self._test, reason
+ # Save the test and its captured outputs for later inspection.
+ ddir = 'divergence' + str(self._test)
+ RunCommand('mkdir', ddir, out=None, err=None)
+ RunCommand('mv', 'Test.java *.txt ' + ddir, out=None, err=None)
+
+ def CleanupTest(self):
+ """Cleans up after a single test run."""
+ RunCommand('rm', '-f Test.java *.txt', out=None, err=None)
+
+
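+# Example invocation (a sketch, not the only supported usage; it assumes the
+# javafuzz, jack, and art tools are available, and that the target execution
+# modes can reach a device through adb):
+#
+#   ./run_java_fuzz_test.py --num_tests=1000 --mode1=ri --mode2=hopt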
+def main():
+ # Handle arguments.
+ parser = argparse.ArgumentParser()
+ parser.add_argument('--num_tests', default=10000,
+ type=int, help='number of tests to run')
+ parser.add_argument('--mode1', default='ri',
+ help='execution mode 1 (default: ri)')
+ parser.add_argument('--mode2', default='hopt',
+ help='execution mode 2 (default: hopt)')
+ args = parser.parse_args()
+ if args.mode1 == args.mode2:
+ raise FatalError("Identical execution modes given")
+ # Run the JavaFuzz tester.
+ with JavaFuzzTester(args.num_tests, args.mode1, args.mode2) as fuzzer:
+ fuzzer.Run()
+
+if __name__ == "__main__":
+ main()