279 files changed, 11071 insertions, 3601 deletions
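For orientation, the new jobject-benchmark added below times ART's internal reference-table paths (AddLocalReference, AddGlobalRef, AddWeakGlobalRef and their Decode counterparts). The sketch that follows is illustrative only and not part of the diff; it shows the equivalent public JNI calls, assuming an attached thread with a valid JNIEnv* and an arbitrary object, and the helper name ExerciseReferences is hypothetical.

#include <jni.h>

// Illustrative only: exercises the public JNI entry points whose internal
// fast paths the jobject-benchmark below measures.
static void ExerciseReferences(JNIEnv* env, jobject obj) {
  jobject local = env->NewLocalRef(obj);    // cf. timeAddRemoveLocal
  env->DeleteLocalRef(local);

  jobject global = env->NewGlobalRef(obj);  // cf. timeAddRemoveGlobal
  env->DeleteGlobalRef(global);

  jweak weak = env->NewWeakGlobalRef(obj);  // cf. timeAddRemoveWeakGlobal
  env->DeleteWeakGlobalRef(weak);
}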
diff --git a/benchmark/Android.mk b/benchmark/Android.mk
index 09aca98337..a4a603ad04 100644
--- a/benchmark/Android.mk
+++ b/benchmark/Android.mk
@@ -19,6 +19,7 @@ LOCAL_PATH := $(call my-dir)
 include art/build/Android.common_build.mk
 
 LIBARTBENCHMARK_COMMON_SRC_FILES := \
+  jobject-benchmark/jobject_benchmark.cc \
   jni-perf/perf_jni.cc \
   scoped-primitive-array/scoped_primitive_array.cc
diff --git a/benchmark/jobject-benchmark/info.txt b/benchmark/jobject-benchmark/info.txt
new file mode 100644
index 0000000000..f2a256a3e6
--- /dev/null
+++ b/benchmark/jobject-benchmark/info.txt
@@ -0,0 +1,7 @@
+Benchmark for jobject functions
+
+Measures performance of:
+Add/RemoveLocalRef
+Add/RemoveGlobalRef
+Add/RemoveWeakGlobalRef
+Decoding local, weak, global, handle scope jobjects.
diff --git a/benchmark/jobject-benchmark/jobject_benchmark.cc b/benchmark/jobject-benchmark/jobject_benchmark.cc
new file mode 100644
index 0000000000..e7ca9ebc1e
--- /dev/null
+++ b/benchmark/jobject-benchmark/jobject_benchmark.cc
@@ -0,0 +1,103 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "jni.h"
+
+#include "mirror/class-inl.h"
+#include "scoped_thread_state_change.h"
+
+namespace art {
+namespace {
+
+extern "C" JNIEXPORT void JNICALL Java_JObjectBenchmark_timeAddRemoveLocal(
+    JNIEnv* env, jobject jobj, jint reps) {
+  ScopedObjectAccess soa(env);
+  mirror::Object* obj = soa.Decode<mirror::Object*>(jobj);
+  CHECK(obj != nullptr);
+  for (jint i = 0; i < reps; ++i) {
+    jobject ref = soa.Env()->AddLocalReference<jobject>(obj);
+    soa.Env()->DeleteLocalRef(ref);
+  }
+}
+
+extern "C" JNIEXPORT void JNICALL Java_JObjectBenchmark_timeDecodeLocal(
+    JNIEnv* env, jobject jobj, jint reps) {
+  ScopedObjectAccess soa(env);
+  mirror::Object* obj = soa.Decode<mirror::Object*>(jobj);
+  CHECK(obj != nullptr);
+  jobject ref = soa.Env()->AddLocalReference<jobject>(obj);
+  for (jint i = 0; i < reps; ++i) {
+    CHECK_EQ(soa.Decode<mirror::Object*>(ref), obj);
+  }
+  soa.Env()->DeleteLocalRef(ref);
+}
+
+extern "C" JNIEXPORT void JNICALL Java_JObjectBenchmark_timeAddRemoveGlobal(
+    JNIEnv* env, jobject jobj, jint reps) {
+  ScopedObjectAccess soa(env);
+  mirror::Object* obj = soa.Decode<mirror::Object*>(jobj);
+  CHECK(obj != nullptr);
+  for (jint i = 0; i < reps; ++i) {
+    jobject ref = soa.Vm()->AddGlobalRef(soa.Self(), obj);
+    soa.Vm()->DeleteGlobalRef(soa.Self(), ref);
+  }
+}
+
+extern "C" JNIEXPORT void JNICALL Java_JObjectBenchmark_timeDecodeGlobal(
+    JNIEnv* env, jobject jobj, jint reps) {
+  ScopedObjectAccess soa(env);
+  mirror::Object* obj = soa.Decode<mirror::Object*>(jobj);
+  CHECK(obj != nullptr);
+  jobject ref = soa.Vm()->AddGlobalRef(soa.Self(), obj);
+  for (jint i = 0; i < reps; ++i) {
+    CHECK_EQ(soa.Decode<mirror::Object*>(ref), obj);
+  }
+  soa.Vm()->DeleteGlobalRef(soa.Self(), ref);
+}
+
+extern "C" JNIEXPORT void JNICALL Java_JObjectBenchmark_timeAddRemoveWeakGlobal(
+    JNIEnv* env, jobject jobj, jint reps) {
+  ScopedObjectAccess soa(env);
+  mirror::Object* obj = soa.Decode<mirror::Object*>(jobj);
+  CHECK(obj != nullptr);
+  for (jint i = 0; i < reps; ++i) {
+    jobject ref = soa.Vm()->AddWeakGlobalRef(soa.Self(), obj);
+    soa.Vm()->DeleteWeakGlobalRef(soa.Self(), ref);
+  }
+}
+
+extern "C" JNIEXPORT void JNICALL Java_JObjectBenchmark_timeDecodeWeakGlobal(
+    JNIEnv* env, jobject jobj, jint reps) {
+  ScopedObjectAccess soa(env);
+  mirror::Object* obj = soa.Decode<mirror::Object*>(jobj);
+  CHECK(obj != nullptr);
+  jobject ref = soa.Vm()->AddWeakGlobalRef(soa.Self(), obj);
+  for (jint i = 0; i < reps; ++i) {
+    CHECK_EQ(soa.Decode<mirror::Object*>(ref), obj);
+  }
+  soa.Vm()->DeleteWeakGlobalRef(soa.Self(), ref);
+}
+
+extern "C" JNIEXPORT void JNICALL Java_JObjectBenchmark_timeDecodeHandleScopeRef(
+    JNIEnv* env, jobject jobj, jint reps) {
+  ScopedObjectAccess soa(env);
+  for (jint i = 0; i < reps; ++i) {
+    soa.Decode<mirror::Object*>(jobj);
+  }
+}
+
+}  // namespace
+}  // namespace art
diff --git a/benchmark/jobject-benchmark/src/JObjectBenchmark.java b/benchmark/jobject-benchmark/src/JObjectBenchmark.java
new file mode 100644
index 0000000000..f4c059c58b
--- /dev/null
+++ b/benchmark/jobject-benchmark/src/JObjectBenchmark.java
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import com.google.caliper.SimpleBenchmark;
+
+public class JObjectBenchmark extends SimpleBenchmark {
+  public JObjectBenchmark() {
+    // Make sure to link methods before benchmark starts.
+    System.loadLibrary("artbenchmark");
+    timeAddRemoveLocal(1);
+    timeDecodeLocal(1);
+    timeAddRemoveGlobal(1);
+    timeDecodeGlobal(1);
+    timeAddRemoveWeakGlobal(1);
+    timeDecodeWeakGlobal(1);
+    timeDecodeHandleScopeRef(1);
+  }
+
+  public native void timeAddRemoveLocal(int reps);
+  public native void timeDecodeLocal(int reps);
+  public native void timeAddRemoveGlobal(int reps);
+  public native void timeDecodeGlobal(int reps);
+  public native void timeAddRemoveWeakGlobal(int reps);
+  public native void timeDecodeWeakGlobal(int reps);
+  public native void timeDecodeHandleScopeRef(int reps);
+}
diff --git a/build/Android.common_build.mk b/build/Android.common_build.mk
index a4434872da..b50712429e 100644
--- a/build/Android.common_build.mk
+++ b/build/Android.common_build.mk
@@ -295,18 +295,22 @@ else
   art_cflags += -DIMT_SIZE=64
 endif
 
-ifeq ($(ART_USE_OPTIMIZING_COMPILER),true)
-  art_cflags += -DART_USE_OPTIMIZING_COMPILER=1
-endif
-
 ifeq ($(ART_HEAP_POISONING),true)
   art_cflags += -DART_HEAP_POISONING=1
   art_asflags += -DART_HEAP_POISONING=1
 endif
 
+#
+# Used to change the read barrier type. Valid values are BAKER, BROOKS, TABLELOOKUP.
+# The default is BAKER.
+# +ART_READ_BARRIER_TYPE ?= BAKER + ifeq ($(ART_USE_READ_BARRIER),true) art_cflags += -DART_USE_READ_BARRIER=1 + art_cflags += -DART_READ_BARRIER_TYPE_IS_$(ART_READ_BARRIER_TYPE)=1 art_asflags += -DART_USE_READ_BARRIER=1 + art_asflags += -DART_READ_BARRIER_TYPE_IS_$(ART_READ_BARRIER_TYPE)=1 endif ifeq ($(ART_USE_TLAB),true) @@ -396,7 +400,6 @@ art_debug_cflags := art_non_debug_cflags := art_host_non_debug_cflags := art_target_non_debug_cflags := -art_default_gc_type := art_default_gc_type_cflags := ART_HOST_LDLIBS := diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk index 71a55bb111..9775f6a5d7 100644 --- a/build/Android.gtest.mk +++ b/build/Android.gtest.mk @@ -351,6 +351,7 @@ COMPILER_GTEST_HOST_SRC_FILES_mips := \ COMPILER_GTEST_HOST_SRC_FILES_mips64 := \ $(COMPILER_GTEST_COMMON_SRC_FILES_mips64) \ + compiler/utils/mips64/assembler_mips64_test.cc \ COMPILER_GTEST_HOST_SRC_FILES_x86 := \ $(COMPILER_GTEST_COMMON_SRC_FILES_x86) \ diff --git a/build/Android.oat.mk b/build/Android.oat.mk index 6e17ed38d6..3272c27f2b 100644 --- a/build/Android.oat.mk +++ b/build/Android.oat.mk @@ -52,18 +52,12 @@ define create-core-oat-host-rules core_pic_infix := core_dex2oat_dependency := $(DEX2OAT_DEPENDENCY) - # With the optimizing compiler, we want to rerun dex2oat whenever there is - # a dex2oat change to catch regressions early. - ifeq ($(ART_USE_OPTIMIZING_COMPILER), true) - core_dex2oat_dependency := $(DEX2OAT) - endif - ifeq ($(1),default) core_compile_options += --compiler-backend=Quick endif ifeq ($(1),optimizing) core_compile_options += --compiler-backend=Optimizing - core_dex2oat_dependency := $(DEX2OAT) + core_dex2oat_dependency += $(DEX2OAT) core_infix := -optimizing endif ifeq ($(1),interpreter) @@ -178,17 +172,13 @@ define create-core-oat-target-rules core_pic_infix := core_dex2oat_dependency := $(DEX2OAT_DEPENDENCY) - # With the optimizing compiler, we want to rerun dex2oat whenever there is - # a dex2oat change to catch regressions early. - ifeq ($(ART_USE_OPTIMIZING_COMPILER), true) - core_dex2oat_dependency := $(DEX2OAT) - endif - ifeq ($(1),default) core_compile_options += --compiler-backend=Quick endif ifeq ($(1),optimizing) core_compile_options += --compiler-backend=Optimizing + # With the optimizing compiler, we want to rerun dex2oat whenever there is + # a dex2oat change to catch regressions early. core_dex2oat_dependency := $(DEX2OAT) core_infix := -optimizing endif diff --git a/compiler/Android.mk b/compiler/Android.mk index 41e9744777..96e13ac9a3 100644 --- a/compiler/Android.mk +++ b/compiler/Android.mk @@ -158,6 +158,7 @@ LIBART_COMPILER_SRC_FILES_mips64 := \ $(LIBART_COMPILER_SRC_FILES_mips) \ jni/quick/mips64/calling_convention_mips64.cc \ optimizing/code_generator_mips64.cc \ + optimizing/intrinsics_mips64.cc \ utils/mips64/assembler_mips64.cc \ utils/mips64/managed_register_mips64.cc \ diff --git a/compiler/common_compiler_test.h b/compiler/common_compiler_test.h index dc2bc5c3f4..67b4428324 100644 --- a/compiler/common_compiler_test.h +++ b/compiler/common_compiler_test.h @@ -92,7 +92,7 @@ class CommonCompilerTest : public CommonRuntimeTest { void UnreserveImageSpace(); - Compiler::Kind compiler_kind_ = kUseOptimizingCompiler ? 
Compiler::kOptimizing : Compiler::kQuick; + Compiler::Kind compiler_kind_ = Compiler::kOptimizing; std::unique_ptr<CompilerOptions> compiler_options_; std::unique_ptr<VerificationResults> verification_results_; std::unique_ptr<DexFileToMethodInlinerMap> method_inliner_map_; diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc index 7082bedc5e..d5ac34186b 100644 --- a/compiler/dex/quick/codegen_util.cc +++ b/compiler/dex/quick/codegen_util.cc @@ -1126,7 +1126,7 @@ CompiledMethod* Mir2Lir::GetCompiledMethod() { for (size_t i = 0 ; i < core_vmap_table_.size(); ++i) { // Copy, stripping out the phys register sort key. vmap_encoder.PushBackUnsigned( - ~(-1 << VREG_NUM_WIDTH) & (core_vmap_table_[i] + VmapTable::kEntryAdjustment)); + ~(~0u << VREG_NUM_WIDTH) & (core_vmap_table_[i] + VmapTable::kEntryAdjustment)); } // Push a marker to take place of lr. vmap_encoder.PushBackUnsigned(VmapTable::kAdjustedFpMarker); @@ -1141,7 +1141,7 @@ CompiledMethod* Mir2Lir::GetCompiledMethod() { for (size_t i = 0 ; i < fp_vmap_table_.size(); ++i) { // Copy, stripping out the phys register sort key. vmap_encoder.PushBackUnsigned( - ~(-1 << VREG_NUM_WIDTH) & (fp_vmap_table_[i] + VmapTable::kEntryAdjustment)); + ~(~0u << VREG_NUM_WIDTH) & (fp_vmap_table_[i] + VmapTable::kEntryAdjustment)); } } } else { diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc index 1f114cf336..3c5c2fe010 100755 --- a/compiler/dex/quick/gen_invoke.cc +++ b/compiler/dex/quick/gen_invoke.cc @@ -148,7 +148,7 @@ void Mir2Lir::CallRuntimeHelperImmRegLocation(QuickEntrypointEnum trampoline, in if (arg1.wide == 0) { LoadValueDirectFixed(arg1, TargetReg(kArg1, arg1)); } else { - RegStorage r_tmp = TargetReg(cu_->instruction_set == kMips ? kArg2 : kArg1, kWide); + RegStorage r_tmp = TargetReg(kArg2, kWide); LoadValueDirectWideFixed(arg1, r_tmp); } LoadConstant(TargetReg(kArg0, kNotWide), arg0); diff --git a/compiler/dex/quick/quick_compiler.cc b/compiler/dex/quick/quick_compiler.cc index b5ecf9c418..1cd742abac 100644 --- a/compiler/dex/quick/quick_compiler.cc +++ b/compiler/dex/quick/quick_compiler.cc @@ -391,9 +391,9 @@ static int kAllOpcodes[] = { Instruction::IGET_SHORT_QUICK, Instruction::INVOKE_LAMBDA, Instruction::UNUSED_F4, - Instruction::UNUSED_F5, + Instruction::CAPTURE_VARIABLE, Instruction::CREATE_LAMBDA, - Instruction::UNUSED_F7, + Instruction::LIBERATE_VARIABLE, Instruction::BOX_LAMBDA, Instruction::UNBOX_LAMBDA, Instruction::UNUSED_FA, diff --git a/compiler/dex/ssa_transformation.cc b/compiler/dex/ssa_transformation.cc index 939bf40564..6ed666b9f7 100644 --- a/compiler/dex/ssa_transformation.cc +++ b/compiler/dex/ssa_transformation.cc @@ -535,37 +535,76 @@ void MIRGraph::DoDFSPreOrderSSARename(BasicBlock* block) { if (block->visited || block->hidden) { return; } - block->visited = true; - /* Process this block */ - DoSSAConversion(block); + typedef struct { + BasicBlock* bb; + int32_t* ssa_map; + } BasicBlockInfo; + BasicBlockInfo temp; - /* Save SSA map snapshot */ ScopedArenaAllocator allocator(&cu_->arena_stack); + ScopedArenaVector<BasicBlockInfo> bi_stack(allocator.Adapter()); + ScopedArenaVector<BasicBlock*> succ_stack(allocator.Adapter()); + uint32_t num_vregs = GetNumOfCodeAndTempVRs(); - int32_t* saved_ssa_map = allocator.AllocArray<int32_t>(num_vregs, kArenaAllocDalvikToSSAMap); - size_t map_size = sizeof(saved_ssa_map[0]) * num_vregs; - memcpy(saved_ssa_map, vreg_to_ssa_map_, map_size); - - if (block->fall_through != NullBasicBlockId) { - 
DoDFSPreOrderSSARename(GetBasicBlock(block->fall_through)); - /* Restore SSA map snapshot */ - memcpy(vreg_to_ssa_map_, saved_ssa_map, map_size); - } - if (block->taken != NullBasicBlockId) { - DoDFSPreOrderSSARename(GetBasicBlock(block->taken)); - /* Restore SSA map snapshot */ - memcpy(vreg_to_ssa_map_, saved_ssa_map, map_size); - } - if (block->successor_block_list_type != kNotUsed) { - for (SuccessorBlockInfo* successor_block_info : block->successor_blocks) { - BasicBlock* succ_bb = GetBasicBlock(successor_block_info->block); - DoDFSPreOrderSSARename(succ_bb); - /* Restore SSA map snapshot */ - memcpy(vreg_to_ssa_map_, saved_ssa_map, map_size); + size_t map_size = sizeof(int32_t) * num_vregs; + temp.bb = block; + temp.ssa_map = vreg_to_ssa_map_; + bi_stack.push_back(temp); + + while (!bi_stack.empty()) { + temp = bi_stack.back(); + bi_stack.pop_back(); + BasicBlock* b = temp.bb; + + if (b->visited || b->hidden) { + continue; + } + b->visited = true; + + /* Restore SSA map snapshot, except for the first block */ + if (b != block) { + memcpy(vreg_to_ssa_map_, temp.ssa_map, map_size); + } + + /* Process this block */ + DoSSAConversion(b); + + /* If there are no successor, taken, and fall through blocks, continue */ + if (b->successor_block_list_type == kNotUsed && + b->taken == NullBasicBlockId && + b->fall_through == NullBasicBlockId) { + continue; + } + + /* Save SSA map snapshot */ + int32_t* saved_ssa_map = + allocator.AllocArray<int32_t>(num_vregs, kArenaAllocDalvikToSSAMap); + memcpy(saved_ssa_map, vreg_to_ssa_map_, map_size); + + if (b->successor_block_list_type != kNotUsed) { + for (SuccessorBlockInfo* successor_block_info : b->successor_blocks) { + BasicBlock* succ_bb = GetBasicBlock(successor_block_info->block); + succ_stack.push_back(succ_bb); + } + while (!succ_stack.empty()) { + temp.bb = succ_stack.back(); + succ_stack.pop_back(); + temp.ssa_map = saved_ssa_map; + bi_stack.push_back(temp); + } + } + if (b->taken != NullBasicBlockId) { + temp.bb = GetBasicBlock(b->taken); + temp.ssa_map = saved_ssa_map; + bi_stack.push_back(temp); + } + if (b->fall_through != NullBasicBlockId) { + temp.bb = GetBasicBlock(b->fall_through); + temp.ssa_map = saved_ssa_map; + bi_stack.push_back(temp); } } - return; } } // namespace art diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc index 59f3749c8c..960f4d9b7c 100644 --- a/compiler/optimizing/bounds_check_elimination.cc +++ b/compiler/optimizing/bounds_check_elimination.cc @@ -14,8 +14,11 @@ * limitations under the License. */ -#include "base/arena_containers.h" #include "bounds_check_elimination.h" + +#include <limits> + +#include "base/arena_containers.h" #include "induction_var_range.h" #include "nodes.h" @@ -48,11 +51,11 @@ class ValueBound : public ValueObject { if (right == 0) { return false; } - if ((right > 0) && (left <= INT_MAX - right)) { + if ((right > 0) && (left <= (std::numeric_limits<int32_t>::max() - right))) { // No overflow. return false; } - if ((right < 0) && (left >= INT_MIN - right)) { + if ((right < 0) && (left >= (std::numeric_limits<int32_t>::min() - right))) { // No underflow. 
return false; } @@ -120,8 +123,8 @@ class ValueBound : public ValueObject { return instruction_ == nullptr; } - static ValueBound Min() { return ValueBound(nullptr, INT_MIN); } - static ValueBound Max() { return ValueBound(nullptr, INT_MAX); } + static ValueBound Min() { return ValueBound(nullptr, std::numeric_limits<int32_t>::min()); } + static ValueBound Max() { return ValueBound(nullptr, std::numeric_limits<int32_t>::max()); } bool Equals(ValueBound bound) const { return instruction_ == bound.instruction_ && constant_ == bound.constant_; @@ -213,7 +216,7 @@ class ValueBound : public ValueObject { int32_t new_constant; if (c > 0) { - if (constant_ > INT_MAX - c) { + if (constant_ > (std::numeric_limits<int32_t>::max() - c)) { *overflow = true; return Max(); } @@ -227,7 +230,7 @@ class ValueBound : public ValueObject { *overflow = true; return Max(); } else { - if (constant_ < INT_MIN - c) { + if (constant_ < (std::numeric_limits<int32_t>::min() - c)) { *underflow = true; return Min(); } @@ -256,8 +259,8 @@ class ArrayAccessInsideLoopFinder : public ValueObject { explicit ArrayAccessInsideLoopFinder(HInstruction* induction_variable) : induction_variable_(induction_variable), found_array_length_(nullptr), - offset_low_(INT_MAX), - offset_high_(INT_MIN) { + offset_low_(std::numeric_limits<int32_t>::max()), + offset_high_(std::numeric_limits<int32_t>::min()) { Run(); } @@ -407,7 +410,7 @@ class ArrayAccessInsideLoopFinder : public ValueObject { * of an existing value range, NewArray or a loop phi corresponding to an * incrementing/decrementing array index (MonotonicValueRange). */ -class ValueRange : public ArenaObject<kArenaAllocMisc> { +class ValueRange : public ArenaObject<kArenaAllocBoundsCheckElimination> { public: ValueRange(ArenaAllocator* allocator, ValueBound lower, ValueBound upper) : allocator_(allocator), lower_(lower), upper_(upper) {} @@ -492,7 +495,7 @@ class MonotonicValueRange : public ValueRange { HInstruction* initial, int32_t increment, ValueBound bound) - // To be conservative, give it full range [INT_MIN, INT_MAX] in case it's + // To be conservative, give it full range [Min(), Max()] in case it's // used as a regular value range, due to possible overflow/underflow. : ValueRange(allocator, ValueBound::Min(), ValueBound::Max()), induction_variable_(induction_variable), @@ -554,19 +557,19 @@ class MonotonicValueRange : public ValueRange { if (increment_ > 0) { // Monotonically increasing. ValueBound lower = ValueBound::NarrowLowerBound(bound_, range->GetLower()); - if (!lower.IsConstant() || lower.GetConstant() == INT_MIN) { + if (!lower.IsConstant() || lower.GetConstant() == std::numeric_limits<int32_t>::min()) { // Lower bound isn't useful. Leave it to deoptimization. return this; } - // We currently conservatively assume max array length is INT_MAX. If we can - // make assumptions about the max array length, e.g. due to the max heap size, + // We currently conservatively assume max array length is Max(). + // If we can make assumptions about the max array length, e.g. due to the max heap size, // divided by the element size (such as 4 bytes for each integer array), we can // lower this number and rule out some possible overflows. - int32_t max_array_len = INT_MAX; + int32_t max_array_len = std::numeric_limits<int32_t>::max(); // max possible integer value of range's upper value. - int32_t upper = INT_MAX; + int32_t upper = std::numeric_limits<int32_t>::max(); // Try to lower upper. 
ValueBound upper_bound = range->GetUpper(); if (upper_bound.IsConstant()) { @@ -593,7 +596,7 @@ class MonotonicValueRange : public ValueRange { ((int64_t)upper - (int64_t)initial_constant) / increment_ * increment_; } } - if (last_num_in_sequence <= INT_MAX - increment_) { + if (last_num_in_sequence <= (std::numeric_limits<int32_t>::max() - increment_)) { // No overflow. The sequence will be stopped by the upper bound test as expected. return new (GetAllocator()) ValueRange(GetAllocator(), lower, range->GetUpper()); } @@ -604,7 +607,7 @@ class MonotonicValueRange : public ValueRange { DCHECK_NE(increment_, 0); // Monotonically decreasing. ValueBound upper = ValueBound::NarrowUpperBound(bound_, range->GetUpper()); - if ((!upper.IsConstant() || upper.GetConstant() == INT_MAX) && + if ((!upper.IsConstant() || upper.GetConstant() == std::numeric_limits<int32_t>::max()) && !upper.IsRelatedToArrayLength()) { // Upper bound isn't useful. Leave it to deoptimization. return this; @@ -614,7 +617,7 @@ class MonotonicValueRange : public ValueRange { // for common cases. if (range->GetLower().IsConstant()) { int32_t constant = range->GetLower().GetConstant(); - if (constant >= INT_MIN - increment_) { + if (constant >= (std::numeric_limits<int32_t>::min() - increment_)) { return new (GetAllocator()) ValueRange(GetAllocator(), range->GetLower(), upper); } } @@ -1099,7 +1102,8 @@ class BCEVisitor : public HGraphVisitor { // Very large constant index is considered as an anomaly. This is a threshold // beyond which we don't bother to apply the deoptimization technique since // it's likely some AIOOBE will be thrown. - static constexpr int32_t kMaxConstantForAddingDeoptimize = INT_MAX - 1024 * 1024; + static constexpr int32_t kMaxConstantForAddingDeoptimize = + std::numeric_limits<int32_t>::max() - 1024 * 1024; // Added blocks for loop body entry test. bool IsAddedBlock(HBasicBlock* block) const { @@ -1108,7 +1112,14 @@ class BCEVisitor : public HGraphVisitor { BCEVisitor(HGraph* graph, HInductionVarAnalysis* induction_analysis) : HGraphVisitor(graph), - maps_(graph->GetBlocks().size()), + maps_(graph->GetBlocks().size(), + ArenaSafeMap<int, ValueRange*>( + std::less<int>(), + graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)), + graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)), + first_constant_index_bounds_check_map_( + std::less<int>(), + graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)), need_to_revisit_block_(false), initial_block_size_(graph->GetBlocks().size()), induction_range_(induction_analysis) {} @@ -1133,14 +1144,9 @@ class BCEVisitor : public HGraphVisitor { // Added blocks don't keep value ranges. return nullptr; } - int block_id = basic_block->GetBlockId(); - if (maps_.at(block_id) == nullptr) { - std::unique_ptr<ArenaSafeMap<int, ValueRange*>> map( - new ArenaSafeMap<int, ValueRange*>( - std::less<int>(), GetGraph()->GetArena()->Adapter())); - maps_.at(block_id) = std::move(map); - } - return maps_.at(block_id).get(); + uint32_t block_id = basic_block->GetBlockId(); + DCHECK_LT(block_id, maps_.size()); + return &maps_[block_id]; } // Traverse up the dominator tree to look for value range info. @@ -1467,8 +1473,8 @@ class BCEVisitor : public HGraphVisitor { // Once we have an array access like 'array[5] = 1', we record array.length >= 6. // We currently don't do it for non-constant index since a valid array[i] can't prove // a valid array[i-1] yet due to the lower bound side. 
- if (constant == INT_MAX) { - // INT_MAX as an index will definitely throw AIOOBE. + if (constant == std::numeric_limits<int32_t>::max()) { + // Max() as an index will definitely throw AIOOBE. return; } ValueBound lower = ValueBound(nullptr, constant + 1); @@ -1690,8 +1696,8 @@ class BCEVisitor : public HGraphVisitor { // The value of left input of instruction equals (left + c). // (array_length + 1) or smaller divided by two or more - // always generate a value in [INT_MIN, array_length]. - // This is true even if array_length is INT_MAX. + // always generate a value in [Min(), array_length]. + // This is true even if array_length is Max(). if (left->IsArrayLength() && c <= 1) { if (instruction->IsUShr() && c < 0) { // Make sure for unsigned shift, left side is not negative. @@ -1701,7 +1707,7 @@ class BCEVisitor : public HGraphVisitor { } ValueRange* range = new (GetGraph()->GetArena()) ValueRange( GetGraph()->GetArena(), - ValueBound(nullptr, INT_MIN), + ValueBound(nullptr, std::numeric_limits<int32_t>::min()), ValueBound(left, 0)); GetValueRangeMap(instruction->GetBlock())->Overwrite(instruction->GetId(), range); } @@ -1811,7 +1817,7 @@ class BCEVisitor : public HGraphVisitor { continue; } HIntConstant* lower_bound_const_instr = nullptr; - int32_t lower_bound_const = INT_MIN; + int32_t lower_bound_const = std::numeric_limits<int32_t>::min(); size_t counter = 0; // Count the constant indexing for which bounds checks haven't // been removed yet. @@ -1838,11 +1844,11 @@ class BCEVisitor : public HGraphVisitor { } } - std::vector<std::unique_ptr<ArenaSafeMap<int, ValueRange*>>> maps_; + ArenaVector<ArenaSafeMap<int, ValueRange*>> maps_; // Map an HArrayLength instruction's id to the first HBoundsCheck instruction in // a block that checks a constant index against that HArrayLength. - SafeMap<int, HBoundsCheck*> first_constant_index_bounds_check_map_; + ArenaSafeMap<int, HBoundsCheck*> first_constant_index_bounds_check_map_; // For the block, there is at least one HArrayLength instruction for which there // is more than one bounds check instruction with constant indexing. And it's diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index 9d70124a4c..5acc5fda71 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -140,11 +140,11 @@ class SwitchTable : public ValueObject { void HGraphBuilder::InitializeLocals(uint16_t count) { graph_->SetNumberOfVRegs(count); - locals_.SetSize(count); + locals_.resize(count); for (int i = 0; i < count; i++) { HLocal* local = new (arena_) HLocal(i); entry_block_->AddInstruction(local); - locals_.Put(i, local); + locals_[i] = local; } } @@ -156,7 +156,7 @@ void HGraphBuilder::InitializeParameters(uint16_t number_of_parameters) { graph_->SetNumberOfInVRegs(number_of_parameters); const char* shorty = dex_compilation_unit_->GetShorty(); - int locals_index = locals_.Size() - number_of_parameters; + int locals_index = locals_.size() - number_of_parameters; int parameter_index = 0; if (!dex_compilation_unit_->IsStatic()) { @@ -262,22 +262,6 @@ bool HGraphBuilder::SkipCompilation(const DexFile::CodeItem& code_item, return false; } -static const DexFile::TryItem* GetTryItem(HBasicBlock* block, - const DexFile::CodeItem& code_item, - const ArenaBitVector& can_block_throw) { - DCHECK(!block->IsSingleTryBoundary()); - - // Block does not contain throwing instructions. Even if it is covered by - // a TryItem, we will consider it not in a try block. 
- if (!can_block_throw.IsBitSet(block->GetBlockId())) { - return nullptr; - } - - // Instructions in the block may throw. Find a TryItem covering this block. - int32_t try_item_idx = DexFile::FindTryItem(code_item, block->GetDexPc()); - return (try_item_idx == -1) ? nullptr : DexFile::GetTryItems(code_item, try_item_idx); -} - void HGraphBuilder::CreateBlocksForTryCatch(const DexFile::CodeItem& code_item) { if (code_item.tries_size_ == 0) { return; @@ -316,18 +300,18 @@ void HGraphBuilder::CreateBlocksForTryCatch(const DexFile::CodeItem& code_item) } } -void HGraphBuilder::SplitTryBoundaryEdge(HBasicBlock* predecessor, - HBasicBlock* successor, - HTryBoundary::BoundaryKind kind, - const DexFile::CodeItem& code_item, - const DexFile::TryItem& try_item) { - // Split the edge with a single TryBoundary instruction. - HTryBoundary* try_boundary = new (arena_) HTryBoundary(kind, successor->GetDexPc()); - HBasicBlock* try_entry_block = graph_->SplitEdge(predecessor, successor); - try_entry_block->AddInstruction(try_boundary); - - // Link the TryBoundary to the handlers of `try_item`. - for (CatchHandlerIterator it(code_item, try_item); it.HasNext(); it.Next()) { +// Returns the TryItem stored for `block` or nullptr if there is no info for it. +static const DexFile::TryItem* GetTryItem( + HBasicBlock* block, + const ArenaSafeMap<uint32_t, const DexFile::TryItem*>& try_block_info) { + auto iterator = try_block_info.find(block->GetBlockId()); + return (iterator == try_block_info.end()) ? nullptr : iterator->second; +} + +void HGraphBuilder::LinkToCatchBlocks(HTryBoundary* try_boundary, + const DexFile::CodeItem& code_item, + const DexFile::TryItem* try_item) { + for (CatchHandlerIterator it(code_item, *try_item); it.HasNext(); it.Next()) { try_boundary->AddExceptionHandler(FindBlockStartingAt(it.GetHandlerAddress())); } } @@ -337,132 +321,103 @@ void HGraphBuilder::InsertTryBoundaryBlocks(const DexFile::CodeItem& code_item) return; } - // Bit vector stores information on which blocks contain throwing instructions. - // Must be expandable because catch blocks may be split into two. - ArenaBitVector can_block_throw(arena_, graph_->GetBlocks().size(), /* expandable */ true); + // Keep a map of all try blocks and their respective TryItems. We do not use + // the block's pointer but rather its id to ensure deterministic iteration. + ArenaSafeMap<uint32_t, const DexFile::TryItem*> try_block_info( + std::less<uint32_t>(), arena_->Adapter(kArenaAllocGraphBuilder)); + + // Obtain TryItem information for blocks with throwing instructions, and split + // blocks which are both try & catch to simplify the graph. + // NOTE: We are appending new blocks inside the loop, so we need to use index + // because iterators can be invalidated. We remember the initial size to avoid + // iterating over the new blocks which cannot throw. + for (size_t i = 0, e = graph_->GetBlocks().size(); i < e; ++i) { + HBasicBlock* block = graph_->GetBlocks()[i]; + + // Do not bother creating exceptional edges for try blocks which have no + // throwing instructions. In that case we simply assume that the block is + // not covered by a TryItem. This prevents us from creating a throw-catch + // loop for synchronized blocks. + if (block->HasThrowingInstructions()) { + // Try to find a TryItem covering the block. 
+ DCHECK_NE(block->GetDexPc(), kNoDexPc) << "Block must have a dec_pc to find its TryItem."; + const int32_t try_item_idx = DexFile::FindTryItem(code_item, block->GetDexPc()); + if (try_item_idx != -1) { + // Block throwing and in a TryItem. Store the try block information. + HBasicBlock* throwing_block = block; + if (block->IsCatchBlock()) { + // Simplify blocks which are both try and catch, otherwise we would + // need a strategy for splitting exceptional edges. We split the block + // after the move-exception (if present) and mark the first part not + // throwing. The normal-flow edge between them will be split later. + HInstruction* first_insn = block->GetFirstInstruction(); + if (first_insn->IsLoadException()) { + // Catch block starts with a LoadException. Split the block after + // the StoreLocal and ClearException which must come after the load. + DCHECK(first_insn->GetNext()->IsStoreLocal()); + DCHECK(first_insn->GetNext()->GetNext()->IsClearException()); + throwing_block = block->SplitBefore(first_insn->GetNext()->GetNext()->GetNext()); + } else { + // Catch block does not load the exception. Split at the beginning + // to create an empty catch block. + throwing_block = block->SplitBefore(first_insn); + } + } - // Scan blocks and mark those which contain throwing instructions. - // NOTE: We're appending new blocks inside the loop, so we need to use index because iterators - // can be invalidated. We remember the initial size to avoid iterating over the new blocks. - for (size_t block_id = 0u, end = graph_->GetBlocks().size(); block_id != end; ++block_id) { - HBasicBlock* block = graph_->GetBlocks()[block_id]; - bool can_throw = false; - for (HInstructionIterator insn(block->GetInstructions()); !insn.Done(); insn.Advance()) { - if (insn.Current()->CanThrow()) { - can_throw = true; - break; + try_block_info.Put(throwing_block->GetBlockId(), + DexFile::GetTryItems(code_item, try_item_idx)); } } + } - if (can_throw) { - if (block->IsCatchBlock()) { - // Catch blocks are always considered an entry point into the TryItem in - // order to avoid splitting exceptional edges. We split the block after - // the move-exception (if present) and mark the first part non-throwing. - // Later on, a TryBoundary will be inserted between the two blocks. - HInstruction* first_insn = block->GetFirstInstruction(); - if (first_insn->IsLoadException()) { - // Catch block starts with a LoadException. Split the block after the - // StoreLocal and ClearException which must come after the load. - DCHECK(first_insn->GetNext()->IsStoreLocal()); - DCHECK(first_insn->GetNext()->GetNext()->IsClearException()); - block = block->SplitBefore(first_insn->GetNext()->GetNext()->GetNext()); - } else { - // Catch block does not load the exception. Split at the beginning to - // create an empty catch block. - block = block->SplitBefore(first_insn); - } + // Do a pass over the try blocks and insert entering TryBoundaries where at + // least one predecessor is not covered by the same TryItem as the try block. + // We do not split each edge separately, but rather create one boundary block + // that all predecessors are relinked to. This preserves loop headers (b/23895756). + for (auto entry : try_block_info) { + HBasicBlock* try_block = graph_->GetBlock(entry.first); + for (HBasicBlock* predecessor : try_block->GetPredecessors()) { + if (GetTryItem(predecessor, try_block_info) != entry.second) { + // Found a predecessor not covered by the same TryItem. Insert entering + // boundary block. 
+ HTryBoundary* try_entry = + new (arena_) HTryBoundary(HTryBoundary::kEntry, try_block->GetDexPc()); + try_block->CreateImmediateDominator()->AddInstruction(try_entry); + LinkToCatchBlocks(try_entry, code_item, entry.second); + break; } - can_block_throw.SetBit(block->GetBlockId()); - } - } - - // Iterate over all blocks, find those covered by some TryItem and: - // (a) split edges which enter/exit the try range, - // (b) create TryBoundary instructions in the new blocks, - // (c) link the new blocks to corresponding exception handlers. - // We cannot iterate only over blocks in `branch_targets_` because switch-case - // blocks share the same dex_pc. - // NOTE: We're appending new blocks inside the loop, so we need to use index because iterators - // can be invalidated. We remember the initial size to avoid iterating over the new blocks. - for (size_t block_id = 0u, end = graph_->GetBlocks().size(); block_id != end; ++block_id) { - HBasicBlock* try_block = graph_->GetBlocks()[block_id]; - // TryBoundary blocks are added at the end of the list and not iterated over. - DCHECK(!try_block->IsSingleTryBoundary()); - - // Find the TryItem for this block. - const DexFile::TryItem* try_item = GetTryItem(try_block, code_item, can_block_throw); - if (try_item == nullptr) { - continue; - } - - // Catch blocks were split earlier and cannot throw. - DCHECK(!try_block->IsCatchBlock()); - - // Find predecessors which are not covered by the same TryItem range. Such - // edges enter the try block and will have a TryBoundary inserted. - for (size_t i = 0; i < try_block->GetPredecessors().size(); ++i) { - HBasicBlock* predecessor = try_block->GetPredecessor(i); - if (predecessor->IsSingleTryBoundary()) { - // The edge was already split because of an exit from a neighbouring - // TryItem. We split it again and insert an entry point. - if (kIsDebugBuild) { - HTryBoundary* last_insn = predecessor->GetLastInstruction()->AsTryBoundary(); - const DexFile::TryItem* predecessor_try_item = - GetTryItem(predecessor->GetSinglePredecessor(), code_item, can_block_throw); - DCHECK(!last_insn->IsEntry()); - DCHECK_EQ(last_insn->GetNormalFlowSuccessor(), try_block); - DCHECK(try_block->IsFirstIndexOfPredecessor(predecessor, i)); - DCHECK_NE(try_item, predecessor_try_item); - } - } else if (GetTryItem(predecessor, code_item, can_block_throw) != try_item) { - // This is an entry point into the TryItem and the edge has not been - // split yet. That means that `predecessor` is not in a TryItem, or - // it is in a different TryItem and we happened to iterate over this - // block first. We split the edge and insert an entry point. - } else { - // Not an edge on the boundary of the try block. + } + } + + // Do a second pass over the try blocks and insert exit TryBoundaries where + // the successor is not in the same TryItem. + for (auto entry : try_block_info) { + HBasicBlock* try_block = graph_->GetBlock(entry.first); + // NOTE: Do not use iterators because SplitEdge would invalidate them. + for (size_t i = 0, e = try_block->GetSuccessors().size(); i < e; ++i) { + HBasicBlock* successor = try_block->GetSuccessor(i); + + // If the successor is a try block, all of its predecessors must be + // covered by the same TryItem. Otherwise the previous pass would have + // created a non-throwing boundary block. 
+ if (GetTryItem(successor, try_block_info) != nullptr) { + DCHECK_EQ(entry.second, GetTryItem(successor, try_block_info)); continue; } - SplitTryBoundaryEdge(predecessor, try_block, HTryBoundary::kEntry, code_item, *try_item); - } - - // Find successors which are not covered by the same TryItem range. Such - // edges exit the try block and will have a TryBoundary inserted. - for (HBasicBlock* successor : try_block->GetSuccessors()) { - if (successor->IsCatchBlock()) { - // A catch block is always considered an entry point into its TryItem. - // We therefore assume this is an exit point, regardless of whether - // the catch block is in a different TryItem or not. - } else if (successor->IsSingleTryBoundary()) { - // The edge was already split because of an entry into a neighbouring - // TryItem. We split it again and insert an exit. - if (kIsDebugBuild) { - HTryBoundary* last_insn = successor->GetLastInstruction()->AsTryBoundary(); - const DexFile::TryItem* successor_try_item = - GetTryItem(last_insn->GetNormalFlowSuccessor(), code_item, can_block_throw); - DCHECK_EQ(try_block, successor->GetSinglePredecessor()); - DCHECK(last_insn->IsEntry()); - DCHECK_NE(try_item, successor_try_item); - } - } else if (GetTryItem(successor, code_item, can_block_throw) != try_item) { - // This is an exit out of the TryItem and the edge has not been split - // yet. That means that either `successor` is not in a TryItem, or it - // is in a different TryItem and we happened to iterate over this - // block first. We split the edge and insert an exit. - HInstruction* last_instruction = try_block->GetLastInstruction(); - if (last_instruction->IsReturn() || last_instruction->IsReturnVoid()) { - DCHECK_EQ(successor, exit_block_); - // Control flow exits the try block with a Return(Void). Because - // splitting the edge would invalidate the invariant that Return - // always jumps to Exit, we move the Return outside the try block. - successor = try_block->SplitBefore(last_instruction); - } - } else { - // Not an edge on the boundary of the try block. - continue; + + // Preserve the invariant that Return(Void) always jumps to Exit by moving + // it outside the try block if necessary. + HInstruction* last_instruction = try_block->GetLastInstruction(); + if (last_instruction->IsReturn() || last_instruction->IsReturnVoid()) { + DCHECK_EQ(successor, exit_block_); + successor = try_block->SplitBefore(last_instruction); } - SplitTryBoundaryEdge(try_block, successor, HTryBoundary::kExit, code_item, *try_item); + + // Insert TryBoundary and link to catch blocks. + HTryBoundary* try_exit = + new (arena_) HTryBoundary(HTryBoundary::kExit, successor->GetDexPc()); + graph_->SplitEdge(try_block, successor)->AddInstruction(try_exit); + LinkToCatchBlocks(try_exit, code_item, entry.second); } } } @@ -554,11 +509,11 @@ void HGraphBuilder::MaybeUpdateCurrentBlock(size_t dex_pc) { bool HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr, const uint16_t* code_end, size_t* number_of_branches) { - branch_targets_.SetSize(code_end - code_ptr); + branch_targets_.resize(code_end - code_ptr, nullptr); // Create the first block for the dex instructions, single successor of the entry block. HBasicBlock* block = new (arena_) HBasicBlock(graph_, 0); - branch_targets_.Put(0, block); + branch_targets_[0] = block; entry_block_->AddSuccessor(block); // Iterate over all instructions and find branching instructions. 
Create blocks for @@ -602,7 +557,7 @@ bool HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr, // Create a block for the switch-case logic. The block gets the dex_pc // of the SWITCH instruction because it is part of its semantics. block = new (arena_) HBasicBlock(graph_, dex_pc); - branch_targets_.Put(table.GetDexPcForIndex(i), block); + branch_targets_[table.GetDexPcForIndex(i)] = block; } // Fall-through. Add a block if there is more code afterwards. @@ -626,15 +581,15 @@ bool HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr, HBasicBlock* HGraphBuilder::FindBlockStartingAt(int32_t dex_pc) const { DCHECK_GE(dex_pc, 0); - DCHECK_LT(static_cast<size_t>(dex_pc), branch_targets_.Size()); - return branch_targets_.Get(dex_pc); + DCHECK_LT(static_cast<size_t>(dex_pc), branch_targets_.size()); + return branch_targets_[dex_pc]; } HBasicBlock* HGraphBuilder::FindOrCreateBlockStartingAt(int32_t dex_pc) { HBasicBlock* block = FindBlockStartingAt(dex_pc); if (block == nullptr) { block = new (arena_) HBasicBlock(graph_, dex_pc); - branch_targets_.Put(dex_pc, block); + branch_targets_[dex_pc] = block; } return block; } @@ -985,7 +940,8 @@ HClinitCheck* HGraphBuilder::ProcessClinitCheckForInvoke( storage_index, *dex_compilation_unit_->GetDexFile(), is_outer_class, - dex_pc); + dex_pc, + /*needs_access_check*/ false); current_block_->AddInstruction(load_class); clinit_check = new (arena_) HClinitCheck(load_class, dex_pc); current_block_->AddInstruction(clinit_check); @@ -1231,6 +1187,12 @@ void HGraphBuilder::PotentiallySimplifyFakeString(uint16_t original_dex_register } } +static Primitive::Type GetFieldAccessType(const DexFile& dex_file, uint16_t field_index) { + const DexFile::FieldId& field_id = dex_file.GetFieldId(field_index); + const char* type = dex_file.GetFieldTypeDescriptor(field_id); + return Primitive::GetType(type[0]); +} + bool HGraphBuilder::BuildInstanceFieldAccess(const Instruction& instruction, uint32_t dex_pc, bool is_put) { @@ -1250,44 +1212,61 @@ bool HGraphBuilder::BuildInstanceFieldAccess(const Instruction& instruction, ArtField* resolved_field = compiler_driver_->ComputeInstanceFieldInfo(field_index, dex_compilation_unit_, is_put, soa); - if (resolved_field == nullptr) { - MaybeRecordStat(MethodCompilationStat::kNotCompiledUnresolvedField); - return false; - } - - Primitive::Type field_type = resolved_field->GetTypeAsPrimitiveType(); HInstruction* object = LoadLocal(obj_reg, Primitive::kPrimNot, dex_pc); - current_block_->AddInstruction(new (arena_) HNullCheck(object, dex_pc)); + HInstruction* null_check = new (arena_) HNullCheck(object, dex_pc); + current_block_->AddInstruction(null_check); + + Primitive::Type field_type = (resolved_field == nullptr) + ? GetFieldAccessType(*dex_file_, field_index) + : resolved_field->GetTypeAsPrimitiveType(); if (is_put) { Temporaries temps(graph_); - HInstruction* null_check = current_block_->GetLastInstruction(); // We need one temporary for the null check. 
temps.Add(null_check); HInstruction* value = LoadLocal(source_or_dest_reg, field_type, dex_pc); - current_block_->AddInstruction(new (arena_) HInstanceFieldSet( - null_check, - value, - field_type, - resolved_field->GetOffset(), - resolved_field->IsVolatile(), - field_index, - *dex_file_, - dex_compilation_unit_->GetDexCache(), - dex_pc)); + HInstruction* field_set = nullptr; + if (resolved_field == nullptr) { + MaybeRecordStat(MethodCompilationStat::kUnresolvedField); + field_set = new (arena_) HUnresolvedInstanceFieldSet(null_check, + value, + field_type, + field_index, + dex_pc); + } else { + field_set = new (arena_) HInstanceFieldSet(null_check, + value, + field_type, + resolved_field->GetOffset(), + resolved_field->IsVolatile(), + field_index, + *dex_file_, + dex_compilation_unit_->GetDexCache(), + dex_pc); + } + current_block_->AddInstruction(field_set); } else { - current_block_->AddInstruction(new (arena_) HInstanceFieldGet( - current_block_->GetLastInstruction(), - field_type, - resolved_field->GetOffset(), - resolved_field->IsVolatile(), - field_index, - *dex_file_, - dex_compilation_unit_->GetDexCache(), - dex_pc)); - - UpdateLocal(source_or_dest_reg, current_block_->GetLastInstruction(), dex_pc); + HInstruction* field_get = nullptr; + if (resolved_field == nullptr) { + MaybeRecordStat(MethodCompilationStat::kUnresolvedField); + field_get = new (arena_) HUnresolvedInstanceFieldGet(null_check, + field_type, + field_index, + dex_pc); + } else { + field_get = new (arena_) HInstanceFieldGet(null_check, + field_type, + resolved_field->GetOffset(), + resolved_field->IsVolatile(), + field_index, + *dex_file_, + dex_compilation_unit_->GetDexCache(), + dex_pc); + } + current_block_->AddInstruction(field_get); + UpdateLocal(source_or_dest_reg, field_get, dex_pc); } + return true; } @@ -1327,6 +1306,23 @@ bool HGraphBuilder::IsOutermostCompilingClass(uint16_t type_index) const { return outer_class.Get() == cls.Get(); } +void HGraphBuilder::BuildUnresolvedStaticFieldAccess(const Instruction& instruction, + uint32_t dex_pc, + bool is_put, + Primitive::Type field_type) { + uint32_t source_or_dest_reg = instruction.VRegA_21c(); + uint16_t field_index = instruction.VRegB_21c(); + + if (is_put) { + HInstruction* value = LoadLocal(source_or_dest_reg, field_type, dex_pc); + current_block_->AddInstruction( + new (arena_) HUnresolvedStaticFieldSet(value, field_type, field_index, dex_pc)); + } else { + current_block_->AddInstruction( + new (arena_) HUnresolvedStaticFieldGet(field_type, field_index, dex_pc)); + UpdateLocal(source_or_dest_reg, current_block_->GetLastInstruction(), dex_pc); + } +} bool HGraphBuilder::BuildStaticFieldAccess(const Instruction& instruction, uint32_t dex_pc, bool is_put) { @@ -1344,10 +1340,13 @@ bool HGraphBuilder::BuildStaticFieldAccess(const Instruction& instruction, soa, dex_cache, class_loader, dex_compilation_unit_, field_index, true); if (resolved_field == nullptr) { - MaybeRecordStat(MethodCompilationStat::kNotCompiledUnresolvedField); - return false; + MaybeRecordStat(MethodCompilationStat::kUnresolvedField); + Primitive::Type field_type = GetFieldAccessType(*dex_file_, field_index); + BuildUnresolvedStaticFieldAccess(instruction, dex_pc, is_put, field_type); + return true; } + Primitive::Type field_type = resolved_field->GetTypeAsPrimitiveType(); const DexFile& outer_dex_file = *outer_compilation_unit_->GetDexFile(); Handle<mirror::DexCache> outer_dex_cache(hs.NewHandle( outer_compilation_unit_->GetClassLinker()->FindDexCache(soa.Self(), outer_dex_file))); @@ 
-1362,6 +1361,7 @@ bool HGraphBuilder::BuildStaticFieldAccess(const Instruction& instruction, // The compiler driver cannot currently understand multiple dex caches involved. Just bailout. return false; } else { + // TODO: This is rather expensive. Perf it and cache the results if needed. std::pair<bool, bool> pair = compiler_driver_->IsFastStaticField( outer_dex_cache.Get(), GetCompilingClass(), @@ -1370,7 +1370,9 @@ bool HGraphBuilder::BuildStaticFieldAccess(const Instruction& instruction, &storage_index); bool can_easily_access = is_put ? pair.second : pair.first; if (!can_easily_access) { - return false; + MaybeRecordStat(MethodCompilationStat::kUnresolvedFieldNotAFastAccess); + BuildUnresolvedStaticFieldAccess(instruction, dex_pc, is_put, field_type); + return true; } } @@ -1383,7 +1385,8 @@ bool HGraphBuilder::BuildStaticFieldAccess(const Instruction& instruction, storage_index, *dex_compilation_unit_->GetDexFile(), is_outer_class, - dex_pc); + dex_pc, + /*needs_access_check*/ false); current_block_->AddInstruction(constant); HInstruction* cls = constant; @@ -1391,8 +1394,6 @@ bool HGraphBuilder::BuildStaticFieldAccess(const Instruction& instruction, cls = new (arena_) HClinitCheck(constant, dex_pc); current_block_->AddInstruction(cls); } - - Primitive::Type field_type = resolved_field->GetTypeAsPrimitiveType(); if (is_put) { // We need to keep the class alive before loading the value. Temporaries temps(graph_); @@ -1616,7 +1617,9 @@ void HGraphBuilder::BuildFillWideArrayData(HInstruction* object, static TypeCheckKind ComputeTypeCheckKind(Handle<mirror::Class> cls) SHARED_REQUIRES(Locks::mutator_lock_) { - if (cls->IsInterface()) { + if (cls.Get() == nullptr) { + return TypeCheckKind::kUnresolvedCheck; + } else if (cls->IsInterface()) { return TypeCheckKind::kInterfaceCheck; } else if (cls->IsArrayClass()) { if (cls->GetComponentType()->IsObjectClass()) { @@ -1635,11 +1638,20 @@ static TypeCheckKind ComputeTypeCheckKind(Handle<mirror::Class> cls) } } -bool HGraphBuilder::BuildTypeCheck(const Instruction& instruction, +void HGraphBuilder::BuildTypeCheck(const Instruction& instruction, uint8_t destination, uint8_t reference, uint16_t type_index, uint32_t dex_pc) { + bool type_known_final, type_known_abstract, use_declaring_class; + bool can_access = compiler_driver_->CanAccessTypeWithoutChecks( + dex_compilation_unit_->GetDexMethodIndex(), + *dex_compilation_unit_->GetDexFile(), + type_index, + &type_known_final, + &type_known_abstract, + &use_declaring_class); + ScopedObjectAccess soa(Thread::Current()); StackHandleScope<2> hs(soa.Self()); Handle<mirror::DexCache> dex_cache(hs.NewHandle( @@ -1647,22 +1659,14 @@ bool HGraphBuilder::BuildTypeCheck(const Instruction& instruction, soa.Self(), *dex_compilation_unit_->GetDexFile()))); Handle<mirror::Class> resolved_class(hs.NewHandle(dex_cache->GetResolvedType(type_index))); - if ((resolved_class.Get() == nullptr) || - // TODO: Remove this check once the compiler actually knows which - // ArtMethod it is compiling. 
- (GetCompilingClass() == nullptr) || - !GetCompilingClass()->CanAccess(resolved_class.Get())) { - MaybeRecordStat(MethodCompilationStat::kNotCompiledCantAccesType); - return false; - } - HInstruction* object = LoadLocal(reference, Primitive::kPrimNot, dex_pc); HLoadClass* cls = new (arena_) HLoadClass( graph_->GetCurrentMethod(), type_index, *dex_compilation_unit_->GetDexFile(), IsOutermostCompilingClass(type_index), - dex_pc); + dex_pc, + !can_access); current_block_->AddInstruction(cls); // The class needs a temporary before being used by the type check. @@ -1677,7 +1681,6 @@ bool HGraphBuilder::BuildTypeCheck(const Instruction& instruction, DCHECK_EQ(instruction.Opcode(), Instruction::CHECK_CAST); current_block_->AddInstruction(new (arena_) HCheckCast(object, cls, check_kind, dex_pc)); } - return true; } bool HGraphBuilder::NeedsAccessCheck(uint32_t type_index) const { @@ -1738,8 +1741,14 @@ void HGraphBuilder::BuildPackedSwitch(const Instruction& instruction, uint32_t d } else { // Chained cmp-and-branch, starting from starting_key. for (size_t i = 1; i <= num_entries; i++) { - BuildSwitchCaseHelper(instruction, i, i == num_entries, table, value, - starting_key + i - 1, table.GetEntryAt(i), dex_pc); + BuildSwitchCaseHelper(instruction, + i, + i == num_entries, + table, + value, + starting_key + i - 1, + table.GetEntryAt(i), + dex_pc); } } } @@ -2786,16 +2795,13 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 bool can_access = compiler_driver_->CanAccessTypeWithoutChecks( dex_compilation_unit_->GetDexMethodIndex(), *dex_file_, type_index, &type_known_final, &type_known_abstract, &dont_use_is_referrers_class); - if (!can_access) { - MaybeRecordStat(MethodCompilationStat::kNotCompiledCantAccesType); - return false; - } current_block_->AddInstruction(new (arena_) HLoadClass( graph_->GetCurrentMethod(), type_index, *dex_compilation_unit_->GetDexFile(), IsOutermostCompilingClass(type_index), - dex_pc)); + dex_pc, + !can_access)); UpdateLocal(instruction.VRegA_21c(), current_block_->GetLastInstruction(), dex_pc); break; } @@ -2822,18 +2828,14 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 uint8_t destination = instruction.VRegA_22c(); uint8_t reference = instruction.VRegB_22c(); uint16_t type_index = instruction.VRegC_22c(); - if (!BuildTypeCheck(instruction, destination, reference, type_index, dex_pc)) { - return false; - } + BuildTypeCheck(instruction, destination, reference, type_index, dex_pc); break; } case Instruction::CHECK_CAST: { uint8_t reference = instruction.VRegA_21c(); uint16_t type_index = instruction.VRegB_21c(); - if (!BuildTypeCheck(instruction, -1, reference, type_index, dex_pc)) { - return false; - } + BuildTypeCheck(instruction, -1, reference, type_index, dex_pc); break; } @@ -2874,18 +2876,19 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 return true; } // NOLINT(readability/fn_size) -HLocal* HGraphBuilder::GetLocalAt(int register_index) const { - return locals_.Get(register_index); +HLocal* HGraphBuilder::GetLocalAt(uint32_t register_index) const { + DCHECK_LT(register_index, locals_.size()); + return locals_[register_index]; } -void HGraphBuilder::UpdateLocal(int register_index, +void HGraphBuilder::UpdateLocal(uint32_t register_index, HInstruction* instruction, uint32_t dex_pc) const { HLocal* local = GetLocalAt(register_index); current_block_->AddInstruction(new (arena_) HStoreLocal(local, instruction, dex_pc)); } -HInstruction* HGraphBuilder::LoadLocal(int 
register_index, +HInstruction* HGraphBuilder::LoadLocal(uint32_t register_index, Primitive::Type type, uint32_t dex_pc) const { HLocal* local = GetLocalAt(register_index); diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h index 7f87df6df2..6910d5195c 100644 --- a/compiler/optimizing/builder.h +++ b/compiler/optimizing/builder.h @@ -17,6 +17,7 @@ #ifndef ART_COMPILER_OPTIMIZING_BUILDER_H_ #define ART_COMPILER_OPTIMIZING_BUILDER_H_ +#include "base/arena_containers.h" #include "base/arena_object.h" #include "dex_file.h" #include "dex_file-inl.h" @@ -24,7 +25,6 @@ #include "driver/dex_compilation_unit.h" #include "optimizing_compiler_stats.h" #include "primitive.h" -#include "utils/growable_array.h" #include "nodes.h" namespace art { @@ -43,8 +43,8 @@ class HGraphBuilder : public ValueObject { const uint8_t* interpreter_metadata, Handle<mirror::DexCache> dex_cache) : arena_(graph->GetArena()), - branch_targets_(graph->GetArena(), 0), - locals_(graph->GetArena(), 0), + branch_targets_(graph->GetArena()->Adapter(kArenaAllocGraphBuilder)), + locals_(graph->GetArena()->Adapter(kArenaAllocGraphBuilder)), entry_block_(nullptr), exit_block_(nullptr), current_block_(nullptr), @@ -64,8 +64,8 @@ class HGraphBuilder : public ValueObject { // Only for unit testing. HGraphBuilder(HGraph* graph, Primitive::Type return_type = Primitive::kPrimInt) : arena_(graph->GetArena()), - branch_targets_(graph->GetArena(), 0), - locals_(graph->GetArena(), 0), + branch_targets_(graph->GetArena()->Adapter(kArenaAllocGraphBuilder)), + locals_(graph->GetArena()->Adapter(kArenaAllocGraphBuilder)), entry_block_(nullptr), exit_block_(nullptr), current_block_(nullptr), @@ -121,21 +121,21 @@ class HGraphBuilder : public ValueObject { // instructions and links them to the corresponding catch blocks. void InsertTryBoundaryBlocks(const DexFile::CodeItem& code_item); - // Splits a single edge, inserting a TryBoundary of given `kind` and linking - // it to exception handlers of `try_item`. - void SplitTryBoundaryEdge(HBasicBlock* predecessor, - HBasicBlock* successor, - HTryBoundary::BoundaryKind kind, - const DexFile::CodeItem& code_item, - const DexFile::TryItem& try_item); + // Iterates over the exception handlers of `try_item`, finds the corresponding + // catch blocks and makes them successors of `try_boundary`. The order of + // successors matches the order in which runtime exception delivery searches + // for a handler. + void LinkToCatchBlocks(HTryBoundary* try_boundary, + const DexFile::CodeItem& code_item, + const DexFile::TryItem* try_item); bool CanDecodeQuickenedInfo() const; uint16_t LookupQuickenedInfo(uint32_t dex_pc); void InitializeLocals(uint16_t count); - HLocal* GetLocalAt(int register_index) const; - void UpdateLocal(int register_index, HInstruction* instruction, uint32_t dex_pc) const; - HInstruction* LoadLocal(int register_index, Primitive::Type type, uint32_t dex_pc) const; + HLocal* GetLocalAt(uint32_t register_index) const; + void UpdateLocal(uint32_t register_index, HInstruction* instruction, uint32_t dex_pc) const; + HInstruction* LoadLocal(uint32_t register_index, Primitive::Type type, uint32_t dex_pc) const; void PotentiallyAddSuspendCheck(HBasicBlock* target, uint32_t dex_pc); void InitializeParameters(uint16_t number_of_parameters); bool NeedsAccessCheck(uint32_t type_index) const; @@ -187,6 +187,10 @@ class HGraphBuilder : public ValueObject { // Builds an instance field access node and returns whether the instruction is supported. 
bool BuildInstanceFieldAccess(const Instruction& instruction, uint32_t dex_pc, bool is_put); + void BuildUnresolvedStaticFieldAccess(const Instruction& instruction, + uint32_t dex_pc, + bool is_put, + Primitive::Type field_type); // Builds a static field access node and returns whether the instruction is supported. bool BuildStaticFieldAccess(const Instruction& instruction, uint32_t dex_pc, bool is_put); @@ -232,8 +236,7 @@ class HGraphBuilder : public ValueObject { uint32_t dex_pc); // Builds a `HInstanceOf`, or a `HCheckCast` instruction. - // Returns whether we succeeded in building the instruction. - bool BuildTypeCheck(const Instruction& instruction, + void BuildTypeCheck(const Instruction& instruction, uint8_t destination, uint8_t reference, uint16_t type_index, @@ -313,9 +316,9 @@ class HGraphBuilder : public ValueObject { // A list of the size of the dex code holding block information for // the method. If an entry contains a block, then the dex instruction // starting at that entry is the first instruction of a new block. - GrowableArray<HBasicBlock*> branch_targets_; + ArenaVector<HBasicBlock*> branch_targets_; - GrowableArray<HLocal*> locals_; + ArenaVector<HLocal*> locals_; HBasicBlock* entry_block_; HBasicBlock* exit_block_; diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 3c6a41df34..00f316cf98 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -203,13 +203,13 @@ class DisassemblyScope { void CodeGenerator::GenerateSlowPaths() { size_t code_start = 0; - for (size_t i = 0, e = slow_paths_.Size(); i < e; ++i) { + for (SlowPathCode* slow_path : slow_paths_) { if (disasm_info_ != nullptr) { code_start = GetAssembler()->CodeSize(); } - slow_paths_.Get(i)->EmitNativeCode(this); + slow_path->EmitNativeCode(this); if (disasm_info_ != nullptr) { - disasm_info_->AddSlowPathInterval(slow_paths_.Get(i), code_start, GetAssembler()->CodeSize()); + disasm_info_->AddSlowPathInterval(slow_path, code_start, GetAssembler()->CodeSize()); } } } @@ -413,6 +413,150 @@ void CodeGenerator::GenerateInvokeUnresolvedRuntimeCall(HInvokeUnresolved* invok InvokeRuntime(entrypoint, invoke, invoke->GetDexPc(), nullptr); } +void CodeGenerator::CreateUnresolvedFieldLocationSummary( + HInstruction* field_access, + Primitive::Type field_type, + const FieldAccessCallingConvention& calling_convention) { + bool is_instance = field_access->IsUnresolvedInstanceFieldGet() + || field_access->IsUnresolvedInstanceFieldSet(); + bool is_get = field_access->IsUnresolvedInstanceFieldGet() + || field_access->IsUnresolvedStaticFieldGet(); + + ArenaAllocator* allocator = field_access->GetBlock()->GetGraph()->GetArena(); + LocationSummary* locations = + new (allocator) LocationSummary(field_access, LocationSummary::kCall); + + locations->AddTemp(calling_convention.GetFieldIndexLocation()); + + if (is_instance) { + // Add the `this` object for instance field accesses. + locations->SetInAt(0, calling_convention.GetObjectLocation()); + } + + // Note that pSetXXStatic/pGetXXStatic always takes/returns an int or int64 + // regardless of the the type. Because of that we forced to special case + // the access to floating point values. + if (is_get) { + if (Primitive::IsFloatingPointType(field_type)) { + // The return value will be stored in regular registers while register + // allocator expects it in a floating point register. 
+ // Note We don't need to request additional temps because the return + // register(s) are already blocked due the call and they may overlap with + // the input or field index. + // The transfer between the two will be done at codegen level. + locations->SetOut(calling_convention.GetFpuLocation(field_type)); + } else { + locations->SetOut(calling_convention.GetReturnLocation(field_type)); + } + } else { + size_t set_index = is_instance ? 1 : 0; + if (Primitive::IsFloatingPointType(field_type)) { + // The set value comes from a float location while the calling convention + // expects it in a regular register location. Allocate a temp for it and + // make the transfer at codegen. + AddLocationAsTemp(calling_convention.GetSetValueLocation(field_type, is_instance), locations); + locations->SetInAt(set_index, calling_convention.GetFpuLocation(field_type)); + } else { + locations->SetInAt(set_index, + calling_convention.GetSetValueLocation(field_type, is_instance)); + } + } +} + +void CodeGenerator::GenerateUnresolvedFieldAccess( + HInstruction* field_access, + Primitive::Type field_type, + uint32_t field_index, + uint32_t dex_pc, + const FieldAccessCallingConvention& calling_convention) { + LocationSummary* locations = field_access->GetLocations(); + + MoveConstant(locations->GetTemp(0), field_index); + + bool is_instance = field_access->IsUnresolvedInstanceFieldGet() + || field_access->IsUnresolvedInstanceFieldSet(); + bool is_get = field_access->IsUnresolvedInstanceFieldGet() + || field_access->IsUnresolvedStaticFieldGet(); + + if (!is_get && Primitive::IsFloatingPointType(field_type)) { + // Copy the float value to be set into the calling convention register. + // Note that using directly the temp location is problematic as we don't + // support temp register pairs. To avoid boilerplate conversion code, use + // the location from the calling convention. + MoveLocation(calling_convention.GetSetValueLocation(field_type, is_instance), + locations->InAt(is_instance ? 1 : 0), + (Primitive::Is64BitType(field_type) ? Primitive::kPrimLong : Primitive::kPrimInt)); + } + + QuickEntrypointEnum entrypoint = kQuickSet8Static; // Initialize to anything to avoid warnings. + switch (field_type) { + case Primitive::kPrimBoolean: + entrypoint = is_instance + ? (is_get ? kQuickGetBooleanInstance : kQuickSet8Instance) + : (is_get ? kQuickGetBooleanStatic : kQuickSet8Static); + break; + case Primitive::kPrimByte: + entrypoint = is_instance + ? (is_get ? kQuickGetByteInstance : kQuickSet8Instance) + : (is_get ? kQuickGetByteStatic : kQuickSet8Static); + break; + case Primitive::kPrimShort: + entrypoint = is_instance + ? (is_get ? kQuickGetShortInstance : kQuickSet16Instance) + : (is_get ? kQuickGetShortStatic : kQuickSet16Static); + break; + case Primitive::kPrimChar: + entrypoint = is_instance + ? (is_get ? kQuickGetCharInstance : kQuickSet16Instance) + : (is_get ? kQuickGetCharStatic : kQuickSet16Static); + break; + case Primitive::kPrimInt: + case Primitive::kPrimFloat: + entrypoint = is_instance + ? (is_get ? kQuickGet32Instance : kQuickSet32Instance) + : (is_get ? kQuickGet32Static : kQuickSet32Static); + break; + case Primitive::kPrimNot: + entrypoint = is_instance + ? (is_get ? kQuickGetObjInstance : kQuickSetObjInstance) + : (is_get ? kQuickGetObjStatic : kQuickSetObjStatic); + break; + case Primitive::kPrimLong: + case Primitive::kPrimDouble: + entrypoint = is_instance + ? (is_get ? kQuickGet64Instance : kQuickSet64Instance) + : (is_get ? 
kQuickGet64Static : kQuickSet64Static); + break; + default: + LOG(FATAL) << "Invalid type " << field_type; + } + InvokeRuntime(entrypoint, field_access, dex_pc, nullptr); + + if (is_get && Primitive::IsFloatingPointType(field_type)) { + MoveLocation(locations->Out(), calling_convention.GetReturnLocation(field_type), field_type); + } +} + +void CodeGenerator::CreateLoadClassLocationSummary(HLoadClass* cls, + Location runtime_type_index_location, + Location runtime_return_location) { + ArenaAllocator* allocator = cls->GetBlock()->GetGraph()->GetArena(); + LocationSummary::CallKind call_kind = cls->NeedsAccessCheck() + ? LocationSummary::kCall + : (cls->CanCallRuntime() + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall); + LocationSummary* locations = new (allocator) LocationSummary(cls, call_kind); + locations->SetInAt(0, Location::RequiresRegister()); + if (cls->NeedsAccessCheck()) { + locations->AddTemp(runtime_type_index_location); + locations->SetOut(runtime_return_location); + } else { + locations->SetOut(Location::RequiresRegister()); + } +} + + void CodeGenerator::BlockIfInRegister(Location location, bool is_out) const { // The DCHECKS below check that a register is not specified twice in // the summary. The out location can overlap with an input, so we need diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index a1c6db0a2c..0a3698946e 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -143,6 +143,22 @@ class InvokeDexCallingConventionVisitor { DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor); }; +class FieldAccessCallingConvention { + public: + virtual Location GetObjectLocation() const = 0; + virtual Location GetFieldIndexLocation() const = 0; + virtual Location GetReturnLocation(Primitive::Type type) const = 0; + virtual Location GetSetValueLocation(Primitive::Type type, bool is_instance) const = 0; + virtual Location GetFpuLocation(Primitive::Type type) const = 0; + virtual ~FieldAccessCallingConvention() {} + + protected: + FieldAccessCallingConvention() {} + + private: + DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConvention); +}; + class CodeGenerator { public: // Compiles the graph to executable instructions. 
Returns whether the compilation @@ -177,6 +193,9 @@ class CodeGenerator { virtual void Bind(HBasicBlock* block) = 0; virtual void Move(HInstruction* instruction, Location location, HInstruction* move_for) = 0; virtual void MoveConstant(Location destination, int32_t value) = 0; + virtual void MoveLocation(Location dst, Location src, Primitive::Type dst_type) = 0; + virtual void AddLocationAsTemp(Location location, LocationSummary* locations) = 0; + virtual Assembler* GetAssembler() = 0; virtual const Assembler& GetAssembler() const = 0; virtual size_t GetWordSize() const = 0; @@ -261,7 +280,7 @@ class CodeGenerator { bool IsImplicitNullCheckAllowed(HNullCheck* null_check) const; void AddSlowPath(SlowPathCode* slow_path) { - slow_paths_.Add(slow_path); + slow_paths_.push_back(slow_path); } void SetSrcMap(DefaultSrcMap* src_map) { src_map_ = src_map; } @@ -385,6 +404,23 @@ class CodeGenerator { void GenerateInvokeUnresolvedRuntimeCall(HInvokeUnresolved* invoke); + void CreateUnresolvedFieldLocationSummary( + HInstruction* field_access, + Primitive::Type field_type, + const FieldAccessCallingConvention& calling_convention); + + void GenerateUnresolvedFieldAccess( + HInstruction* field_access, + Primitive::Type field_type, + uint32_t field_index, + uint32_t dex_pc, + const FieldAccessCallingConvention& calling_convention); + + // TODO: This overlaps a bit with MoveFromReturnRegister. Refactor for a better design. + static void CreateLoadClassLocationSummary(HLoadClass* cls, + Location runtime_type_index_location, + Location runtime_return_location); + void SetDisassemblyInformation(DisassemblyInformation* info) { disasm_info_ = info; } DisassemblyInformation* GetDisassemblyInformation() const { return disasm_info_; } @@ -425,9 +461,12 @@ class CodeGenerator { core_spill_mask_(0), fpu_spill_mask_(0), first_register_slot_in_slow_path_(0), - blocked_core_registers_(graph->GetArena()->AllocArray<bool>(number_of_core_registers)), - blocked_fpu_registers_(graph->GetArena()->AllocArray<bool>(number_of_fpu_registers)), - blocked_register_pairs_(graph->GetArena()->AllocArray<bool>(number_of_register_pairs)), + blocked_core_registers_(graph->GetArena()->AllocArray<bool>(number_of_core_registers, + kArenaAllocCodeGenerator)), + blocked_fpu_registers_(graph->GetArena()->AllocArray<bool>(number_of_fpu_registers, + kArenaAllocCodeGenerator)), + blocked_register_pairs_(graph->GetArena()->AllocArray<bool>(number_of_register_pairs, + kArenaAllocCodeGenerator)), number_of_core_registers_(number_of_core_registers), number_of_fpu_registers_(number_of_fpu_registers), number_of_register_pairs_(number_of_register_pairs), @@ -441,10 +480,12 @@ class CodeGenerator { graph_(graph), compiler_options_(compiler_options), src_map_(nullptr), - slow_paths_(graph->GetArena(), 8), + slow_paths_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), current_block_index_(0), is_leaf_(true), - requires_current_method_(false) {} + requires_current_method_(false) { + slow_paths_.reserve(8); + } // Register allocation logic. void AllocateRegistersLocally(HInstruction* instruction) const; @@ -485,8 +526,20 @@ class CodeGenerator { return instruction_set == kX86 || instruction_set == kX86_64; } - // Arm64 has its own type for a label, so we need to templatize this method + // Arm64 has its own type for a label, so we need to templatize these methods // to share the logic. 
+ + template <typename LabelType> + LabelType* CommonInitializeLabels() { + size_t size = GetGraph()->GetBlocks().size(); + LabelType* labels = GetGraph()->GetArena()->AllocArray<LabelType>(size, + kArenaAllocCodeGenerator); + for (size_t i = 0; i != size; ++i) { + new(labels + i) LabelType(); + } + return labels; + } + template <typename LabelType> LabelType* CommonGetLabelOf(LabelType* raw_pointer_to_labels_array, HBasicBlock* block) const { block = FirstNonEmptyBlock(block); @@ -539,7 +592,7 @@ class CodeGenerator { // Native to dex_pc map used for native debugging/profiling tools. DefaultSrcMap* src_map_; - GrowableArray<SlowPathCode*> slow_paths_; + ArenaVector<SlowPathCode*> slow_paths_; // The current block index in `block_order_` of the block // we are generating code for. diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index d7b1d24887..54af41d420 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -361,6 +361,51 @@ class DeoptimizationSlowPathARM : public SlowPathCode { DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM); }; +class ArraySetSlowPathARM : public SlowPathCode { + public: + explicit ArraySetSlowPathARM(HInstruction* instruction) : instruction_(instruction) {} + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + InvokeRuntimeCallingConvention calling_convention; + HParallelMove parallel_move(codegen->GetGraph()->GetArena()); + parallel_move.AddMove( + locations->InAt(0), + Location::RegisterLocation(calling_convention.GetRegisterAt(0)), + Primitive::kPrimNot, + nullptr); + parallel_move.AddMove( + locations->InAt(1), + Location::RegisterLocation(calling_convention.GetRegisterAt(1)), + Primitive::kPrimInt, + nullptr); + parallel_move.AddMove( + locations->InAt(2), + Location::RegisterLocation(calling_convention.GetRegisterAt(2)), + Primitive::kPrimNot, + nullptr); + codegen->GetMoveResolver()->EmitNativeCode(¶llel_move); + + CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); + arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject), + instruction_, + instruction_->GetDexPc(), + this); + RestoreLiveRegisters(codegen, locations); + __ b(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathARM"; } + + private: + HInstruction* const instruction_; + + DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM); +}; + #undef __ #define __ down_cast<ArmAssembler*>(GetAssembler())-> @@ -432,15 +477,17 @@ CodeGeneratorARM::CodeGeneratorARM(HGraph* graph, arraysize(kFpuCalleeSaves)), compiler_options, stats), - block_labels_(graph->GetArena(), 0), + block_labels_(nullptr), location_builder_(graph, this), instruction_visitor_(graph, this), move_resolver_(graph->GetArena(), this), assembler_(), isa_features_(isa_features), - method_patches_(MethodReferenceComparator(), graph->GetArena()->Adapter()), - call_patches_(MethodReferenceComparator(), graph->GetArena()->Adapter()), - relative_call_patches_(graph->GetArena()->Adapter()) { + method_patches_(MethodReferenceComparator(), + graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + call_patches_(MethodReferenceComparator(), + graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) { // Always save the LR register to mimic Quick. 
AddAllocatedRegister(Location::RegisterLocation(LR)); } @@ -459,8 +506,8 @@ void CodeGeneratorARM::Finalize(CodeAllocator* allocator) { for (HBasicBlock* block : *block_order_) { // Get the label directly from block_labels_ rather than through GetLabelOf() to avoid // FirstNonEmptyBlock() which could lead to adjusting a label more than once. - DCHECK_LT(static_cast<size_t>(block->GetBlockId()), block_labels_.Size()); - Label* block_label = &block_labels_.GetRawStorage()[block->GetBlockId()]; + DCHECK_LT(block->GetBlockId(), GetGraph()->GetBlocks().size()); + Label* block_label = &block_labels_[block->GetBlockId()]; DCHECK_EQ(block_label->IsBound(), !block->IsSingleJump()); if (block_label->IsBound()) { __ AdjustLabelPosition(block_label); @@ -560,7 +607,12 @@ void CodeGeneratorARM::SetupBlockedRegisters(bool is_baseline) const { } blocked_core_registers_[kCoreSavedRegisterForBaseline] = false; + } + if (is_baseline || GetGraph()->IsDebuggable()) { + // Stubs do not save callee-save floating point registers. If the graph + // is debuggable, we need to deal with these registers differently. For + // now, just block them. for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) { blocked_fpu_registers_[kFpuCalleeSaves[i]] = true; } @@ -855,6 +907,10 @@ void CodeGeneratorARM::Move64(Location destination, Location source) { Primitive::kPrimInt); } else if (source.IsFpuRegister()) { UNIMPLEMENTED(FATAL); + } else if (source.IsFpuRegisterPair()) { + __ vmovrrd(destination.AsRegisterPairLow<Register>(), + destination.AsRegisterPairHigh<Register>(), + FromLowSToD(source.AsFpuRegisterPairLow<SRegister>())); } else { DCHECK(source.IsDoubleStackSlot()); DCHECK(ExpectedPairLayout(destination)); @@ -866,6 +922,10 @@ void CodeGeneratorARM::Move64(Location destination, Location source) { __ LoadDFromOffset(FromLowSToD(destination.AsFpuRegisterPairLow<SRegister>()), SP, source.GetStackIndex()); + } else if (source.IsRegisterPair()) { + __ vmovdrr(FromLowSToD(destination.AsFpuRegisterPairLow<SRegister>()), + source.AsRegisterPairLow<Register>(), + source.AsRegisterPairHigh<Register>()); } else { UNIMPLEMENTED(FATAL); } @@ -987,6 +1047,25 @@ void CodeGeneratorARM::MoveConstant(Location location, int32_t value) { __ LoadImmediate(location.AsRegister<Register>(), value); } +void CodeGeneratorARM::MoveLocation(Location dst, Location src, Primitive::Type dst_type) { + if (Primitive::Is64BitType(dst_type)) { + Move64(dst, src); + } else { + Move32(dst, src); + } +} + +void CodeGeneratorARM::AddLocationAsTemp(Location location, LocationSummary* locations) { + if (location.IsRegister()) { + locations->AddTemp(location); + } else if (location.IsRegisterPair()) { + locations->AddTemp(Location::RegisterLocation(location.AsRegisterPairLow<Register>())); + locations->AddTemp(Location::RegisterLocation(location.AsRegisterPairHigh<Register>())); + } else { + UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location; + } +} + void CodeGeneratorARM::InvokeRuntime(QuickEntrypointEnum entrypoint, HInstruction* instruction, uint32_t dex_pc, @@ -3554,6 +3633,74 @@ void InstructionCodeGeneratorARM::VisitStaticFieldSet(HStaticFieldSet* instructi HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull()); } +void LocationsBuilderARM::VisitUnresolvedInstanceFieldGet( + HUnresolvedInstanceFieldGet* instruction) { + FieldAccessCallingConventionARM calling_convention; + codegen_->CreateUnresolvedFieldLocationSummary( + instruction, instruction->GetFieldType(), 
calling_convention); +} + +void InstructionCodeGeneratorARM::VisitUnresolvedInstanceFieldGet( + HUnresolvedInstanceFieldGet* instruction) { + FieldAccessCallingConventionARM calling_convention; + codegen_->GenerateUnresolvedFieldAccess(instruction, + instruction->GetFieldType(), + instruction->GetFieldIndex(), + instruction->GetDexPc(), + calling_convention); +} + +void LocationsBuilderARM::VisitUnresolvedInstanceFieldSet( + HUnresolvedInstanceFieldSet* instruction) { + FieldAccessCallingConventionARM calling_convention; + codegen_->CreateUnresolvedFieldLocationSummary( + instruction, instruction->GetFieldType(), calling_convention); +} + +void InstructionCodeGeneratorARM::VisitUnresolvedInstanceFieldSet( + HUnresolvedInstanceFieldSet* instruction) { + FieldAccessCallingConventionARM calling_convention; + codegen_->GenerateUnresolvedFieldAccess(instruction, + instruction->GetFieldType(), + instruction->GetFieldIndex(), + instruction->GetDexPc(), + calling_convention); +} + +void LocationsBuilderARM::VisitUnresolvedStaticFieldGet( + HUnresolvedStaticFieldGet* instruction) { + FieldAccessCallingConventionARM calling_convention; + codegen_->CreateUnresolvedFieldLocationSummary( + instruction, instruction->GetFieldType(), calling_convention); +} + +void InstructionCodeGeneratorARM::VisitUnresolvedStaticFieldGet( + HUnresolvedStaticFieldGet* instruction) { + FieldAccessCallingConventionARM calling_convention; + codegen_->GenerateUnresolvedFieldAccess(instruction, + instruction->GetFieldType(), + instruction->GetFieldIndex(), + instruction->GetDexPc(), + calling_convention); +} + +void LocationsBuilderARM::VisitUnresolvedStaticFieldSet( + HUnresolvedStaticFieldSet* instruction) { + FieldAccessCallingConventionARM calling_convention; + codegen_->CreateUnresolvedFieldLocationSummary( + instruction, instruction->GetFieldType(), calling_convention); +} + +void InstructionCodeGeneratorARM::VisitUnresolvedStaticFieldSet( + HUnresolvedStaticFieldSet* instruction) { + FieldAccessCallingConventionARM calling_convention; + codegen_->GenerateUnresolvedFieldAccess(instruction, + instruction->GetFieldType(), + instruction->GetFieldIndex(), + instruction->GetDexPc(), + calling_convention); +} + void LocationsBuilderARM::VisitNullCheck(HNullCheck* instruction) { LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock() ? LocationSummary::kCallOnSlowPath @@ -3744,38 +3891,32 @@ void LocationsBuilderARM::VisitArraySet(HArraySet* instruction) { bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); - bool needs_runtime_call = instruction->NeedsTypeCheck(); + bool may_need_runtime_call = instruction->NeedsTypeCheck(); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( - instruction, needs_runtime_call ? LocationSummary::kCall : LocationSummary::kNoCall); - if (needs_runtime_call) { - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); - locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); + instruction, + may_need_runtime_call ? 
LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); + if (Primitive::IsFloatingPointType(value_type)) { + locations->SetInAt(2, Location::RequiresFpuRegister()); } else { - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); - if (Primitive::IsFloatingPointType(value_type)) { - locations->SetInAt(2, Location::RequiresFpuRegister()); - } else { - locations->SetInAt(2, Location::RequiresRegister()); - } + locations->SetInAt(2, Location::RequiresRegister()); + } - if (needs_write_barrier) { - // Temporary registers for the write barrier. - locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too. - locations->AddTemp(Location::RequiresRegister()); - } + if (needs_write_barrier) { + // Temporary registers for the write barrier. + locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too. + locations->AddTemp(Location::RequiresRegister()); } } void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { LocationSummary* locations = instruction->GetLocations(); - Register obj = locations->InAt(0).AsRegister<Register>(); + Register array = locations->InAt(0).AsRegister<Register>(); Location index = locations->InAt(1); Primitive::Type value_type = instruction->GetComponentType(); - bool needs_runtime_call = locations->WillCall(); + bool may_need_runtime_call = locations->CanCall(); bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); @@ -3787,9 +3928,9 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset; - __ StoreToOffset(kStoreByte, value, obj, offset); + __ StoreToOffset(kStoreByte, value, array, offset); } else { - __ add(IP, obj, ShifterOperand(index.AsRegister<Register>())); + __ add(IP, array, ShifterOperand(index.AsRegister<Register>())); __ StoreToOffset(kStoreByte, value, IP, data_offset); } break; @@ -3802,55 +3943,133 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset; - __ StoreToOffset(kStoreHalfword, value, obj, offset); + __ StoreToOffset(kStoreHalfword, value, array, offset); } else { - __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_2)); + __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_2)); __ StoreToOffset(kStoreHalfword, value, IP, data_offset); } break; } - case Primitive::kPrimInt: case Primitive::kPrimNot: { - if (!needs_runtime_call) { - uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); - Register value = locations->InAt(2).AsRegister<Register>(); - Register source = value; - if (kPoisonHeapReferences && needs_write_barrier) { - // Note that in the case where `value` is a null reference, - // we do not enter this block, as a null reference does not - // need poisoning. 
- DCHECK_EQ(value_type, Primitive::kPrimNot); - Register temp = locations->GetTemp(0).AsRegister<Register>(); - __ Mov(temp, value); - __ PoisonHeapReference(temp); - source = temp; - } + uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); + Register value = locations->InAt(2).AsRegister<Register>(); + Register source = value; + + if (instruction->InputAt(2)->IsNullConstant()) { + // Just setting null. if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; - __ StoreToOffset(kStoreWord, source, obj, offset); + __ StoreToOffset(kStoreWord, source, array, offset); } else { DCHECK(index.IsRegister()) << index; - __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4)); + __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4)); __ StoreToOffset(kStoreWord, source, IP, data_offset); } + break; + } + + DCHECK(needs_write_barrier); + Register temp1 = locations->GetTemp(0).AsRegister<Register>(); + Register temp2 = locations->GetTemp(1).AsRegister<Register>(); + uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); + uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); + uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); + Label done; + SlowPathCode* slow_path = nullptr; + + if (may_need_runtime_call) { + slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathARM(instruction); + codegen_->AddSlowPath(slow_path); + if (instruction->GetValueCanBeNull()) { + Label non_zero; + __ CompareAndBranchIfNonZero(value, &non_zero); + if (index.IsConstant()) { + size_t offset = + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; + __ StoreToOffset(kStoreWord, value, array, offset); + } else { + DCHECK(index.IsRegister()) << index; + __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4)); + __ StoreToOffset(kStoreWord, value, IP, data_offset); + } + codegen_->MaybeRecordImplicitNullCheck(instruction); + __ b(&done); + __ Bind(&non_zero); + } + + __ LoadFromOffset(kLoadWord, temp1, array, class_offset); codegen_->MaybeRecordImplicitNullCheck(instruction); - if (needs_write_barrier) { - DCHECK_EQ(value_type, Primitive::kPrimNot); - Register temp = locations->GetTemp(0).AsRegister<Register>(); - Register card = locations->GetTemp(1).AsRegister<Register>(); - codegen_->MarkGCCard(temp, card, obj, value, instruction->GetValueCanBeNull()); + __ MaybeUnpoisonHeapReference(temp1); + __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset); + __ LoadFromOffset(kLoadWord, temp2, value, class_offset); + // No need to poison/unpoison, we're comparing two poisoined references. + __ cmp(temp1, ShifterOperand(temp2)); + if (instruction->StaticTypeOfArrayIsObjectArray()) { + Label do_put; + __ b(&do_put, EQ); + __ MaybeUnpoisonHeapReference(temp1); + __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset); + // No need to poison/unpoison, we're comparing against null. + __ CompareAndBranchIfNonZero(temp1, slow_path->GetEntryLabel()); + __ Bind(&do_put); + } else { + __ b(slow_path->GetEntryLabel(), NE); } - } else { + } + + if (kPoisonHeapReferences) { + // Note that in the case where `value` is a null reference, + // we do not enter this block, as a null reference does not + // need poisoning. DCHECK_EQ(value_type, Primitive::kPrimNot); - // Note: if heap poisoning is enabled, pAputObject takes cares - // of poisoning the reference. 
- codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject), - instruction, - instruction->GetDexPc(), - nullptr); + __ Mov(temp1, value); + __ PoisonHeapReference(temp1); + source = temp1; + } + + if (index.IsConstant()) { + size_t offset = + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; + __ StoreToOffset(kStoreWord, source, array, offset); + } else { + DCHECK(index.IsRegister()) << index; + __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4)); + __ StoreToOffset(kStoreWord, source, IP, data_offset); } + + if (!may_need_runtime_call) { + codegen_->MaybeRecordImplicitNullCheck(instruction); + } + + codegen_->MarkGCCard(temp1, temp2, array, value, instruction->GetValueCanBeNull()); + + if (done.IsLinked()) { + __ Bind(&done); + } + + if (slow_path != nullptr) { + __ Bind(slow_path->GetExitLabel()); + } + + break; + } + + case Primitive::kPrimInt: { + uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); + Register value = locations->InAt(2).AsRegister<Register>(); + if (index.IsConstant()) { + size_t offset = + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; + __ StoreToOffset(kStoreWord, value, array, offset); + } else { + DCHECK(index.IsRegister()) << index; + __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4)); + __ StoreToOffset(kStoreWord, value, IP, data_offset); + } + + codegen_->MaybeRecordImplicitNullCheck(instruction); break; } @@ -3860,9 +4079,9 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; - __ StoreToOffset(kStoreWordPair, value.AsRegisterPairLow<Register>(), obj, offset); + __ StoreToOffset(kStoreWordPair, value.AsRegisterPairLow<Register>(), array, offset); } else { - __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_8)); + __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_8)); __ StoreToOffset(kStoreWordPair, value.AsRegisterPairLow<Register>(), IP, data_offset); } break; @@ -3874,9 +4093,9 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { DCHECK(value.IsFpuRegister()); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; - __ StoreSToOffset(value.AsFpuRegister<SRegister>(), obj, offset); + __ StoreSToOffset(value.AsFpuRegister<SRegister>(), array, offset); } else { - __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4)); + __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4)); __ StoreSToOffset(value.AsFpuRegister<SRegister>(), IP, data_offset); } break; @@ -3888,9 +4107,9 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { DCHECK(value.IsFpuRegisterPair()); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; - __ StoreDToOffset(FromLowSToD(value.AsFpuRegisterPairLow<SRegister>()), obj, offset); + __ StoreDToOffset(FromLowSToD(value.AsFpuRegisterPairLow<SRegister>()), array, offset); } else { - __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_8)); + __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_8)); __ StoreDToOffset(FromLowSToD(value.AsFpuRegisterPairLow<SRegister>()), IP, data_offset); } @@ -4034,7 +4253,8 @@ ArmAssembler* ParallelMoveResolverARM::GetAssembler() const { 
} void ParallelMoveResolverARM::EmitMove(size_t index) { - MoveOperands* move = moves_.Get(index); + DCHECK_LT(index, moves_.size()); + MoveOperands* move = moves_[index]; Location source = move->GetSource(); Location destination = move->GetDestination(); @@ -4166,7 +4386,8 @@ void ParallelMoveResolverARM::Exchange(int mem1, int mem2) { } void ParallelMoveResolverARM::EmitSwap(size_t index) { - MoveOperands* move = moves_.Get(index); + DCHECK_LT(index, moves_.size()); + MoveOperands* move = moves_[index]; Location source = move->GetSource(); Location destination = move->GetDestination(); @@ -4247,20 +4468,24 @@ void ParallelMoveResolverARM::RestoreScratch(int reg) { } void LocationsBuilderARM::VisitLoadClass(HLoadClass* cls) { - LocationSummary::CallKind call_kind = cls->CanCallRuntime() - ? LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall; - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(cls, call_kind); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister()); + InvokeRuntimeCallingConvention calling_convention; + CodeGenerator::CreateLoadClassLocationSummary( + cls, + Location::RegisterLocation(calling_convention.GetRegisterAt(0)), + Location::RegisterLocation(R0)); } void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) { LocationSummary* locations = cls->GetLocations(); Register out = locations->Out().AsRegister<Register>(); Register current_method = locations->InAt(0).AsRegister<Register>(); - if (cls->IsReferrersClass()) { + if (cls->NeedsAccessCheck()) { + codegen_->MoveConstant(locations->GetTemp(0), cls->GetTypeIndex()); + codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInitializeTypeAndVerifyAccess), + cls, + cls->GetDexPc(), + nullptr); + } else if (cls->IsReferrersClass()) { DCHECK(!cls->CanCallRuntime()); DCHECK(!cls->MustGenerateClinitCheck()); __ LoadFromOffset( @@ -4383,6 +4608,7 @@ void LocationsBuilderARM::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kArrayObjectCheck: call_kind = LocationSummary::kNoCall; break; + case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: call_kind = LocationSummary::kCall; break; @@ -4423,10 +4649,11 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { __ CompareAndBranchIfZero(obj, &zero); } - // In case of an interface check, we put the object class into the object register. + // In case of an interface/unresolved check, we put the object class into the object register. // This is safe, as the register is caller-save, and the object must be in another // register if it survives the runtime call. - Register target = (instruction->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) + Register target = (instruction->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) || + (instruction->GetTypeCheckKind() == TypeCheckKind::kUnresolvedCheck) ? obj : out; __ LoadFromOffset(kLoadWord, target, obj, class_offset); @@ -4507,7 +4734,7 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { } break; } - + case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: default: { codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial), @@ -4548,6 +4775,7 @@ void LocationsBuilderARM::VisitCheckCast(HCheckCast* instruction) { ? 
LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; break; + case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: call_kind = LocationSummary::kCall; break; @@ -4652,6 +4880,7 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { __ CompareAndBranchIfNonZero(temp, slow_path->GetEntryLabel()); break; } + case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: default: codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index 1d98789213..16d1d383b4 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -96,6 +96,38 @@ class InvokeDexCallingConventionVisitorARM : public InvokeDexCallingConventionVi DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorARM); }; +class FieldAccessCallingConventionARM : public FieldAccessCallingConvention { + public: + FieldAccessCallingConventionARM() {} + + Location GetObjectLocation() const OVERRIDE { + return Location::RegisterLocation(R1); + } + Location GetFieldIndexLocation() const OVERRIDE { + return Location::RegisterLocation(R0); + } + Location GetReturnLocation(Primitive::Type type) const OVERRIDE { + return Primitive::Is64BitType(type) + ? Location::RegisterPairLocation(R0, R1) + : Location::RegisterLocation(R0); + } + Location GetSetValueLocation(Primitive::Type type, bool is_instance) const OVERRIDE { + return Primitive::Is64BitType(type) + ? Location::RegisterPairLocation(R2, R3) + : (is_instance + ? Location::RegisterLocation(R2) + : Location::RegisterLocation(R1)); + } + Location GetFpuLocation(Primitive::Type type) const OVERRIDE { + return Primitive::Is64BitType(type) + ? Location::FpuRegisterPairLocation(S0, S1) + : Location::FpuRegisterLocation(S0); + } + + private: + DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConventionARM); +}; + class ParallelMoveResolverARM : public ParallelMoveResolverWithSwap { public: ParallelMoveResolverARM(ArenaAllocator* allocator, CodeGeneratorARM* codegen) @@ -225,6 +257,9 @@ class CodeGeneratorARM : public CodeGenerator { void Bind(HBasicBlock* block) OVERRIDE; void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE; void MoveConstant(Location destination, int32_t value) OVERRIDE; + void MoveLocation(Location dst, Location src, Primitive::Type dst_type) OVERRIDE; + void AddLocationAsTemp(Location location, LocationSummary* locations) OVERRIDE; + size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; @@ -299,11 +334,11 @@ class CodeGeneratorARM : public CodeGenerator { void MarkGCCard(Register temp, Register card, Register object, Register value, bool can_be_null); Label* GetLabelOf(HBasicBlock* block) const { - return CommonGetLabelOf<Label>(block_labels_.GetRawStorage(), block); + return CommonGetLabelOf<Label>(block_labels_, block); } void Initialize() OVERRIDE { - block_labels_.SetSize(GetGraph()->GetBlocks().size()); + block_labels_ = CommonInitializeLabels<Label>(); } void Finalize(CodeAllocator* allocator) OVERRIDE; @@ -335,7 +370,7 @@ class CodeGeneratorARM : public CodeGenerator { Literal* DeduplicateMethodCodeLiteral(MethodReference target_method); // Labels for each block that will be compiled. - GrowableArray<Label> block_labels_; + Label* block_labels_; // Indexed by block id. 
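As a cross-reference for the new unresolved-field support, here is an illustrative walk-through of an unresolved instance float field set on ARM, inferred from the FieldAccessCallingConventionARM above together with CreateUnresolvedFieldLocationSummary/GenerateUnresolvedFieldAccess in code_generator.cc; it is a sketch, not code from this patch:

  // Sketch only -- derived from the hunks in this patch, not part of them.
  // Locations chosen by CreateUnresolvedFieldLocationSummary:
  //   temp(0) = R0   // GetFieldIndexLocation(): field index argument for the runtime
  //   in(0)   = R1   // GetObjectLocation(): the `this` object
  //   in(1)   = S0   // GetFpuLocation(float): the value to store arrives in an FPU register
  //   temp(1) = R2   // AddLocationAsTemp(GetSetValueLocation(float, /* is_instance */ true))
  // Code then emitted by GenerateUnresolvedFieldAccess:
  //   MoveConstant(R0, field_index);
  //   MoveLocation(R2, S0, Primitive::kPrimInt);  // float bits are passed in a core register
  //   InvokeRuntime(kQuickSet32Instance, ...);    // int and float share the 32-bit entrypoint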
Label frame_entry_label_; LocationsBuilderARM location_builder_; InstructionCodeGeneratorARM instruction_visitor_; diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index d175532f4c..07758e9df7 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -19,7 +19,6 @@ #include "arch/arm64/instruction_set_features_arm64.h" #include "art_method.h" #include "code_generator_utils.h" -#include "common_arm64.h" #include "compiled_method.h" #include "entrypoints/quick/quick_entrypoints.h" #include "entrypoints/quick/quick_entrypoints_enum.h" @@ -477,7 +476,7 @@ class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 { class DeoptimizationSlowPathARM64 : public SlowPathCodeARM64 { public: explicit DeoptimizationSlowPathARM64(HInstruction* instruction) - : instruction_(instruction) {} + : instruction_(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { __ Bind(GetEntryLabel()); @@ -496,6 +495,52 @@ class DeoptimizationSlowPathARM64 : public SlowPathCodeARM64 { DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM64); }; +class ArraySetSlowPathARM64 : public SlowPathCodeARM64 { + public: + explicit ArraySetSlowPathARM64(HInstruction* instruction) : instruction_(instruction) {} + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + InvokeRuntimeCallingConvention calling_convention; + HParallelMove parallel_move(codegen->GetGraph()->GetArena()); + parallel_move.AddMove( + locations->InAt(0), + LocationFrom(calling_convention.GetRegisterAt(0)), + Primitive::kPrimNot, + nullptr); + parallel_move.AddMove( + locations->InAt(1), + LocationFrom(calling_convention.GetRegisterAt(1)), + Primitive::kPrimInt, + nullptr); + parallel_move.AddMove( + locations->InAt(2), + LocationFrom(calling_convention.GetRegisterAt(2)), + Primitive::kPrimNot, + nullptr); + codegen->GetMoveResolver()->EmitNativeCode(¶llel_move); + + CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); + arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject), + instruction_, + instruction_->GetDexPc(), + this); + CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>(); + RestoreLiveRegisters(codegen, locations); + __ B(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathARM64"; } + + private: + HInstruction* const instruction_; + + DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM64); +}; + #undef __ Location InvokeDexCallingConventionVisitorARM64::GetNextLocation(Primitive::Type type) { @@ -542,11 +587,14 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph, instruction_visitor_(graph, this), move_resolver_(graph->GetArena(), this), isa_features_(isa_features), - uint64_literals_(std::less<uint64_t>(), graph->GetArena()->Adapter()), - method_patches_(MethodReferenceComparator(), graph->GetArena()->Adapter()), - call_patches_(MethodReferenceComparator(), graph->GetArena()->Adapter()), - relative_call_patches_(graph->GetArena()->Adapter()), - pc_rel_dex_cache_patches_(graph->GetArena()->Adapter()) { + uint64_literals_(std::less<uint64_t>(), + graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + method_patches_(MethodReferenceComparator(), + graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + call_patches_(MethodReferenceComparator(), + 
graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + pc_rel_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) { // Save the link register (containing the return address) to mimic Quick. AddAllocatedRegister(LocationFrom(lr)); } @@ -610,8 +658,9 @@ void ParallelMoveResolverARM64::FreeScratchLocation(Location loc) { } void ParallelMoveResolverARM64::EmitMove(size_t index) { - MoveOperands* move = moves_.Get(index); - codegen_->MoveLocation(move->GetDestination(), move->GetSource()); + DCHECK_LT(index, moves_.size()); + MoveOperands* move = moves_[index]; + codegen_->MoveLocation(move->GetDestination(), move->GetSource(), Primitive::kPrimVoid); } void CodeGeneratorARM64::GenerateFrameEntry() { @@ -695,7 +744,9 @@ void CodeGeneratorARM64::Move(HInstruction* instruction, } if (instruction->IsCurrentMethod()) { - MoveLocation(location, Location::DoubleStackSlot(kCurrentMethodStackOffset)); + MoveLocation(location, + Location::DoubleStackSlot(kCurrentMethodStackOffset), + Primitive::kPrimVoid); } else if (locations != nullptr && locations->Out().Equals(location)) { return; } else if (instruction->IsIntConstant() @@ -738,6 +789,14 @@ void CodeGeneratorARM64::MoveConstant(Location location, int32_t value) { __ Mov(RegisterFrom(location, Primitive::kPrimInt), value); } +void CodeGeneratorARM64::AddLocationAsTemp(Location location, LocationSummary* locations) { + if (location.IsRegister()) { + locations->AddTemp(location); + } else { + UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location; + } +} + Location CodeGeneratorARM64::GetStackLocation(HLoadLocal* load) const { Primitive::Type type = load->GetType(); @@ -805,7 +864,12 @@ void CodeGeneratorARM64::SetupBlockedRegisters(bool is_baseline) const { while (!reserved_core_baseline_registers.IsEmpty()) { blocked_core_registers_[reserved_core_baseline_registers.PopLowestIndex().code()] = true; } + } + if (is_baseline || GetGraph()->IsDebuggable()) { + // Stubs do not save callee-save floating point registers. If the graph + // is debuggable, we need to deal with these registers differently. For + // now, just block them. CPURegList reserved_fp_baseline_registers = callee_saved_fp_registers; while (!reserved_fp_baseline_registers.IsEmpty()) { blocked_fpu_registers_[reserved_fp_baseline_registers.PopLowestIndex().code()] = true; @@ -888,7 +952,9 @@ static bool CoherentConstantAndType(Location constant, Primitive::Type type) { (cst->IsDoubleConstant() && type == Primitive::kPrimDouble); } -void CodeGeneratorARM64::MoveLocation(Location destination, Location source, Primitive::Type type) { +void CodeGeneratorARM64::MoveLocation(Location destination, + Location source, + Primitive::Type dst_type) { if (source.Equals(destination)) { return; } @@ -897,7 +963,7 @@ void CodeGeneratorARM64::MoveLocation(Location destination, Location source, Pri // locations. When moving from and to a register, the argument type can be // used to generate 32bit instead of 64bit moves. In debug mode we also // checks the coherency of the locations and the type. 
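For context on the MoveLocation signature change in this hunk (ParallelMoveResolverARM64::EmitMove now passes Primitive::kPrimVoid and lets the code generator infer a type from the two locations), a few illustrative resolutions follow; they are inferred from the logic in this hunk rather than copied from it:

  // Sketch only -- examples inferred from the MoveLocation logic in this hunk.
  //   MoveLocation(w0, <stack slot>,        kPrimVoid)  -> dst_type = kPrimInt,    Ldr w0, [sp, #off]
  //   MoveLocation(d0, <double stack slot>, kPrimVoid)  -> dst_type = kPrimDouble, Ldr d0, [sp, #off]
  //   MoveLocation(s0, w1,                  kPrimFloat) -> Fmov s0, w1   // new core-to-FPU path
  //   MoveLocation(x2, d3,                  kPrimLong)  -> Fmov x2, d3   // new FPU-to-core path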
- bool unspecified_type = (type == Primitive::kPrimVoid); + bool unspecified_type = (dst_type == Primitive::kPrimVoid); if (destination.IsRegister() || destination.IsFpuRegister()) { if (unspecified_type) { @@ -907,30 +973,44 @@ void CodeGeneratorARM64::MoveLocation(Location destination, Location source, Pri || src_cst->IsFloatConstant() || src_cst->IsNullConstant()))) { // For stack slots and 32bit constants, a 64bit type is appropriate. - type = destination.IsRegister() ? Primitive::kPrimInt : Primitive::kPrimFloat; + dst_type = destination.IsRegister() ? Primitive::kPrimInt : Primitive::kPrimFloat; } else { // If the source is a double stack slot or a 64bit constant, a 64bit // type is appropriate. Else the source is a register, and since the // type has not been specified, we chose a 64bit type to force a 64bit // move. - type = destination.IsRegister() ? Primitive::kPrimLong : Primitive::kPrimDouble; + dst_type = destination.IsRegister() ? Primitive::kPrimLong : Primitive::kPrimDouble; } } - DCHECK((destination.IsFpuRegister() && Primitive::IsFloatingPointType(type)) || - (destination.IsRegister() && !Primitive::IsFloatingPointType(type))); - CPURegister dst = CPURegisterFrom(destination, type); + DCHECK((destination.IsFpuRegister() && Primitive::IsFloatingPointType(dst_type)) || + (destination.IsRegister() && !Primitive::IsFloatingPointType(dst_type))); + CPURegister dst = CPURegisterFrom(destination, dst_type); if (source.IsStackSlot() || source.IsDoubleStackSlot()) { DCHECK(dst.Is64Bits() == source.IsDoubleStackSlot()); __ Ldr(dst, StackOperandFrom(source)); } else if (source.IsConstant()) { - DCHECK(CoherentConstantAndType(source, type)); + DCHECK(CoherentConstantAndType(source, dst_type)); MoveConstant(dst, source.GetConstant()); + } else if (source.IsRegister()) { + if (destination.IsRegister()) { + __ Mov(Register(dst), RegisterFrom(source, dst_type)); + } else { + DCHECK(destination.IsFpuRegister()); + Primitive::Type source_type = Primitive::Is64BitType(dst_type) + ? Primitive::kPrimLong + : Primitive::kPrimInt; + __ Fmov(FPRegisterFrom(destination, dst_type), RegisterFrom(source, source_type)); + } } else { + DCHECK(source.IsFpuRegister()); if (destination.IsRegister()) { - __ Mov(Register(dst), RegisterFrom(source, type)); + Primitive::Type source_type = Primitive::Is64BitType(dst_type) + ? Primitive::kPrimDouble + : Primitive::kPrimFloat; + __ Fmov(RegisterFrom(destination, dst_type), FPRegisterFrom(source, source_type)); } else { DCHECK(destination.IsFpuRegister()); - __ Fmov(FPRegister(dst), FPRegisterFrom(source, type)); + __ Fmov(FPRegister(dst), FPRegisterFrom(source, dst_type)); } } } else { // The destination is not a register. It must be a stack slot. @@ -938,16 +1018,17 @@ void CodeGeneratorARM64::MoveLocation(Location destination, Location source, Pri if (source.IsRegister() || source.IsFpuRegister()) { if (unspecified_type) { if (source.IsRegister()) { - type = destination.IsStackSlot() ? Primitive::kPrimInt : Primitive::kPrimLong; + dst_type = destination.IsStackSlot() ? Primitive::kPrimInt : Primitive::kPrimLong; } else { - type = destination.IsStackSlot() ? Primitive::kPrimFloat : Primitive::kPrimDouble; + dst_type = destination.IsStackSlot() ? 
Primitive::kPrimFloat : Primitive::kPrimDouble; } } - DCHECK((destination.IsDoubleStackSlot() == Primitive::Is64BitType(type)) && - (source.IsFpuRegister() == Primitive::IsFloatingPointType(type))); - __ Str(CPURegisterFrom(source, type), StackOperandFrom(destination)); + DCHECK((destination.IsDoubleStackSlot() == Primitive::Is64BitType(dst_type)) && + (source.IsFpuRegister() == Primitive::IsFloatingPointType(dst_type))); + __ Str(CPURegisterFrom(source, dst_type), StackOperandFrom(destination)); } else if (source.IsConstant()) { - DCHECK(unspecified_type || CoherentConstantAndType(source, type)) << source << " " << type; + DCHECK(unspecified_type || CoherentConstantAndType(source, dst_type)) + << source << " " << dst_type; UseScratchRegisterScope temps(GetVIXLAssembler()); HConstant* src_cst = source.GetConstant(); CPURegister temp; @@ -1551,76 +1632,136 @@ void InstructionCodeGeneratorARM64::VisitArrayLength(HArrayLength* instruction) } void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) { - if (instruction->NeedsTypeCheck()) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall); - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); - locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1))); - locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2))); + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( + instruction, + instruction->NeedsTypeCheck() ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); + if (Primitive::IsFloatingPointType(instruction->InputAt(2)->GetType())) { + locations->SetInAt(2, Location::RequiresFpuRegister()); } else { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); - if (Primitive::IsFloatingPointType(instruction->InputAt(2)->GetType())) { - locations->SetInAt(2, Location::RequiresFpuRegister()); - } else { - locations->SetInAt(2, Location::RequiresRegister()); - } + locations->SetInAt(2, Location::RequiresRegister()); } } void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { Primitive::Type value_type = instruction->GetComponentType(); LocationSummary* locations = instruction->GetLocations(); - bool needs_runtime_call = locations->WillCall(); + bool may_need_runtime_call = locations->CanCall(); + bool needs_write_barrier = + CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); - if (needs_runtime_call) { - // Note: if heap poisoning is enabled, pAputObject takes cares - // of poisoning the reference. 
- codegen_->InvokeRuntime( - QUICK_ENTRY_POINT(pAputObject), instruction, instruction->GetDexPc(), nullptr); - CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>(); + Register array = InputRegisterAt(instruction, 0); + CPURegister value = InputCPURegisterAt(instruction, 2); + CPURegister source = value; + Location index = locations->InAt(1); + size_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(value_type)).Uint32Value(); + MemOperand destination = HeapOperand(array); + MacroAssembler* masm = GetVIXLAssembler(); + BlockPoolsScope block_pools(masm); + + if (!needs_write_barrier) { + DCHECK(!may_need_runtime_call); + if (index.IsConstant()) { + offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(value_type); + destination = HeapOperand(array, offset); + } else { + UseScratchRegisterScope temps(masm); + Register temp = temps.AcquireSameSizeAs(array); + __ Add(temp, array, offset); + destination = HeapOperand(temp, + XRegisterFrom(index), + LSL, + Primitive::ComponentSizeShift(value_type)); + } + codegen_->Store(value_type, value, destination); + codegen_->MaybeRecordImplicitNullCheck(instruction); } else { - Register obj = InputRegisterAt(instruction, 0); - CPURegister value = InputCPURegisterAt(instruction, 2); - CPURegister source = value; - Location index = locations->InAt(1); - size_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(value_type)).Uint32Value(); - MemOperand destination = HeapOperand(obj); - MacroAssembler* masm = GetVIXLAssembler(); - BlockPoolsScope block_pools(masm); + DCHECK(needs_write_barrier); + vixl::Label done; + SlowPathCodeARM64* slow_path = nullptr; { // We use a block to end the scratch scope before the write barrier, thus // freeing the temporary registers so they can be used in `MarkGCCard`. 
UseScratchRegisterScope temps(masm); - - if (kPoisonHeapReferences && value_type == Primitive::kPrimNot) { - DCHECK(value.IsW()); - Register temp = temps.AcquireW(); - __ Mov(temp, value.W()); - GetAssembler()->PoisonHeapReference(temp.W()); - source = temp; - } - + Register temp = temps.AcquireSameSizeAs(array); if (index.IsConstant()) { offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(value_type); - destination = HeapOperand(obj, offset); + destination = HeapOperand(array, offset); } else { - Register temp = temps.AcquireSameSizeAs(obj); - __ Add(temp, obj, offset); destination = HeapOperand(temp, XRegisterFrom(index), LSL, Primitive::ComponentSizeShift(value_type)); } - codegen_->Store(value_type, source, destination); - codegen_->MaybeRecordImplicitNullCheck(instruction); + uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); + uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); + uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); + + if (may_need_runtime_call) { + slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathARM64(instruction); + codegen_->AddSlowPath(slow_path); + if (instruction->GetValueCanBeNull()) { + vixl::Label non_zero; + __ Cbnz(Register(value), &non_zero); + if (!index.IsConstant()) { + __ Add(temp, array, offset); + } + __ Str(wzr, destination); + codegen_->MaybeRecordImplicitNullCheck(instruction); + __ B(&done); + __ Bind(&non_zero); + } + + Register temp2 = temps.AcquireSameSizeAs(array); + __ Ldr(temp, HeapOperand(array, class_offset)); + codegen_->MaybeRecordImplicitNullCheck(instruction); + GetAssembler()->MaybeUnpoisonHeapReference(temp); + __ Ldr(temp, HeapOperand(temp, component_offset)); + __ Ldr(temp2, HeapOperand(Register(value), class_offset)); + // No need to poison/unpoison, we're comparing two poisoned references. + __ Cmp(temp, temp2); + if (instruction->StaticTypeOfArrayIsObjectArray()) { + vixl::Label do_put; + __ B(eq, &do_put); + GetAssembler()->MaybeUnpoisonHeapReference(temp); + __ Ldr(temp, HeapOperand(temp, super_offset)); + // No need to unpoison, we're comparing against null. 
+ __ Cbnz(temp, slow_path->GetEntryLabel()); + __ Bind(&do_put); + } else { + __ B(ne, slow_path->GetEntryLabel()); + } + temps.Release(temp2); + } + + if (kPoisonHeapReferences) { + Register temp2 = temps.AcquireSameSizeAs(array); + DCHECK(value.IsW()); + __ Mov(temp2, value.W()); + GetAssembler()->PoisonHeapReference(temp2); + source = temp2; + } + + if (!index.IsConstant()) { + __ Add(temp, array, offset); + } + __ Str(source, destination); + + if (!may_need_runtime_call) { + codegen_->MaybeRecordImplicitNullCheck(instruction); + } } - if (CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue())) { - codegen_->MarkGCCard(obj, value.W(), instruction->GetValueCanBeNull()); + + codegen_->MarkGCCard(array, value.W(), instruction->GetValueCanBeNull()); + + if (done.IsLinked()) { + __ Bind(&done); + } + + if (slow_path != nullptr) { + __ Bind(slow_path->GetExitLabel()); } } } @@ -2247,6 +2388,7 @@ void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kArrayObjectCheck: call_kind = LocationSummary::kNoCall; break; + case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: call_kind = LocationSummary::kCall; break; @@ -2288,10 +2430,11 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { __ Cbz(obj, &zero); } - // In case of an interface check, we put the object class into the object register. + // In case of an interface/unresolved check, we put the object class into the object register. // This is safe, as the register is caller-save, and the object must be in another // register if it survives the runtime call. - Register target = (instruction->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) + Register target = (instruction->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) || + (instruction->GetTypeCheckKind() == TypeCheckKind::kUnresolvedCheck) ? obj : out; __ Ldr(target, HeapOperand(obj.W(), class_offset)); @@ -2372,7 +2515,7 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { } break; } - + case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: default: { codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial), @@ -2413,6 +2556,7 @@ void LocationsBuilderARM64::VisitCheckCast(HCheckCast* instruction) { ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; break; + case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: call_kind = LocationSummary::kCall; break; @@ -2518,6 +2662,7 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { __ Cbnz(temp, slow_path->GetEntryLabel()); break; } + case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: default: codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), @@ -2873,17 +3018,23 @@ void InstructionCodeGeneratorARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) { } void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) { - LocationSummary::CallKind call_kind = cls->CanCallRuntime() ? 
LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall; - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister()); + InvokeRuntimeCallingConvention calling_convention; + CodeGenerator::CreateLoadClassLocationSummary( + cls, + LocationFrom(calling_convention.GetRegisterAt(0)), + LocationFrom(vixl::x0)); } void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) { Register out = OutputRegister(cls); Register current_method = InputRegisterAt(cls, 0); - if (cls->IsReferrersClass()) { + if (cls->NeedsAccessCheck()) { + codegen_->MoveConstant(cls->GetLocations()->GetTemp(0), cls->GetTypeIndex()); + codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInitializeTypeAndVerifyAccess), + cls, + cls->GetDexPc(), + nullptr); + } else if (cls->IsReferrersClass()) { DCHECK(!cls->CanCallRuntime()); DCHECK(!cls->MustGenerateClinitCheck()); __ Ldr(out, MemOperand(current_method, ArtMethod::DeclaringClassOffset().Int32Value())); @@ -3393,6 +3544,74 @@ void InstructionCodeGeneratorARM64::VisitStaticFieldSet(HStaticFieldSet* instruc HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull()); } +void LocationsBuilderARM64::VisitUnresolvedInstanceFieldGet( + HUnresolvedInstanceFieldGet* instruction) { + FieldAccessCallingConventionARM64 calling_convention; + codegen_->CreateUnresolvedFieldLocationSummary( + instruction, instruction->GetFieldType(), calling_convention); +} + +void InstructionCodeGeneratorARM64::VisitUnresolvedInstanceFieldGet( + HUnresolvedInstanceFieldGet* instruction) { + FieldAccessCallingConventionARM64 calling_convention; + codegen_->GenerateUnresolvedFieldAccess(instruction, + instruction->GetFieldType(), + instruction->GetFieldIndex(), + instruction->GetDexPc(), + calling_convention); +} + +void LocationsBuilderARM64::VisitUnresolvedInstanceFieldSet( + HUnresolvedInstanceFieldSet* instruction) { + FieldAccessCallingConventionARM64 calling_convention; + codegen_->CreateUnresolvedFieldLocationSummary( + instruction, instruction->GetFieldType(), calling_convention); +} + +void InstructionCodeGeneratorARM64::VisitUnresolvedInstanceFieldSet( + HUnresolvedInstanceFieldSet* instruction) { + FieldAccessCallingConventionARM64 calling_convention; + codegen_->GenerateUnresolvedFieldAccess(instruction, + instruction->GetFieldType(), + instruction->GetFieldIndex(), + instruction->GetDexPc(), + calling_convention); +} + +void LocationsBuilderARM64::VisitUnresolvedStaticFieldGet( + HUnresolvedStaticFieldGet* instruction) { + FieldAccessCallingConventionARM64 calling_convention; + codegen_->CreateUnresolvedFieldLocationSummary( + instruction, instruction->GetFieldType(), calling_convention); +} + +void InstructionCodeGeneratorARM64::VisitUnresolvedStaticFieldGet( + HUnresolvedStaticFieldGet* instruction) { + FieldAccessCallingConventionARM64 calling_convention; + codegen_->GenerateUnresolvedFieldAccess(instruction, + instruction->GetFieldType(), + instruction->GetFieldIndex(), + instruction->GetDexPc(), + calling_convention); +} + +void LocationsBuilderARM64::VisitUnresolvedStaticFieldSet( + HUnresolvedStaticFieldSet* instruction) { + FieldAccessCallingConventionARM64 calling_convention; + codegen_->CreateUnresolvedFieldLocationSummary( + instruction, instruction->GetFieldType(), calling_convention); +} + +void InstructionCodeGeneratorARM64::VisitUnresolvedStaticFieldSet( + HUnresolvedStaticFieldSet* instruction) { + 
FieldAccessCallingConventionARM64 calling_convention; + codegen_->GenerateUnresolvedFieldAccess(instruction, + instruction->GetFieldType(), + instruction->GetFieldIndex(), + instruction->GetDexPc(), + calling_convention); +} + void LocationsBuilderARM64::VisitSuspendCheck(HSuspendCheck* instruction) { new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath); } diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 89671088c7..a068b48797 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -18,6 +18,7 @@ #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM64_H_ #include "code_generator.h" +#include "common_arm64.h" #include "dex/compiler_enums.h" #include "driver/compiler_options.h" #include "nodes.h" @@ -141,6 +142,34 @@ class InvokeDexCallingConventionVisitorARM64 : public InvokeDexCallingConvention DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorARM64); }; +class FieldAccessCallingConventionARM64 : public FieldAccessCallingConvention { + public: + FieldAccessCallingConventionARM64() {} + + Location GetObjectLocation() const OVERRIDE { + return helpers::LocationFrom(vixl::x1); + } + Location GetFieldIndexLocation() const OVERRIDE { + return helpers::LocationFrom(vixl::x0); + } + Location GetReturnLocation(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE { + return helpers::LocationFrom(vixl::x0); + } + Location GetSetValueLocation(Primitive::Type type, bool is_instance) const OVERRIDE { + return Primitive::Is64BitType(type) + ? helpers::LocationFrom(vixl::x2) + : (is_instance + ? helpers::LocationFrom(vixl::x2) + : helpers::LocationFrom(vixl::x1)); + } + Location GetFpuLocation(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE { + return helpers::LocationFrom(vixl::d0); + } + + private: + DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConventionARM64); +}; + class InstructionCodeGeneratorARM64 : public HGraphVisitor { public: InstructionCodeGeneratorARM64(HGraph* graph, CodeGeneratorARM64* codegen); @@ -326,12 +355,7 @@ class CodeGeneratorARM64 : public CodeGenerator { } void Initialize() OVERRIDE { - HGraph* graph = GetGraph(); - int length = graph->GetBlocks().size(); - block_labels_ = graph->GetArena()->AllocArray<vixl::Label>(length); - for (int i = 0; i < length; ++i) { - new(block_labels_ + i) vixl::Label(); - } + block_labels_ = CommonInitializeLabels<vixl::Label>(); } void Finalize(CodeAllocator* allocator) OVERRIDE; @@ -339,10 +363,9 @@ class CodeGeneratorARM64 : public CodeGenerator { // Code generation helpers. void MoveConstant(vixl::CPURegister destination, HConstant* constant); void MoveConstant(Location destination, int32_t value) OVERRIDE; - // The type is optional. When specified it must be coherent with the - // locations, and is used for optimisation and debugging. - void MoveLocation(Location destination, Location source, - Primitive::Type type = Primitive::kPrimVoid); + void MoveLocation(Location dst, Location src, Primitive::Type dst_type) OVERRIDE; + void AddLocationAsTemp(Location location, LocationSummary* locations) OVERRIDE; + void Load(Primitive::Type type, vixl::CPURegister dst, const vixl::MemOperand& src); void Store(Primitive::Type type, vixl::CPURegister rt, const vixl::MemOperand& dst); void LoadAcquire(HInstruction* instruction, vixl::CPURegister dst, const vixl::MemOperand& src); @@ -400,7 +423,7 @@ class CodeGeneratorARM64 : public CodeGenerator { }; // Labels for each block that will be compiled. 
- vixl::Label* block_labels_; + vixl::Label* block_labels_; // Indexed by block id. vixl::Label frame_entry_label_; LocationsBuilderARM64 location_builder_; diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index 8fdd56e0bc..00bb5053f2 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -20,7 +20,9 @@ #include "entrypoints/quick/quick_entrypoints_enum.h" #include "gc/accounting/card_table.h" #include "intrinsics.h" +#include "intrinsics_mips64.h" #include "art_method.h" +#include "code_generator_utils.h" #include "mirror/array-inl.h" #include "mirror/class-inl.h" #include "offsets.h" @@ -36,12 +38,8 @@ static constexpr int kCurrentMethodStackOffset = 0; static constexpr GpuRegister kMethodRegisterArgument = A0; // We need extra temporary/scratch registers (in addition to AT) in some cases. -static constexpr GpuRegister TMP = T8; static constexpr FpuRegister FTMP = F8; -// ART Thread Register. -static constexpr GpuRegister TR = S1; - Location Mips64ReturnLocation(Primitive::Type return_type) { switch (return_type) { case Primitive::kPrimBoolean: @@ -430,7 +428,7 @@ CodeGeneratorMIPS64::CodeGeneratorMIPS64(HGraph* graph, arraysize(kFpuCalleeSaves)), compiler_options, stats), - block_labels_(graph->GetArena(), 0), + block_labels_(nullptr), location_builder_(graph, this), instruction_visitor_(graph, this), move_resolver_(graph->GetArena(), this), @@ -452,12 +450,14 @@ Mips64Assembler* ParallelMoveResolverMIPS64::GetAssembler() const { } void ParallelMoveResolverMIPS64::EmitMove(size_t index) { - MoveOperands* move = moves_.Get(index); + DCHECK_LT(index, moves_.size()); + MoveOperands* move = moves_[index]; codegen_->MoveLocation(move->GetDestination(), move->GetSource(), move->GetType()); } void ParallelMoveResolverMIPS64::EmitSwap(size_t index) { - MoveOperands* move = moves_.Get(index); + DCHECK_LT(index, moves_.size()); + MoveOperands* move = moves_[index]; codegen_->SwapLocations(move->GetDestination(), move->GetSource(), move->GetType()); } @@ -617,7 +617,7 @@ void CodeGeneratorMIPS64::Bind(HBasicBlock* block) { void CodeGeneratorMIPS64::MoveLocation(Location destination, Location source, - Primitive::Type type) { + Primitive::Type dst_type) { if (source.Equals(destination)) { return; } @@ -625,7 +625,7 @@ void CodeGeneratorMIPS64::MoveLocation(Location destination, // A valid move can always be inferred from the destination and source // locations. When moving from and to a register, the argument type can be // used to generate 32bit instead of 64bit moves. - bool unspecified_type = (type == Primitive::kPrimVoid); + bool unspecified_type = (dst_type == Primitive::kPrimVoid); DCHECK_EQ(unspecified_type, false); if (destination.IsRegister() || destination.IsFpuRegister()) { @@ -636,21 +636,21 @@ void CodeGeneratorMIPS64::MoveLocation(Location destination, || src_cst->IsFloatConstant() || src_cst->IsNullConstant()))) { // For stack slots and 32bit constants, a 64bit type is appropriate. - type = destination.IsRegister() ? Primitive::kPrimInt : Primitive::kPrimFloat; + dst_type = destination.IsRegister() ? Primitive::kPrimInt : Primitive::kPrimFloat; } else { // If the source is a double stack slot or a 64bit constant, a 64bit // type is appropriate. Else the source is a register, and since the // type has not been specified, we chose a 64bit type to force a 64bit // move. - type = destination.IsRegister() ? 
Primitive::kPrimLong : Primitive::kPrimDouble; + dst_type = destination.IsRegister() ? Primitive::kPrimLong : Primitive::kPrimDouble; } } - DCHECK((destination.IsFpuRegister() && Primitive::IsFloatingPointType(type)) || - (destination.IsRegister() && !Primitive::IsFloatingPointType(type))); + DCHECK((destination.IsFpuRegister() && Primitive::IsFloatingPointType(dst_type)) || + (destination.IsRegister() && !Primitive::IsFloatingPointType(dst_type))); if (source.IsStackSlot() || source.IsDoubleStackSlot()) { // Move to GPR/FPR from stack LoadOperandType load_type = source.IsStackSlot() ? kLoadWord : kLoadDoubleword; - if (Primitive::IsFloatingPointType(type)) { + if (Primitive::IsFloatingPointType(dst_type)) { __ LoadFpuFromOffset(load_type, destination.AsFpuRegister<FpuRegister>(), SP, @@ -665,31 +665,47 @@ void CodeGeneratorMIPS64::MoveLocation(Location destination, } else if (source.IsConstant()) { // Move to GPR/FPR from constant GpuRegister gpr = AT; - if (!Primitive::IsFloatingPointType(type)) { + if (!Primitive::IsFloatingPointType(dst_type)) { gpr = destination.AsRegister<GpuRegister>(); } - if (type == Primitive::kPrimInt || type == Primitive::kPrimFloat) { + if (dst_type == Primitive::kPrimInt || dst_type == Primitive::kPrimFloat) { __ LoadConst32(gpr, GetInt32ValueOf(source.GetConstant()->AsConstant())); } else { __ LoadConst64(gpr, GetInt64ValueOf(source.GetConstant()->AsConstant())); } - if (type == Primitive::kPrimFloat) { + if (dst_type == Primitive::kPrimFloat) { __ Mtc1(gpr, destination.AsFpuRegister<FpuRegister>()); - } else if (type == Primitive::kPrimDouble) { + } else if (dst_type == Primitive::kPrimDouble) { __ Dmtc1(gpr, destination.AsFpuRegister<FpuRegister>()); } - } else { + } else if (source.IsRegister()) { if (destination.IsRegister()) { // Move to GPR from GPR __ Move(destination.AsRegister<GpuRegister>(), source.AsRegister<GpuRegister>()); } else { + DCHECK(destination.IsFpuRegister()); + if (Primitive::Is64BitType(dst_type)) { + __ Dmtc1(source.AsRegister<GpuRegister>(), destination.AsFpuRegister<FpuRegister>()); + } else { + __ Mtc1(source.AsRegister<GpuRegister>(), destination.AsFpuRegister<FpuRegister>()); + } + } + } else if (source.IsFpuRegister()) { + if (destination.IsFpuRegister()) { // Move to FPR from FPR - if (type == Primitive::kPrimFloat) { + if (dst_type == Primitive::kPrimFloat) { __ MovS(destination.AsFpuRegister<FpuRegister>(), source.AsFpuRegister<FpuRegister>()); } else { - DCHECK_EQ(type, Primitive::kPrimDouble); + DCHECK_EQ(dst_type, Primitive::kPrimDouble); __ MovD(destination.AsFpuRegister<FpuRegister>(), source.AsFpuRegister<FpuRegister>()); } + } else { + DCHECK(destination.IsRegister()); + if (Primitive::Is64BitType(dst_type)) { + __ Dmfc1(destination.AsRegister<GpuRegister>(), source.AsFpuRegister<FpuRegister>()); + } else { + __ Mfc1(destination.AsRegister<GpuRegister>(), source.AsFpuRegister<FpuRegister>()); + } } } } else { // The destination is not a register. It must be a stack slot. @@ -697,13 +713,13 @@ void CodeGeneratorMIPS64::MoveLocation(Location destination, if (source.IsRegister() || source.IsFpuRegister()) { if (unspecified_type) { if (source.IsRegister()) { - type = destination.IsStackSlot() ? Primitive::kPrimInt : Primitive::kPrimLong; + dst_type = destination.IsStackSlot() ? Primitive::kPrimInt : Primitive::kPrimLong; } else { - type = destination.IsStackSlot() ? Primitive::kPrimFloat : Primitive::kPrimDouble; + dst_type = destination.IsStackSlot() ? 
Primitive::kPrimFloat : Primitive::kPrimDouble; } } - DCHECK((destination.IsDoubleStackSlot() == Primitive::Is64BitType(type)) && - (source.IsFpuRegister() == Primitive::IsFloatingPointType(type))); + DCHECK((destination.IsDoubleStackSlot() == Primitive::Is64BitType(dst_type)) && + (source.IsFpuRegister() == Primitive::IsFloatingPointType(dst_type))); // Move to stack from GPR/FPR StoreOperandType store_type = destination.IsStackSlot() ? kStoreWord : kStoreDoubleword; if (source.IsRegister()) { @@ -861,6 +877,14 @@ void CodeGeneratorMIPS64::MoveConstant(Location location, int32_t value) { __ LoadConst32(location.AsRegister<GpuRegister>(), value); } +void CodeGeneratorMIPS64::AddLocationAsTemp(Location location, LocationSummary* locations) { + if (location.IsRegister()) { + locations->AddTemp(location); + } else { + UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location; + } +} + Location CodeGeneratorMIPS64::GetStackLocation(HLoadLocal* load) const { Primitive::Type type = load->GetType(); @@ -2395,7 +2419,11 @@ void InstructionCodeGeneratorMIPS64::VisitInvokeInterface(HInvokeInterface* invo } void LocationsBuilderMIPS64::VisitInvokeVirtual(HInvokeVirtual* invoke) { - // TODO intrinsic function + IntrinsicLocationsBuilderMIPS64 intrinsic(codegen_); + if (intrinsic.TryDispatch(invoke)) { + return; + } + HandleInvoke(invoke); } @@ -2404,7 +2432,11 @@ void LocationsBuilderMIPS64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* in // invokes must have been pruned by art::PrepareForRegisterAllocation. DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck()); - // TODO - intrinsic function + IntrinsicLocationsBuilderMIPS64 intrinsic(codegen_); + if (intrinsic.TryDispatch(invoke)) { + return; + } + HandleInvoke(invoke); // While SetupBlockedRegisters() blocks registers S2-S8 due to their @@ -2419,10 +2451,10 @@ void LocationsBuilderMIPS64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* in } } -static bool TryGenerateIntrinsicCode(HInvoke* invoke, - CodeGeneratorMIPS64* codegen ATTRIBUTE_UNUSED) { +static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorMIPS64* codegen) { if (invoke->GetLocations()->Intrinsified()) { - // TODO - intrinsic function + IntrinsicCodeGeneratorMIPS64 intrinsic(codegen); + intrinsic.Dispatch(invoke); return true; } return false; @@ -2531,7 +2563,10 @@ void InstructionCodeGeneratorMIPS64::VisitInvokeStaticOrDirect(HInvokeStaticOrDi } void InstructionCodeGeneratorMIPS64::VisitInvokeVirtual(HInvokeVirtual* invoke) { - // TODO: Try to generate intrinsics code. + if (TryGenerateIntrinsicCode(invoke, codegen_)) { + return; + } + LocationSummary* locations = invoke->GetLocations(); Location receiver = locations->InAt(0); GpuRegister temp = invoke->GetLocations()->GetTemp(0).AsRegister<GpuRegister>(); @@ -2555,18 +2590,24 @@ void InstructionCodeGeneratorMIPS64::VisitInvokeVirtual(HInvokeVirtual* invoke) } void LocationsBuilderMIPS64::VisitLoadClass(HLoadClass* cls) { - LocationSummary::CallKind call_kind = cls->CanCallRuntime() ? 
LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall; - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister()); + InvokeRuntimeCallingConvention calling_convention; + CodeGenerator::CreateLoadClassLocationSummary( + cls, + Location::RegisterLocation(calling_convention.GetRegisterAt(0)), + Location::RegisterLocation(A0)); } void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) { LocationSummary* locations = cls->GetLocations(); GpuRegister out = locations->Out().AsRegister<GpuRegister>(); GpuRegister current_method = locations->InAt(0).AsRegister<GpuRegister>(); - if (cls->IsReferrersClass()) { + if (cls->NeedsAccessCheck()) { + codegen_->MoveConstant(locations->GetTemp(0), cls->GetTypeIndex()); + codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInitializeTypeAndVerifyAccess), + cls, + cls->GetDexPc(), + nullptr); + } else if (cls->IsReferrersClass()) { DCHECK(!cls->CanCallRuntime()); DCHECK(!cls->MustGenerateClinitCheck()); __ LoadFromOffset(kLoadUnsignedWord, out, current_method, @@ -3107,6 +3148,74 @@ void InstructionCodeGeneratorMIPS64::VisitStaticFieldSet(HStaticFieldSet* instru HandleFieldSet(instruction, instruction->GetFieldInfo()); } +void LocationsBuilderMIPS64::VisitUnresolvedInstanceFieldGet( + HUnresolvedInstanceFieldGet* instruction) { + FieldAccessCallingConventionMIPS64 calling_convention; + codegen_->CreateUnresolvedFieldLocationSummary( + instruction, instruction->GetFieldType(), calling_convention); +} + +void InstructionCodeGeneratorMIPS64::VisitUnresolvedInstanceFieldGet( + HUnresolvedInstanceFieldGet* instruction) { + FieldAccessCallingConventionMIPS64 calling_convention; + codegen_->GenerateUnresolvedFieldAccess(instruction, + instruction->GetFieldType(), + instruction->GetFieldIndex(), + instruction->GetDexPc(), + calling_convention); +} + +void LocationsBuilderMIPS64::VisitUnresolvedInstanceFieldSet( + HUnresolvedInstanceFieldSet* instruction) { + FieldAccessCallingConventionMIPS64 calling_convention; + codegen_->CreateUnresolvedFieldLocationSummary( + instruction, instruction->GetFieldType(), calling_convention); +} + +void InstructionCodeGeneratorMIPS64::VisitUnresolvedInstanceFieldSet( + HUnresolvedInstanceFieldSet* instruction) { + FieldAccessCallingConventionMIPS64 calling_convention; + codegen_->GenerateUnresolvedFieldAccess(instruction, + instruction->GetFieldType(), + instruction->GetFieldIndex(), + instruction->GetDexPc(), + calling_convention); +} + +void LocationsBuilderMIPS64::VisitUnresolvedStaticFieldGet( + HUnresolvedStaticFieldGet* instruction) { + FieldAccessCallingConventionMIPS64 calling_convention; + codegen_->CreateUnresolvedFieldLocationSummary( + instruction, instruction->GetFieldType(), calling_convention); +} + +void InstructionCodeGeneratorMIPS64::VisitUnresolvedStaticFieldGet( + HUnresolvedStaticFieldGet* instruction) { + FieldAccessCallingConventionMIPS64 calling_convention; + codegen_->GenerateUnresolvedFieldAccess(instruction, + instruction->GetFieldType(), + instruction->GetFieldIndex(), + instruction->GetDexPc(), + calling_convention); +} + +void LocationsBuilderMIPS64::VisitUnresolvedStaticFieldSet( + HUnresolvedStaticFieldSet* instruction) { + FieldAccessCallingConventionMIPS64 calling_convention; + codegen_->CreateUnresolvedFieldLocationSummary( + instruction, instruction->GetFieldType(), calling_convention); +} + +void 
InstructionCodeGeneratorMIPS64::VisitUnresolvedStaticFieldSet( + HUnresolvedStaticFieldSet* instruction) { + FieldAccessCallingConventionMIPS64 calling_convention; + codegen_->GenerateUnresolvedFieldAccess(instruction, + instruction->GetFieldType(), + instruction->GetFieldIndex(), + instruction->GetDexPc(), + calling_convention); +} + void LocationsBuilderMIPS64::VisitSuspendCheck(HSuspendCheck* instruction) { new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath); } diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h index f66ecb3711..5e8f9e7f30 100644 --- a/compiler/optimizing/code_generator_mips64.h +++ b/compiler/optimizing/code_generator_mips64.h @@ -106,6 +106,31 @@ class InvokeRuntimeCallingConvention : public CallingConvention<GpuRegister, Fpu DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention); }; +class FieldAccessCallingConventionMIPS64 : public FieldAccessCallingConvention { + public: + FieldAccessCallingConventionMIPS64() {} + + Location GetObjectLocation() const OVERRIDE { + return Location::RegisterLocation(A1); + } + Location GetFieldIndexLocation() const OVERRIDE { + return Location::RegisterLocation(A0); + } + Location GetReturnLocation(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE { + return Location::RegisterLocation(A0); + } + Location GetSetValueLocation( + Primitive::Type type ATTRIBUTE_UNUSED, bool is_instance) const OVERRIDE { + return is_instance ? Location::RegisterLocation(A2) : Location::RegisterLocation(A1); + } + Location GetFpuLocation(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE { + return Location::FpuRegisterLocation(F0); + } + + private: + DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConventionMIPS64); +}; + class ParallelMoveResolverMIPS64 : public ParallelMoveResolverWithSwap { public: ParallelMoveResolverMIPS64(ArenaAllocator* allocator, CodeGeneratorMIPS64* codegen) @@ -270,21 +295,23 @@ class CodeGeneratorMIPS64 : public CodeGenerator { } Label* GetLabelOf(HBasicBlock* block) const { - return CommonGetLabelOf<Label>(block_labels_.GetRawStorage(), block); + return CommonGetLabelOf<Label>(block_labels_, block); } void Initialize() OVERRIDE { - block_labels_.SetSize(GetGraph()->GetBlocks().size()); + block_labels_ = CommonInitializeLabels<Label>(); } void Finalize(CodeAllocator* allocator) OVERRIDE; // Code generation helpers. - - void MoveLocation(Location destination, Location source, Primitive::Type type); + void MoveLocation(Location dst, Location src, Primitive::Type dst_type) OVERRIDE; void MoveConstant(Location destination, int32_t value) OVERRIDE; + void AddLocationAsTemp(Location location, LocationSummary* locations) OVERRIDE; + + void SwapLocations(Location loc1, Location loc2, Primitive::Type type); // Generate code to invoke a runtime entry point. @@ -315,7 +342,7 @@ class CodeGeneratorMIPS64 : public CodeGenerator { private: // Labels for each block that will be compiled. - GrowableArray<Label> block_labels_; + Label* block_labels_; // Indexed by block id. 
Label frame_entry_label_; LocationsBuilderMIPS64 location_builder_; InstructionCodeGeneratorMIPS64 instruction_visitor_; diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index ab3d1d1924..b89ca11ad0 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -380,6 +380,51 @@ class DeoptimizationSlowPathX86 : public SlowPathCode { DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86); }; +class ArraySetSlowPathX86 : public SlowPathCode { + public: + explicit ArraySetSlowPathX86(HInstruction* instruction) : instruction_(instruction) {} + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + InvokeRuntimeCallingConvention calling_convention; + HParallelMove parallel_move(codegen->GetGraph()->GetArena()); + parallel_move.AddMove( + locations->InAt(0), + Location::RegisterLocation(calling_convention.GetRegisterAt(0)), + Primitive::kPrimNot, + nullptr); + parallel_move.AddMove( + locations->InAt(1), + Location::RegisterLocation(calling_convention.GetRegisterAt(1)), + Primitive::kPrimInt, + nullptr); + parallel_move.AddMove( + locations->InAt(2), + Location::RegisterLocation(calling_convention.GetRegisterAt(2)), + Primitive::kPrimNot, + nullptr); + codegen->GetMoveResolver()->EmitNativeCode(&parallel_move); + + CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); + x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject), + instruction_, + instruction_->GetDexPc(), + this); + RestoreLiveRegisters(codegen, locations); + __ jmp(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathX86"; } + + private: + HInstruction* const instruction_; + + DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86); +}; + #undef __ #define __ down_cast<X86Assembler*>(GetAssembler())-> @@ -470,13 +515,13 @@ CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, 0, compiler_options, stats), - block_labels_(graph->GetArena(), 0), + block_labels_(nullptr), location_builder_(graph, this), instruction_visitor_(graph, this), move_resolver_(graph->GetArena(), this), isa_features_(isa_features), - method_patches_(graph->GetArena()->Adapter()), - relative_call_patches_(graph->GetArena()->Adapter()) { + method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) { // Use a fake return address register to mimic Quick. AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister)); } @@ -782,7 +827,10 @@ void CodeGeneratorX86::Move64(Location destination, Location source) { Location::RegisterLocation(destination.AsRegisterPairLow<Register>()), Primitive::kPrimInt); } else if (source.IsFpuRegister()) { - LOG(FATAL) << "Unimplemented"; + XmmRegister src_reg = source.AsFpuRegister<XmmRegister>(); + __ movd(destination.AsRegisterPairLow<Register>(), src_reg); + __ psrlq(src_reg, Immediate(32)); + __ movd(destination.AsRegisterPairHigh<Register>(), src_reg); } else { // No conflict possible, so just do the moves. 
DCHECK(source.IsDoubleStackSlot()); @@ -795,6 +843,15 @@ void CodeGeneratorX86::Move64(Location destination, Location source) { __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>()); } else if (source.IsDoubleStackSlot()) { __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex())); + } else if (source.IsRegisterPair()) { + size_t elem_size = Primitive::ComponentSize(Primitive::kPrimInt); + // Create stack space for 2 elements. + __ subl(ESP, Immediate(2 * elem_size)); + __ movl(Address(ESP, 0), source.AsRegisterPairLow<Register>()); + __ movl(Address(ESP, elem_size), source.AsRegisterPairHigh<Register>()); + __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, 0)); + // And remove the temporary stack space we allocated. + __ addl(ESP, Immediate(2 * elem_size)); } else { LOG(FATAL) << "Unimplemented"; } @@ -921,6 +978,25 @@ void CodeGeneratorX86::MoveConstant(Location location, int32_t value) { __ movl(location.AsRegister<Register>(), Immediate(value)); } +void CodeGeneratorX86::MoveLocation(Location dst, Location src, Primitive::Type dst_type) { + if (Primitive::Is64BitType(dst_type)) { + Move64(dst, src); + } else { + Move32(dst, src); + } +} + +void CodeGeneratorX86::AddLocationAsTemp(Location location, LocationSummary* locations) { + if (location.IsRegister()) { + locations->AddTemp(location); + } else if (location.IsRegisterPair()) { + locations->AddTemp(Location::RegisterLocation(location.AsRegisterPairLow<Register>())); + locations->AddTemp(Location::RegisterLocation(location.AsRegisterPairHigh<Register>())); + } else { + UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location; + } +} + void InstructionCodeGeneratorX86::HandleGoto(HInstruction* got, HBasicBlock* successor) { DCHECK(!successor->IsExitBlock()); @@ -4040,6 +4116,74 @@ void InstructionCodeGeneratorX86::VisitInstanceFieldGet(HInstanceFieldGet* instr HandleFieldGet(instruction, instruction->GetFieldInfo()); } +void LocationsBuilderX86::VisitUnresolvedInstanceFieldGet( + HUnresolvedInstanceFieldGet* instruction) { + FieldAccessCallingConventionX86 calling_convention; + codegen_->CreateUnresolvedFieldLocationSummary( + instruction, instruction->GetFieldType(), calling_convention); +} + +void InstructionCodeGeneratorX86::VisitUnresolvedInstanceFieldGet( + HUnresolvedInstanceFieldGet* instruction) { + FieldAccessCallingConventionX86 calling_convention; + codegen_->GenerateUnresolvedFieldAccess(instruction, + instruction->GetFieldType(), + instruction->GetFieldIndex(), + instruction->GetDexPc(), + calling_convention); +} + +void LocationsBuilderX86::VisitUnresolvedInstanceFieldSet( + HUnresolvedInstanceFieldSet* instruction) { + FieldAccessCallingConventionX86 calling_convention; + codegen_->CreateUnresolvedFieldLocationSummary( + instruction, instruction->GetFieldType(), calling_convention); +} + +void InstructionCodeGeneratorX86::VisitUnresolvedInstanceFieldSet( + HUnresolvedInstanceFieldSet* instruction) { + FieldAccessCallingConventionX86 calling_convention; + codegen_->GenerateUnresolvedFieldAccess(instruction, + instruction->GetFieldType(), + instruction->GetFieldIndex(), + instruction->GetDexPc(), + calling_convention); +} + +void LocationsBuilderX86::VisitUnresolvedStaticFieldGet( + HUnresolvedStaticFieldGet* instruction) { + FieldAccessCallingConventionX86 calling_convention; + codegen_->CreateUnresolvedFieldLocationSummary( + instruction, instruction->GetFieldType(), calling_convention); +} + +void 
InstructionCodeGeneratorX86::VisitUnresolvedStaticFieldGet( + HUnresolvedStaticFieldGet* instruction) { + FieldAccessCallingConventionX86 calling_convention; + codegen_->GenerateUnresolvedFieldAccess(instruction, + instruction->GetFieldType(), + instruction->GetFieldIndex(), + instruction->GetDexPc(), + calling_convention); +} + +void LocationsBuilderX86::VisitUnresolvedStaticFieldSet( + HUnresolvedStaticFieldSet* instruction) { + FieldAccessCallingConventionX86 calling_convention; + codegen_->CreateUnresolvedFieldLocationSummary( + instruction, instruction->GetFieldType(), calling_convention); +} + +void InstructionCodeGeneratorX86::VisitUnresolvedStaticFieldSet( + HUnresolvedStaticFieldSet* instruction) { + FieldAccessCallingConventionX86 calling_convention; + codegen_->GenerateUnresolvedFieldAccess(instruction, + instruction->GetFieldType(), + instruction->GetFieldIndex(), + instruction->GetDexPc(), + calling_convention); +} + void LocationsBuilderX86::VisitNullCheck(HNullCheck* instruction) { LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock() ? LocationSummary::kCallOnSlowPath @@ -4245,72 +4389,59 @@ void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) { bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); - bool needs_runtime_call = instruction->NeedsTypeCheck(); + bool may_need_runtime_call = instruction->NeedsTypeCheck(); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( instruction, - needs_runtime_call ? LocationSummary::kCall : LocationSummary::kNoCall); + may_need_runtime_call ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); - if (needs_runtime_call) { - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); - locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); + bool is_byte_type = (value_type == Primitive::kPrimBoolean) + || (value_type == Primitive::kPrimByte); + // We need the inputs to be different than the output in case of long operation. + // In case of a byte operation, the register allocator does not support multiple + // inputs that die at entry with one in a specific register. + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); + if (is_byte_type) { + // Ensure the value is in a byte register. + locations->SetInAt(2, Location::ByteRegisterOrConstant(EAX, instruction->InputAt(2))); + } else if (Primitive::IsFloatingPointType(value_type)) { + locations->SetInAt(2, Location::RequiresFpuRegister()); } else { - bool is_byte_type = (value_type == Primitive::kPrimBoolean) - || (value_type == Primitive::kPrimByte); - // We need the inputs to be different than the output in case of long operation. - // In case of a byte operation, the register allocator does not support multiple - // inputs that die at entry with one in a specific register. - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); - if (is_byte_type) { - // Ensure the value is in a byte register. 
- locations->SetInAt(2, Location::ByteRegisterOrConstant(EAX, instruction->InputAt(2))); - } else if (Primitive::IsFloatingPointType(value_type)) { - locations->SetInAt(2, Location::RequiresFpuRegister()); - } else { - locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2))); - } - if (needs_write_barrier) { - // Temporary registers for the write barrier. - locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too. - // Ensure the card is in a byte register. - locations->AddTemp(Location::RegisterLocation(ECX)); - } + locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2))); + } + if (needs_write_barrier) { + // Temporary registers for the write barrier. + locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too. + // Ensure the card is in a byte register. + locations->AddTemp(Location::RegisterLocation(ECX)); } } void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { LocationSummary* locations = instruction->GetLocations(); - Register obj = locations->InAt(0).AsRegister<Register>(); + Register array = locations->InAt(0).AsRegister<Register>(); Location index = locations->InAt(1); Location value = locations->InAt(2); Primitive::Type value_type = instruction->GetComponentType(); - bool needs_runtime_call = locations->WillCall(); + uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); + uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); + uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); + bool may_need_runtime_call = locations->CanCall(); bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); switch (value_type) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: { - uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value(); - if (index.IsConstant()) { - size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset; - if (value.IsRegister()) { - __ movb(Address(obj, offset), value.AsRegister<ByteRegister>()); - } else { - __ movb(Address(obj, offset), - Immediate(value.GetConstant()->AsIntConstant()->GetValue())); - } + uint32_t offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value(); + Address address = index.IsConstant() + ? 
Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + offset) + : Address(array, index.AsRegister<Register>(), TIMES_1, offset); + if (value.IsRegister()) { + __ movb(address, value.AsRegister<ByteRegister>()); } else { - if (value.IsRegister()) { - __ movb(Address(obj, index.AsRegister<Register>(), TIMES_1, data_offset), - value.AsRegister<ByteRegister>()); - } else { - __ movb(Address(obj, index.AsRegister<Register>(), TIMES_1, data_offset), - Immediate(value.GetConstant()->AsIntConstant()->GetValue())); - } + __ movb(address, Immediate(value.GetConstant()->AsIntConstant()->GetValue())); } codegen_->MaybeRecordImplicitNullCheck(instruction); break; @@ -4318,93 +4449,106 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { case Primitive::kPrimShort: case Primitive::kPrimChar: { - uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value(); - if (index.IsConstant()) { - size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset; - if (value.IsRegister()) { - __ movw(Address(obj, offset), value.AsRegister<Register>()); - } else { - __ movw(Address(obj, offset), - Immediate(value.GetConstant()->AsIntConstant()->GetValue())); - } + uint32_t offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value(); + Address address = index.IsConstant() + ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + offset) + : Address(array, index.AsRegister<Register>(), TIMES_2, offset); + if (value.IsRegister()) { + __ movw(address, value.AsRegister<Register>()); } else { - if (value.IsRegister()) { - __ movw(Address(obj, index.AsRegister<Register>(), TIMES_2, data_offset), - value.AsRegister<Register>()); - } else { - __ movw(Address(obj, index.AsRegister<Register>(), TIMES_2, data_offset), - Immediate(value.GetConstant()->AsIntConstant()->GetValue())); - } + __ movw(address, Immediate(value.GetConstant()->AsIntConstant()->GetValue())); } codegen_->MaybeRecordImplicitNullCheck(instruction); break; } - case Primitive::kPrimInt: case Primitive::kPrimNot: { - if (!needs_runtime_call) { - uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); - if (index.IsConstant()) { - size_t offset = - (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; - if (value.IsRegister()) { - if (kPoisonHeapReferences && value_type == Primitive::kPrimNot) { - Register temp = locations->GetTemp(0).AsRegister<Register>(); - __ movl(temp, value.AsRegister<Register>()); - __ PoisonHeapReference(temp); - __ movl(Address(obj, offset), temp); - } else { - __ movl(Address(obj, offset), value.AsRegister<Register>()); - } - } else { - DCHECK(value.IsConstant()) << value; - int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant()); - // `value_type == Primitive::kPrimNot` implies `v == 0`. - DCHECK((value_type != Primitive::kPrimNot) || (v == 0)); - // Note: if heap poisoning is enabled, no need to poison - // (negate) `v` if it is a reference, as it would be null. - __ movl(Address(obj, offset), Immediate(v)); - } + uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); + Address address = index.IsConstant() + ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset) + : Address(array, index.AsRegister<Register>(), TIMES_4, offset); + if (!value.IsRegister()) { + // Just setting null. 
+ DCHECK(instruction->InputAt(2)->IsNullConstant()); + DCHECK(value.IsConstant()) << value; + __ movl(address, Immediate(0)); + codegen_->MaybeRecordImplicitNullCheck(instruction); + DCHECK(!needs_write_barrier); + DCHECK(!may_need_runtime_call); + break; + } + + DCHECK(needs_write_barrier); + Register register_value = value.AsRegister<Register>(); + NearLabel done, not_null, do_put; + SlowPathCode* slow_path = nullptr; + Register temp = locations->GetTemp(0).AsRegister<Register>(); + if (may_need_runtime_call) { + slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathX86(instruction); + codegen_->AddSlowPath(slow_path); + if (instruction->GetValueCanBeNull()) { + __ testl(register_value, register_value); + __ j(kNotEqual, &not_null); + __ movl(address, Immediate(0)); + codegen_->MaybeRecordImplicitNullCheck(instruction); + __ jmp(&done); + __ Bind(&not_null); + } + + __ movl(temp, Address(array, class_offset)); + codegen_->MaybeRecordImplicitNullCheck(instruction); + __ MaybeUnpoisonHeapReference(temp); + __ movl(temp, Address(temp, component_offset)); + // No need to poison/unpoison, we're comparing two poisoned references. + __ cmpl(temp, Address(register_value, class_offset)); + if (instruction->StaticTypeOfArrayIsObjectArray()) { + __ j(kEqual, &do_put); + __ MaybeUnpoisonHeapReference(temp); + __ movl(temp, Address(temp, super_offset)); + // No need to unpoison, we're comparing against null. + __ testl(temp, temp); + __ j(kNotEqual, slow_path->GetEntryLabel()); + __ Bind(&do_put); } else { - DCHECK(index.IsRegister()) << index; - if (value.IsRegister()) { - if (kPoisonHeapReferences && value_type == Primitive::kPrimNot) { - Register temp = locations->GetTemp(0).AsRegister<Register>(); - __ movl(temp, value.AsRegister<Register>()); - __ PoisonHeapReference(temp); - __ movl(Address(obj, index.AsRegister<Register>(), TIMES_4, data_offset), temp); - } else { - __ movl(Address(obj, index.AsRegister<Register>(), TIMES_4, data_offset), - value.AsRegister<Register>()); - } - } else { - DCHECK(value.IsConstant()) << value; - int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant()); - // `value_type == Primitive::kPrimNot` implies `v == 0`. - DCHECK((value_type != Primitive::kPrimNot) || (v == 0)); - // Note: if heap poisoning is enabled, no need to poison - // (negate) `v` if it is a reference, as it would be null. - __ movl(Address(obj, index.AsRegister<Register>(), TIMES_4, data_offset), Immediate(v)); - } + __ j(kNotEqual, slow_path->GetEntryLabel()); } + } + + if (kPoisonHeapReferences) { + __ movl(temp, register_value); + __ PoisonHeapReference(temp); + __ movl(address, temp); + } else { + __ movl(address, register_value); + } + if (!may_need_runtime_call) { codegen_->MaybeRecordImplicitNullCheck(instruction); + } - if (needs_write_barrier) { - Register temp = locations->GetTemp(0).AsRegister<Register>(); - Register card = locations->GetTemp(1).AsRegister<Register>(); - codegen_->MarkGCCard( - temp, card, obj, value.AsRegister<Register>(), instruction->GetValueCanBeNull()); - } + Register card = locations->GetTemp(1).AsRegister<Register>(); + codegen_->MarkGCCard( + temp, card, array, value.AsRegister<Register>(), instruction->GetValueCanBeNull()); + __ Bind(&done); + + if (slow_path != nullptr) { + __ Bind(slow_path->GetExitLabel()); + } + + break; + } + case Primitive::kPrimInt: { + uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); + Address address = index.IsConstant() + ? 
Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset) + : Address(array, index.AsRegister<Register>(), TIMES_4, offset); + if (value.IsRegister()) { + __ movl(address, value.AsRegister<Register>()); } else { - DCHECK_EQ(value_type, Primitive::kPrimNot); - DCHECK(!codegen_->IsLeafMethod()); - // Note: if heap poisoning is enabled, pAputObject takes cares - // of poisoning the reference. - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject), - instruction, - instruction->GetDexPc(), - nullptr); + DCHECK(value.IsConstant()) << value; + int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant()); + __ movl(address, Immediate(v)); } + codegen_->MaybeRecordImplicitNullCheck(instruction); break; } @@ -4413,30 +4557,30 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; if (value.IsRegisterPair()) { - __ movl(Address(obj, offset), value.AsRegisterPairLow<Register>()); + __ movl(Address(array, offset), value.AsRegisterPairLow<Register>()); codegen_->MaybeRecordImplicitNullCheck(instruction); - __ movl(Address(obj, offset + kX86WordSize), value.AsRegisterPairHigh<Register>()); + __ movl(Address(array, offset + kX86WordSize), value.AsRegisterPairHigh<Register>()); } else { DCHECK(value.IsConstant()); int64_t val = value.GetConstant()->AsLongConstant()->GetValue(); - __ movl(Address(obj, offset), Immediate(Low32Bits(val))); + __ movl(Address(array, offset), Immediate(Low32Bits(val))); codegen_->MaybeRecordImplicitNullCheck(instruction); - __ movl(Address(obj, offset + kX86WordSize), Immediate(High32Bits(val))); + __ movl(Address(array, offset + kX86WordSize), Immediate(High32Bits(val))); } } else { if (value.IsRegisterPair()) { - __ movl(Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset), + __ movl(Address(array, index.AsRegister<Register>(), TIMES_8, data_offset), value.AsRegisterPairLow<Register>()); codegen_->MaybeRecordImplicitNullCheck(instruction); - __ movl(Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset + kX86WordSize), + __ movl(Address(array, index.AsRegister<Register>(), TIMES_8, data_offset + kX86WordSize), value.AsRegisterPairHigh<Register>()); } else { DCHECK(value.IsConstant()); int64_t val = value.GetConstant()->AsLongConstant()->GetValue(); - __ movl(Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset), + __ movl(Address(array, index.AsRegister<Register>(), TIMES_8, data_offset), Immediate(Low32Bits(val))); codegen_->MaybeRecordImplicitNullCheck(instruction); - __ movl(Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset + kX86WordSize), + __ movl(Address(array, index.AsRegister<Register>(), TIMES_8, data_offset + kX86WordSize), Immediate(High32Bits(val))); } } @@ -4444,28 +4588,22 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { } case Primitive::kPrimFloat: { - uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value(); + uint32_t offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value(); + Address address = index.IsConstant() + ? 
Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset) + : Address(array, index.AsRegister<Register>(), TIMES_4, offset); DCHECK(value.IsFpuRegister()); - if (index.IsConstant()) { - size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; - __ movss(Address(obj, offset), value.AsFpuRegister<XmmRegister>()); - } else { - __ movss(Address(obj, index.AsRegister<Register>(), TIMES_4, data_offset), - value.AsFpuRegister<XmmRegister>()); - } + __ movss(address, value.AsFpuRegister<XmmRegister>()); break; } case Primitive::kPrimDouble: { - uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value(); + uint32_t offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value(); + Address address = index.IsConstant() + ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + offset) + : Address(array, index.AsRegister<Register>(), TIMES_8, offset); DCHECK(value.IsFpuRegister()); - if (index.IsConstant()) { - size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; - __ movsd(Address(obj, offset), value.AsFpuRegister<XmmRegister>()); - } else { - __ movsd(Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset), - value.AsFpuRegister<XmmRegister>()); - } + __ movsd(address, value.AsFpuRegister<XmmRegister>()); break; } @@ -4630,7 +4768,8 @@ void ParallelMoveResolverX86::MoveMemoryToMemory64(int dst, int src) { } void ParallelMoveResolverX86::EmitMove(size_t index) { - MoveOperands* move = moves_.Get(index); + DCHECK_LT(index, moves_.size()); + MoveOperands* move = moves_[index]; Location source = move->GetSource(); Location destination = move->GetDestination(); @@ -4782,7 +4921,8 @@ void ParallelMoveResolverX86::Exchange(int mem1, int mem2) { } void ParallelMoveResolverX86::EmitSwap(size_t index) { - MoveOperands* move = moves_.Get(index); + DCHECK_LT(index, moves_.size()); + MoveOperands* move = moves_[index]; Location source = move->GetSource(); Location destination = move->GetDestination(); @@ -4849,20 +4989,24 @@ void ParallelMoveResolverX86::RestoreScratch(int reg) { } void LocationsBuilderX86::VisitLoadClass(HLoadClass* cls) { - LocationSummary::CallKind call_kind = cls->CanCallRuntime() - ? 
LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall; - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(cls, call_kind); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister()); + InvokeRuntimeCallingConvention calling_convention; + CodeGenerator::CreateLoadClassLocationSummary( + cls, + Location::RegisterLocation(calling_convention.GetRegisterAt(0)), + Location::RegisterLocation(EAX)); } void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) { LocationSummary* locations = cls->GetLocations(); Register out = locations->Out().AsRegister<Register>(); Register current_method = locations->InAt(0).AsRegister<Register>(); - if (cls->IsReferrersClass()) { + if (cls->NeedsAccessCheck()) { + codegen_->MoveConstant(locations->GetTemp(0), cls->GetTypeIndex()); + codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInitializeTypeAndVerifyAccess), + cls, + cls->GetDexPc(), + nullptr); + } else if (cls->IsReferrersClass()) { DCHECK(!cls->CanCallRuntime()); DCHECK(!cls->MustGenerateClinitCheck()); __ movl(out, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value())); @@ -4981,6 +5125,7 @@ void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kArrayObjectCheck: call_kind = LocationSummary::kNoCall; break; + case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: call_kind = LocationSummary::kCall; break; @@ -5021,10 +5166,11 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { __ j(kEqual, &zero); } - // In case of an interface check, we put the object class into the object register. + // In case of an interface/unresolved check, we put the object class into the object register. // This is safe, as the register is caller-save, and the object must be in another // register if it survives the runtime call. - Register target = (instruction->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) + Register target = (instruction->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) || + (instruction->GetTypeCheckKind() == TypeCheckKind::kUnresolvedCheck) ? obj : out; __ movl(target, Address(obj, class_offset)); @@ -5133,7 +5279,7 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { } break; } - + case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: default: { codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial), @@ -5175,6 +5321,7 @@ void LocationsBuilderX86::VisitCheckCast(HCheckCast* instruction) { : LocationSummary::kNoCall; break; case TypeCheckKind::kInterfaceCheck: + case TypeCheckKind::kUnresolvedCheck: call_kind = LocationSummary::kCall; break; case TypeCheckKind::kArrayCheck: @@ -5301,6 +5448,7 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { __ j(kNotEqual, slow_path->GetEntryLabel()); break; } + case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: default: codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), @@ -5621,7 +5769,7 @@ void CodeGeneratorX86::Finalize(CodeAllocator* allocator) { /** * Class to handle late fixup of offsets into constant area. 
*/ -class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocMisc> { +class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> { public: RIPFixup(const CodeGeneratorX86& codegen, int offset) : codegen_(codegen), offset_into_constant_area_(offset) {} diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index f38e1ea09c..ae2d84f945 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -91,6 +91,36 @@ class InvokeDexCallingConventionVisitorX86 : public InvokeDexCallingConventionVi DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorX86); }; +class FieldAccessCallingConventionX86 : public FieldAccessCallingConvention { + public: + FieldAccessCallingConventionX86() {} + + Location GetObjectLocation() const OVERRIDE { + return Location::RegisterLocation(ECX); + } + Location GetFieldIndexLocation() const OVERRIDE { + return Location::RegisterLocation(EAX); + } + Location GetReturnLocation(Primitive::Type type) const OVERRIDE { + return Primitive::Is64BitType(type) + ? Location::RegisterPairLocation(EAX, EDX) + : Location::RegisterLocation(EAX); + } + Location GetSetValueLocation(Primitive::Type type, bool is_instance) const OVERRIDE { + return Primitive::Is64BitType(type) + ? Location::RegisterPairLocation(EDX, EBX) + : (is_instance + ? Location::RegisterLocation(EDX) + : Location::RegisterLocation(ECX)); + } + Location GetFpuLocation(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE { + return Location::FpuRegisterLocation(XMM0); + } + + private: + DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConventionX86); +}; + class ParallelMoveResolverX86 : public ParallelMoveResolverWithSwap { public: ParallelMoveResolverX86(ArenaAllocator* allocator, CodeGeneratorX86* codegen) @@ -228,6 +258,9 @@ class CodeGeneratorX86 : public CodeGenerator { void Bind(HBasicBlock* block) OVERRIDE; void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE; void MoveConstant(Location destination, int32_t value) OVERRIDE; + void MoveLocation(Location dst, Location src, Primitive::Type dst_type) OVERRIDE; + void AddLocationAsTemp(Location location, LocationSummary* locations) OVERRIDE; + size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; @@ -316,11 +349,11 @@ class CodeGeneratorX86 : public CodeGenerator { bool value_can_be_null); Label* GetLabelOf(HBasicBlock* block) const { - return CommonGetLabelOf<Label>(block_labels_.GetRawStorage(), block); + return CommonGetLabelOf<Label>(block_labels_, block); } void Initialize() OVERRIDE { - block_labels_.SetSize(GetGraph()->GetBlocks().size()); + block_labels_ = CommonInitializeLabels<Label>(); } bool NeedsTwoRegisters(Primitive::Type type) const OVERRIDE { @@ -356,7 +389,7 @@ class CodeGeneratorX86 : public CodeGenerator { private: // Labels for each block that will be compiled. - GrowableArray<Label> block_labels_; + Label* block_labels_; // Indexed by block id. 
Label frame_entry_label_; LocationsBuilderX86 location_builder_; InstructionCodeGeneratorX86 instruction_visitor_; diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index cfce7a0faa..ad6588c359 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -396,6 +396,51 @@ class DeoptimizationSlowPathX86_64 : public SlowPathCode { DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86_64); }; +class ArraySetSlowPathX86_64 : public SlowPathCode { + public: + explicit ArraySetSlowPathX86_64(HInstruction* instruction) : instruction_(instruction) {} + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + InvokeRuntimeCallingConvention calling_convention; + HParallelMove parallel_move(codegen->GetGraph()->GetArena()); + parallel_move.AddMove( + locations->InAt(0), + Location::RegisterLocation(calling_convention.GetRegisterAt(0)), + Primitive::kPrimNot, + nullptr); + parallel_move.AddMove( + locations->InAt(1), + Location::RegisterLocation(calling_convention.GetRegisterAt(1)), + Primitive::kPrimInt, + nullptr); + parallel_move.AddMove( + locations->InAt(2), + Location::RegisterLocation(calling_convention.GetRegisterAt(2)), + Primitive::kPrimNot, + nullptr); + codegen->GetMoveResolver()->EmitNativeCode(¶llel_move); + + CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); + x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject), + instruction_, + instruction_->GetDexPc(), + this); + RestoreLiveRegisters(codegen, locations); + __ jmp(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathX86_64"; } + + private: + HInstruction* const instruction_; + + DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86_64); +}; + #undef __ #define __ down_cast<X86_64Assembler*>(GetAssembler())-> @@ -620,15 +665,15 @@ CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, arraysize(kFpuCalleeSaves)), compiler_options, stats), - block_labels_(graph->GetArena(), 0), + block_labels_(nullptr), location_builder_(graph, this), instruction_visitor_(graph, this), move_resolver_(graph->GetArena(), this), isa_features_(isa_features), constant_area_start_(0), - method_patches_(graph->GetArena()->Adapter()), - relative_call_patches_(graph->GetArena()->Adapter()), - pc_rel_dex_cache_patches_(graph->GetArena()->Adapter()) { + method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + pc_rel_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) { AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister)); } @@ -945,6 +990,19 @@ void CodeGeneratorX86_64::MoveConstant(Location location, int32_t value) { Load64BitValue(location.AsRegister<CpuRegister>(), static_cast<int64_t>(value)); } +void CodeGeneratorX86_64::MoveLocation( + Location dst, Location src, Primitive::Type dst_type ATTRIBUTE_UNUSED) { + Move(dst, src); +} + +void CodeGeneratorX86_64::AddLocationAsTemp(Location location, LocationSummary* locations) { + if (location.IsRegister()) { + locations->AddTemp(location); + } else { + UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location; + } +} + void InstructionCodeGeneratorX86_64::HandleGoto(HInstruction* got, HBasicBlock* successor) { DCHECK(!successor->IsExitBlock()); @@ -3804,6 
+3862,74 @@ void InstructionCodeGeneratorX86_64::VisitStaticFieldSet(HStaticFieldSet* instru HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull()); } +void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldGet( + HUnresolvedInstanceFieldGet* instruction) { + FieldAccessCallingConventionX86_64 calling_convention; + codegen_->CreateUnresolvedFieldLocationSummary( + instruction, instruction->GetFieldType(), calling_convention); +} + +void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldGet( + HUnresolvedInstanceFieldGet* instruction) { + FieldAccessCallingConventionX86_64 calling_convention; + codegen_->GenerateUnresolvedFieldAccess(instruction, + instruction->GetFieldType(), + instruction->GetFieldIndex(), + instruction->GetDexPc(), + calling_convention); +} + +void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldSet( + HUnresolvedInstanceFieldSet* instruction) { + FieldAccessCallingConventionX86_64 calling_convention; + codegen_->CreateUnresolvedFieldLocationSummary( + instruction, instruction->GetFieldType(), calling_convention); +} + +void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldSet( + HUnresolvedInstanceFieldSet* instruction) { + FieldAccessCallingConventionX86_64 calling_convention; + codegen_->GenerateUnresolvedFieldAccess(instruction, + instruction->GetFieldType(), + instruction->GetFieldIndex(), + instruction->GetDexPc(), + calling_convention); +} + +void LocationsBuilderX86_64::VisitUnresolvedStaticFieldGet( + HUnresolvedStaticFieldGet* instruction) { + FieldAccessCallingConventionX86_64 calling_convention; + codegen_->CreateUnresolvedFieldLocationSummary( + instruction, instruction->GetFieldType(), calling_convention); +} + +void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldGet( + HUnresolvedStaticFieldGet* instruction) { + FieldAccessCallingConventionX86_64 calling_convention; + codegen_->GenerateUnresolvedFieldAccess(instruction, + instruction->GetFieldType(), + instruction->GetFieldIndex(), + instruction->GetDexPc(), + calling_convention); +} + +void LocationsBuilderX86_64::VisitUnresolvedStaticFieldSet( + HUnresolvedStaticFieldSet* instruction) { + FieldAccessCallingConventionX86_64 calling_convention; + codegen_->CreateUnresolvedFieldLocationSummary( + instruction, instruction->GetFieldType(), calling_convention); +} + +void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldSet( + HUnresolvedStaticFieldSet* instruction) { + FieldAccessCallingConventionX86_64 calling_convention; + codegen_->GenerateUnresolvedFieldAccess(instruction, + instruction->GetFieldType(), + instruction->GetFieldIndex(), + instruction->GetDexPc(), + calling_convention); +} + void LocationsBuilderX86_64::VisitNullCheck(HNullCheck* instruction) { LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock() ? LocationSummary::kCallOnSlowPath @@ -3992,66 +4118,55 @@ void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) { bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); - bool needs_runtime_call = instruction->NeedsTypeCheck(); + bool may_need_runtime_call = instruction->NeedsTypeCheck(); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( - instruction, needs_runtime_call ? 
LocationSummary::kCall : LocationSummary::kNoCall); - if (needs_runtime_call) { - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); - locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); + instruction, + may_need_runtime_call ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); + + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt( + 1, Location::RegisterOrConstant(instruction->InputAt(1))); + locations->SetInAt(2, Location::RequiresRegister()); + if (value_type == Primitive::kPrimLong) { + locations->SetInAt(2, Location::RegisterOrInt32LongConstant(instruction->InputAt(2))); + } else if (value_type == Primitive::kPrimFloat || value_type == Primitive::kPrimDouble) { + locations->SetInAt(2, Location::RequiresFpuRegister()); } else { - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt( - 1, Location::RegisterOrConstant(instruction->InputAt(1))); - locations->SetInAt(2, Location::RequiresRegister()); - if (value_type == Primitive::kPrimLong) { - locations->SetInAt(2, Location::RegisterOrInt32LongConstant(instruction->InputAt(2))); - } else if (value_type == Primitive::kPrimFloat || value_type == Primitive::kPrimDouble) { - locations->SetInAt(2, Location::RequiresFpuRegister()); - } else { - locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2))); - } + locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2))); + } - if (needs_write_barrier) { - // Temporary registers for the write barrier. - locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too. - locations->AddTemp(Location::RequiresRegister()); - } + if (needs_write_barrier) { + // Temporary registers for the write barrier. + locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too. + locations->AddTemp(Location::RequiresRegister()); } } void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { LocationSummary* locations = instruction->GetLocations(); - CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>(); + CpuRegister array = locations->InAt(0).AsRegister<CpuRegister>(); Location index = locations->InAt(1); Location value = locations->InAt(2); Primitive::Type value_type = instruction->GetComponentType(); - bool needs_runtime_call = locations->WillCall(); + bool may_need_runtime_call = locations->CanCall(); bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); + uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); + uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); + uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); switch (value_type) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: { - uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value(); - if (index.IsConstant()) { - size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset; - if (value.IsRegister()) { - __ movb(Address(obj, offset), value.AsRegister<CpuRegister>()); - } else { - __ movb(Address(obj, offset), - Immediate(value.GetConstant()->AsIntConstant()->GetValue())); - } + uint32_t offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value(); + Address address = index.IsConstant() + ? 
Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + offset) + : Address(array, index.AsRegister<CpuRegister>(), TIMES_1, offset); + if (value.IsRegister()) { + __ movb(address, value.AsRegister<CpuRegister>()); } else { - if (value.IsRegister()) { - __ movb(Address(obj, index.AsRegister<CpuRegister>(), TIMES_1, data_offset), - value.AsRegister<CpuRegister>()); - } else { - __ movb(Address(obj, index.AsRegister<CpuRegister>(), TIMES_1, data_offset), - Immediate(value.GetConstant()->AsIntConstant()->GetValue())); - } + __ movb(address, Immediate(value.GetConstant()->AsIntConstant()->GetValue())); } codegen_->MaybeRecordImplicitNullCheck(instruction); break; @@ -4059,154 +4174,145 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { case Primitive::kPrimShort: case Primitive::kPrimChar: { - uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value(); - if (index.IsConstant()) { - size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset; - if (value.IsRegister()) { - __ movw(Address(obj, offset), value.AsRegister<CpuRegister>()); - } else { - DCHECK(value.IsConstant()) << value; - __ movw(Address(obj, offset), - Immediate(value.GetConstant()->AsIntConstant()->GetValue())); - } + uint32_t offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value(); + Address address = index.IsConstant() + ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + offset) + : Address(array, index.AsRegister<CpuRegister>(), TIMES_2, offset); + if (value.IsRegister()) { + __ movw(address, value.AsRegister<CpuRegister>()); } else { - DCHECK(index.IsRegister()) << index; - if (value.IsRegister()) { - __ movw(Address(obj, index.AsRegister<CpuRegister>(), TIMES_2, data_offset), - value.AsRegister<CpuRegister>()); - } else { - DCHECK(value.IsConstant()) << value; - __ movw(Address(obj, index.AsRegister<CpuRegister>(), TIMES_2, data_offset), - Immediate(value.GetConstant()->AsIntConstant()->GetValue())); - } + DCHECK(value.IsConstant()) << value; + __ movw(address, Immediate(value.GetConstant()->AsIntConstant()->GetValue())); } codegen_->MaybeRecordImplicitNullCheck(instruction); break; } - case Primitive::kPrimInt: case Primitive::kPrimNot: { - if (!needs_runtime_call) { - uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); - if (index.IsConstant()) { - size_t offset = - (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; - if (value.IsRegister()) { - if (kPoisonHeapReferences && value_type == Primitive::kPrimNot) { - CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>(); - __ movl(temp, value.AsRegister<CpuRegister>()); - __ PoisonHeapReference(temp); - __ movl(Address(obj, offset), temp); - } else { - __ movl(Address(obj, offset), value.AsRegister<CpuRegister>()); - } - } else { - DCHECK(value.IsConstant()) << value; - int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant()); - // `value_type == Primitive::kPrimNot` implies `v == 0`. - DCHECK((value_type != Primitive::kPrimNot) || (v == 0)); - // Note: if heap poisoning is enabled, no need to poison - // (negate) `v` if it is a reference, as it would be null. 
- __ movl(Address(obj, offset), Immediate(v)); - } - } else { - DCHECK(index.IsRegister()) << index; - if (value.IsRegister()) { - if (kPoisonHeapReferences && value_type == Primitive::kPrimNot) { - CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>(); - __ movl(temp, value.AsRegister<CpuRegister>()); - __ PoisonHeapReference(temp); - __ movl(Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset), temp); - } else { - __ movl(Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset), - value.AsRegister<CpuRegister>()); - } - } else { - DCHECK(value.IsConstant()) << value; - int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant()); - // `value_type == Primitive::kPrimNot` implies `v == 0`. - DCHECK((value_type != Primitive::kPrimNot) || (v == 0)); - // Note: if heap poisoning is enabled, no need to poison - // (negate) `v` if it is a reference, as it would be null. - __ movl(Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset), - Immediate(v)); - } + uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); + Address address = index.IsConstant() + ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset) + : Address(array, index.AsRegister<CpuRegister>(), TIMES_4, offset); + if (!value.IsRegister()) { + // Just setting null. + DCHECK(instruction->InputAt(2)->IsNullConstant()); + DCHECK(value.IsConstant()) << value; + __ movl(address, Immediate(0)); + codegen_->MaybeRecordImplicitNullCheck(instruction); + DCHECK(!needs_write_barrier); + DCHECK(!may_need_runtime_call); + break; + } + + DCHECK(needs_write_barrier); + CpuRegister register_value = value.AsRegister<CpuRegister>(); + NearLabel done, not_null, do_put; + SlowPathCode* slow_path = nullptr; + CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>(); + if (may_need_runtime_call) { + slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathX86_64(instruction); + codegen_->AddSlowPath(slow_path); + if (instruction->GetValueCanBeNull()) { + __ testl(register_value, register_value); + __ j(kNotEqual, ¬_null); + __ movl(address, Immediate(0)); + codegen_->MaybeRecordImplicitNullCheck(instruction); + __ jmp(&done); + __ Bind(¬_null); } + + __ movl(temp, Address(array, class_offset)); codegen_->MaybeRecordImplicitNullCheck(instruction); - if (needs_write_barrier) { - DCHECK_EQ(value_type, Primitive::kPrimNot); - CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>(); - CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>(); - codegen_->MarkGCCard( - temp, card, obj, value.AsRegister<CpuRegister>(), instruction->GetValueCanBeNull()); + __ MaybeUnpoisonHeapReference(temp); + __ movl(temp, Address(temp, component_offset)); + // No need to poison/unpoison, we're comparing two poisoned references. + __ cmpl(temp, Address(register_value, class_offset)); + if (instruction->StaticTypeOfArrayIsObjectArray()) { + __ j(kEqual, &do_put); + __ MaybeUnpoisonHeapReference(temp); + __ movl(temp, Address(temp, super_offset)); + // No need to unpoison the result, we're comparing against null. 
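
For reference-typed array stores, the rewrite above trades the unconditional pAputObject call for an inline class comparison with ArraySetSlowPathX86_64 as the fallback: the value's class must equal the array's component type, and when the static type is Object[] a null super class proves the component is java.lang.Object. A plain C++ restatement of that decision, with a hypothetical Klass struct standing in for mirror::Class:

  #include <cassert>
  #include <cstddef>

  struct Klass {
    const Klass* super;           // nullptr only for java.lang.Object.
    const Klass* component_type;  // nullptr for non-array classes.
  };

  // True when the store must be deferred to the runtime slow path,
  // mirroring the branch structure emitted by VisitArraySet.
  bool NeedsSlowPath(const Klass* array_klass, const Klass* value_klass,
                     bool static_type_is_object_array) {
    if (value_klass == nullptr) {
      return false;  // Storing null is always fine; the code just writes 0.
    }
    const Klass* component = array_klass->component_type;
    if (component == value_klass) {
      return false;  // Value's class matches the component type exactly.
    }
    if (static_type_is_object_array && component->super == nullptr) {
      return false;  // Component is java.lang.Object: every reference fits.
    }
    return true;     // Otherwise defer to the full runtime check.
  }

  int main() {
    Klass object  = {nullptr, nullptr};   // java.lang.Object
    Klass string  = {&object, nullptr};   // some subclass
    Klass obj_arr = {&object, &object};   // Object[]
    Klass str_arr = {&object, &string};   // String[]
    assert(!NeedsSlowPath(&str_arr, nullptr, false));  // null store
    assert(!NeedsSlowPath(&str_arr, &string, false));  // exact match
    assert(!NeedsSlowPath(&obj_arr, &string, true));   // Object[] takes anything
    assert(NeedsSlowPath(&str_arr, &object, false));   // needs full check
    return 0;
  }

Anything the fast path cannot prove is left to the runtime's full assignability check; heap-reference poisoning and the GC card mark are orthogonal and omitted here.
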
+ __ testl(temp, temp); + __ j(kNotEqual, slow_path->GetEntryLabel()); + __ Bind(&do_put); + } else { + __ j(kNotEqual, slow_path->GetEntryLabel()); } + } + + if (kPoisonHeapReferences) { + __ movl(temp, register_value); + __ PoisonHeapReference(temp); + __ movl(address, temp); + } else { + __ movl(address, register_value); + } + if (!may_need_runtime_call) { + codegen_->MaybeRecordImplicitNullCheck(instruction); + } + + CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>(); + codegen_->MarkGCCard( + temp, card, array, value.AsRegister<CpuRegister>(), instruction->GetValueCanBeNull()); + __ Bind(&done); + + if (slow_path != nullptr) { + __ Bind(slow_path->GetExitLabel()); + } + + break; + } + case Primitive::kPrimInt: { + uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); + Address address = index.IsConstant() + ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset) + : Address(array, index.AsRegister<CpuRegister>(), TIMES_4, offset); + if (value.IsRegister()) { + __ movl(address, value.AsRegister<CpuRegister>()); } else { - DCHECK_EQ(value_type, Primitive::kPrimNot); - // Note: if heap poisoning is enabled, pAputObject takes cares - // of poisoning the reference. - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject), - instruction, - instruction->GetDexPc(), - nullptr); - DCHECK(!codegen_->IsLeafMethod()); + DCHECK(value.IsConstant()) << value; + int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant()); + __ movl(address, Immediate(v)); } + codegen_->MaybeRecordImplicitNullCheck(instruction); break; } case Primitive::kPrimLong: { - uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value(); - if (index.IsConstant()) { - size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; - if (value.IsRegister()) { - __ movq(Address(obj, offset), value.AsRegister<CpuRegister>()); - } else { - int64_t v = value.GetConstant()->AsLongConstant()->GetValue(); - DCHECK(IsInt<32>(v)); - int32_t v_32 = v; - __ movq(Address(obj, offset), Immediate(v_32)); - } + uint32_t offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value(); + Address address = index.IsConstant() + ? 
Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + offset) + : Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset); + if (value.IsRegister()) { + __ movq(address, value.AsRegister<CpuRegister>()); } else { - if (value.IsRegister()) { - __ movq(Address(obj, index.AsRegister<CpuRegister>(), TIMES_8, data_offset), - value.AsRegister<CpuRegister>()); - } else { - int64_t v = value.GetConstant()->AsLongConstant()->GetValue(); - DCHECK(IsInt<32>(v)); - int32_t v_32 = v; - __ movq(Address(obj, index.AsRegister<CpuRegister>(), TIMES_8, data_offset), - Immediate(v_32)); - } + int64_t v = value.GetConstant()->AsLongConstant()->GetValue(); + DCHECK(IsInt<32>(v)); + int32_t v_32 = v; + __ movq(address, Immediate(v_32)); } codegen_->MaybeRecordImplicitNullCheck(instruction); break; } case Primitive::kPrimFloat: { - uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value(); - if (index.IsConstant()) { - size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; - DCHECK(value.IsFpuRegister()); - __ movss(Address(obj, offset), value.AsFpuRegister<XmmRegister>()); - } else { - DCHECK(value.IsFpuRegister()); - __ movss(Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset), - value.AsFpuRegister<XmmRegister>()); - } + uint32_t offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value(); + Address address = index.IsConstant() + ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset) + : Address(array, index.AsRegister<CpuRegister>(), TIMES_4, offset); + DCHECK(value.IsFpuRegister()); + __ movss(address, value.AsFpuRegister<XmmRegister>()); codegen_->MaybeRecordImplicitNullCheck(instruction); break; } case Primitive::kPrimDouble: { - uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value(); - if (index.IsConstant()) { - size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; - DCHECK(value.IsFpuRegister()); - __ movsd(Address(obj, offset), value.AsFpuRegister<XmmRegister>()); - } else { - DCHECK(value.IsFpuRegister()); - __ movsd(Address(obj, index.AsRegister<CpuRegister>(), TIMES_8, data_offset), - value.AsFpuRegister<XmmRegister>()); - } + uint32_t offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value(); + Address address = index.IsConstant() + ? 
Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + offset) + : Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset); + DCHECK(value.IsFpuRegister()); + __ movsd(address, value.AsFpuRegister<XmmRegister>()); codegen_->MaybeRecordImplicitNullCheck(instruction); break; } @@ -4250,7 +4356,7 @@ void InstructionCodeGeneratorX86_64::VisitBoundsCheck(HBoundsCheck* instruction) Location index_loc = locations->InAt(0); Location length_loc = locations->InAt(1); SlowPathCode* slow_path = - new (GetGraph()->GetArena()) BoundsCheckSlowPathX86_64(instruction); + new (GetGraph()->GetArena()) BoundsCheckSlowPathX86_64(instruction); if (length_loc.IsConstant()) { int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant()); @@ -4373,7 +4479,8 @@ X86_64Assembler* ParallelMoveResolverX86_64::GetAssembler() const { } void ParallelMoveResolverX86_64::EmitMove(size_t index) { - MoveOperands* move = moves_.Get(index); + DCHECK_LT(index, moves_.size()); + MoveOperands* move = moves_[index]; Location source = move->GetSource(); Location destination = move->GetDestination(); @@ -4531,7 +4638,8 @@ void ParallelMoveResolverX86_64::Exchange64(XmmRegister reg, int mem) { } void ParallelMoveResolverX86_64::EmitSwap(size_t index) { - MoveOperands* move = moves_.Get(index); + DCHECK_LT(index, moves_.size()); + MoveOperands* move = moves_[index]; Location source = move->GetSource(); Location destination = move->GetDestination(); @@ -4586,20 +4694,24 @@ void InstructionCodeGeneratorX86_64::GenerateClassInitializationCheck( } void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) { - LocationSummary::CallKind call_kind = cls->CanCallRuntime() - ? LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall; - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(cls, call_kind); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister()); + InvokeRuntimeCallingConvention calling_convention; + CodeGenerator::CreateLoadClassLocationSummary( + cls, + Location::RegisterLocation(calling_convention.GetRegisterAt(0)), + Location::RegisterLocation(RAX)); } void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) { LocationSummary* locations = cls->GetLocations(); CpuRegister out = locations->Out().AsRegister<CpuRegister>(); CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>(); - if (cls->IsReferrersClass()) { + if (cls->NeedsAccessCheck()) { + codegen_->MoveConstant(locations->GetTemp(0), cls->GetTypeIndex()); + codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInitializeTypeAndVerifyAccess), + cls, + cls->GetDexPc(), + nullptr); + } else if (cls->IsReferrersClass()) { DCHECK(!cls->CanCallRuntime()); DCHECK(!cls->MustGenerateClinitCheck()); __ movl(out, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value())); @@ -4709,6 +4821,7 @@ void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kArrayObjectCheck: call_kind = LocationSummary::kNoCall; break; + case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: call_kind = LocationSummary::kCall; break; @@ -4749,10 +4862,11 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { __ j(kEqual, &zero); } - // In case of an interface check, we put the object class into the object register. + // In case of an interface/unresolved check, we put the object class into the object register. 
// This is safe, as the register is caller-save, and the object must be in another // register if it survives the runtime call. - CpuRegister target = (instruction->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) + CpuRegister target = (instruction->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) || + (instruction->GetTypeCheckKind() == TypeCheckKind::kUnresolvedCheck) ? obj : out; __ movl(target, Address(obj, class_offset)); @@ -4866,7 +4980,7 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { } break; } - + case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: default: { codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial), @@ -4907,6 +5021,7 @@ void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) { ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; break; + case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: call_kind = LocationSummary::kCall; break; @@ -5034,6 +5149,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { __ j(kNotEqual, slow_path->GetEntryLabel()); break; } + case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: default: codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), @@ -5277,7 +5393,7 @@ void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) { /** * Class to handle late fixup of offsets into constant area. */ -class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocMisc> { +class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> { public: RIPFixup(const CodeGeneratorX86_64& codegen, int offset) : codegen_(codegen), offset_into_constant_area_(offset) {} diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 1ec3580040..ecc8630e6b 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -70,6 +70,35 @@ class InvokeDexCallingConvention : public CallingConvention<Register, FloatRegis DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention); }; +class FieldAccessCallingConventionX86_64 : public FieldAccessCallingConvention { + public: + FieldAccessCallingConventionX86_64() {} + + Location GetObjectLocation() const OVERRIDE { + return Location::RegisterLocation(RSI); + } + Location GetFieldIndexLocation() const OVERRIDE { + return Location::RegisterLocation(RDI); + } + Location GetReturnLocation(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE { + return Location::RegisterLocation(RAX); + } + Location GetSetValueLocation(Primitive::Type type, bool is_instance) const OVERRIDE { + return Primitive::Is64BitType(type) + ? Location::RegisterLocation(RDX) + : (is_instance + ? 
Location::RegisterLocation(RDX) + : Location::RegisterLocation(RSI)); + } + Location GetFpuLocation(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE { + return Location::FpuRegisterLocation(XMM0); + } + + private: + DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConventionX86_64); +}; + + class InvokeDexCallingConventionVisitorX86_64 : public InvokeDexCallingConventionVisitor { public: InvokeDexCallingConventionVisitorX86_64() {} @@ -215,6 +244,9 @@ class CodeGeneratorX86_64 : public CodeGenerator { void Bind(HBasicBlock* block) OVERRIDE; void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE; void MoveConstant(Location destination, int32_t value) OVERRIDE; + void MoveLocation(Location dst, Location src, Primitive::Type dst_type) OVERRIDE; + void AddLocationAsTemp(Location location, LocationSummary* locations) OVERRIDE; + size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; @@ -286,11 +318,11 @@ class CodeGeneratorX86_64 : public CodeGenerator { void Move(Location destination, Location source); Label* GetLabelOf(HBasicBlock* block) const { - return CommonGetLabelOf<Label>(block_labels_.GetRawStorage(), block); + return CommonGetLabelOf<Label>(block_labels_, block); } void Initialize() OVERRIDE { - block_labels_.SetSize(GetGraph()->GetBlocks().size()); + block_labels_ = CommonInitializeLabels<Label>(); } bool NeedsTwoRegisters(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE { @@ -334,7 +366,7 @@ class CodeGeneratorX86_64 : public CodeGenerator { }; // Labels for each block that will be compiled. - GrowableArray<Label> block_labels_; + Label* block_labels_; // Indexed by block id. Label frame_entry_label_; LocationsBuilderX86_64 location_builder_; InstructionCodeGeneratorX86_64 instruction_visitor_; diff --git a/compiler/optimizing/constant_folding.cc b/compiler/optimizing/constant_folding.cc index 20ce1105ce..e0aa4ff489 100644 --- a/compiler/optimizing/constant_folding.cc +++ b/compiler/optimizing/constant_folding.cc @@ -226,14 +226,14 @@ void InstructionWithAbsorbingInputSimplifier::VisitSub(HSub* instruction) { // We assume that GVN has run before, so we only perform a pointer // comparison. If for some reason the values are equal but the pointers are - // different, we are still correct and only miss an optimisation + // different, we are still correct and only miss an optimization // opportunity. if (instruction->GetLeft() == instruction->GetRight()) { // Replace code looking like // SUB dst, src, src // with // CONSTANT 0 - // Note that we cannot optimise `x - x` to `0` for floating-point. It does + // Note that we cannot optimize `x - x` to `0` for floating-point. It does // not work when `x` is an infinity. 
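
The restriction spelled out in that comment is easy to verify: with IEEE-754 doubles, inf - inf (and NaN - NaN) is NaN rather than 0, so folding x - x to 0 is only sound for integral types. A two-assert check in standard C++:

  #include <cassert>
  #include <cmath>
  #include <limits>

  int main() {
    double inf = std::numeric_limits<double>::infinity();
    // inf - inf is NaN, not 0, so the fold would change program behavior.
    assert(std::isnan(inf - inf));
    // NaN - NaN is also NaN; the fold is equally wrong for NaN inputs.
    double nan = std::numeric_limits<double>::quiet_NaN();
    assert(std::isnan(nan - nan));
    return 0;
  }
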
instruction->ReplaceWith(GetGraph()->GetConstant(type, 0)); block->RemoveInstruction(instruction); diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc index 345ff72148..007d0e3332 100644 --- a/compiler/optimizing/dead_code_elimination.cc +++ b/compiler/optimizing/dead_code_elimination.cc @@ -16,49 +16,67 @@ #include "dead_code_elimination.h" +#include "utils/array_ref.h" #include "base/bit_vector-inl.h" #include "ssa_phi_elimination.h" namespace art { -static void MarkReachableBlocks(HBasicBlock* block, ArenaBitVector* visited) { - int block_id = block->GetBlockId(); - if (visited->IsBitSet(block_id)) { - return; - } - visited->SetBit(block_id); - - HInstruction* last_instruction = block->GetLastInstruction(); - if (last_instruction->IsIf()) { - HIf* if_instruction = last_instruction->AsIf(); - HInstruction* condition = if_instruction->InputAt(0); - if (!condition->IsIntConstant()) { - MarkReachableBlocks(if_instruction->IfTrueSuccessor(), visited); - MarkReachableBlocks(if_instruction->IfFalseSuccessor(), visited); - } else if (condition->AsIntConstant()->IsOne()) { - MarkReachableBlocks(if_instruction->IfTrueSuccessor(), visited); - } else { - DCHECK(condition->AsIntConstant()->IsZero()); - MarkReachableBlocks(if_instruction->IfFalseSuccessor(), visited); - } - } else if (last_instruction->IsPackedSwitch() && - last_instruction->AsPackedSwitch()->InputAt(0)->IsIntConstant()) { - HPackedSwitch* switch_instruction = last_instruction->AsPackedSwitch(); - int32_t switch_value = switch_instruction->InputAt(0)->AsIntConstant()->GetValue(); - int32_t start_value = switch_instruction->GetStartValue(); - int32_t last_value = start_value + switch_instruction->GetNumEntries(); - for (int32_t case_value = start_value; case_value <= last_value; case_value++) { - if (case_value == last_value) { - MarkReachableBlocks(switch_instruction->GetDefaultBlock(), visited); +static void MarkReachableBlocks(HGraph* graph, ArenaBitVector* visited) { + ArenaVector<HBasicBlock*> worklist(graph->GetArena()->Adapter()); + constexpr size_t kDefaultWorlistSize = 8; + worklist.reserve(kDefaultWorlistSize); + visited->SetBit(graph->GetEntryBlock()->GetBlockId()); + worklist.push_back(graph->GetEntryBlock()); + + while (!worklist.empty()) { + HBasicBlock* block = worklist.back(); + worklist.pop_back(); + int block_id = block->GetBlockId(); + DCHECK(visited->IsBitSet(block_id)); + + ArrayRef<HBasicBlock* const> live_successors(block->GetSuccessors()); + HInstruction* last_instruction = block->GetLastInstruction(); + if (last_instruction->IsIf()) { + HIf* if_instruction = last_instruction->AsIf(); + HInstruction* condition = if_instruction->InputAt(0); + if (condition->IsIntConstant()) { + if (condition->AsIntConstant()->IsOne()) { + live_successors = live_successors.SubArray(0u, 1u); + DCHECK_EQ(live_successors[0], if_instruction->IfTrueSuccessor()); + } else { + DCHECK(condition->AsIntConstant()->IsZero()); + live_successors = live_successors.SubArray(1u, 1u); + DCHECK_EQ(live_successors[0], if_instruction->IfFalseSuccessor()); + } } - if (case_value == switch_value) { - MarkReachableBlocks(block->GetSuccessor(case_value - start_value), visited); - break; + } else if (last_instruction->IsPackedSwitch()) { + HPackedSwitch* switch_instruction = last_instruction->AsPackedSwitch(); + HInstruction* switch_input = switch_instruction->InputAt(0); + if (switch_input->IsIntConstant()) { + int32_t switch_value = switch_input->AsIntConstant()->GetValue(); + int32_t start_value = 
switch_instruction->GetStartValue(); + // Note: Though the spec forbids packed-switch values to wrap around, we leave + // that task to the verifier and use unsigned arithmetic with it's "modulo 2^32" + // semantics to check if the value is in range, wrapped or not. + uint32_t switch_index = + static_cast<uint32_t>(switch_value) - static_cast<uint32_t>(start_value); + if (switch_index < switch_instruction->GetNumEntries()) { + live_successors = live_successors.SubArray(switch_index, 1u); + DCHECK_EQ(live_successors[0], block->GetSuccessor(switch_index)); + } else { + live_successors = live_successors.SubArray(switch_instruction->GetNumEntries(), 1u); + DCHECK_EQ(live_successors[0], switch_instruction->GetDefaultBlock()); + } } } - } else { - for (HBasicBlock* successor : block->GetSuccessors()) { - MarkReachableBlocks(successor, visited); + + for (HBasicBlock* successor : live_successors) { + // Add only those successors that have not been visited yet. + if (!visited->IsBitSet(successor->GetBlockId())) { + visited->SetBit(successor->GetBlockId()); + worklist.push_back(successor); + } } } } @@ -82,7 +100,7 @@ void HDeadCodeElimination::RemoveDeadBlocks() { ArenaBitVector live_blocks(allocator, graph_->GetBlocks().size(), false); ArenaBitVector affected_loops(allocator, graph_->GetBlocks().size(), false); - MarkReachableBlocks(graph_->GetEntryBlock(), &live_blocks); + MarkReachableBlocks(graph_, &live_blocks); bool removed_one_or_more_blocks = false; // Remove all dead blocks. Iterate in post order because removal needs the diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index d05c514912..d38f4c862f 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -374,6 +374,11 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { << instance_of->MustDoNullCheck() << std::noboolalpha; } + void VisitArraySet(HArraySet* array_set) OVERRIDE { + StartAttributeStream("value_can_be_null") << std::boolalpha + << array_set->GetValueCanBeNull() << std::noboolalpha; + } + void VisitInvoke(HInvoke* invoke) OVERRIDE { StartAttributeStream("dex_file_index") << invoke->GetDexMethodIndex(); StartAttributeStream("method_name") << PrettyMethod( @@ -393,6 +398,22 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { StartAttributeStream("intrinsic") << invoke->GetIntrinsic(); } + void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* field_access) OVERRIDE { + StartAttributeStream("field_type") << field_access->GetFieldType(); + } + + void VisitUnresolvedInstanceFieldSet(HUnresolvedInstanceFieldSet* field_access) OVERRIDE { + StartAttributeStream("field_type") << field_access->GetFieldType(); + } + + void VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet* field_access) OVERRIDE { + StartAttributeStream("field_type") << field_access->GetFieldType(); + } + + void VisitUnresolvedStaticFieldSet(HUnresolvedStaticFieldSet* field_access) OVERRIDE { + StartAttributeStream("field_type") << field_access->GetFieldType(); + } + void VisitTryBoundary(HTryBoundary* try_boundary) OVERRIDE { StartAttributeStream("kind") << (try_boundary->IsEntry() ? 
"entry" : "exit"); } @@ -480,8 +501,11 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { StartAttributeStream("can_be_null") << std::boolalpha << instruction->CanBeNull() << std::noboolalpha; StartAttributeStream("exact") << std::boolalpha << info.IsExact() << std::noboolalpha; + } else if (instruction->IsLoadClass()) { + StartAttributeStream("klass") << "unresolved"; } else { - DCHECK(!is_after_pass_) << "Type info should be valid after reference type propagation"; + DCHECK(!is_after_pass_) + << "Expected a valid rti after reference type propagation"; } } if (disasm_info_ != nullptr) { diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc index 1ee8648533..7cf061773f 100644 --- a/compiler/optimizing/gvn.cc +++ b/compiler/optimizing/gvn.cc @@ -15,11 +15,12 @@ */ #include "gvn.h" + +#include "base/arena_containers.h" +#include "base/bit_vector-inl.h" #include "side_effects_analysis.h" #include "utils.h" - #include "utils/arena_bit_vector.h" -#include "base/bit_vector-inl.h" namespace art { @@ -32,13 +33,13 @@ namespace art { * if there is one in the set. In GVN, we would say those instructions have the * same "number". */ -class ValueSet : public ArenaObject<kArenaAllocMisc> { +class ValueSet : public ArenaObject<kArenaAllocGvn> { public: // Constructs an empty ValueSet which owns all its buckets. explicit ValueSet(ArenaAllocator* allocator) : allocator_(allocator), num_buckets_(kMinimumNumberOfBuckets), - buckets_(allocator->AllocArray<Node*>(num_buckets_)), + buckets_(allocator->AllocArray<Node*>(num_buckets_, kArenaAllocGvn)), buckets_owned_(allocator, num_buckets_, false), num_entries_(0) { // ArenaAllocator returns zeroed memory, so no need to set buckets to null. @@ -51,7 +52,7 @@ class ValueSet : public ArenaObject<kArenaAllocMisc> { ValueSet(ArenaAllocator* allocator, const ValueSet& to_copy) : allocator_(allocator), num_buckets_(to_copy.IdealBucketCount()), - buckets_(allocator->AllocArray<Node*>(num_buckets_)), + buckets_(allocator->AllocArray<Node*>(num_buckets_, kArenaAllocGvn)), buckets_owned_(allocator, num_buckets_, false), num_entries_(to_copy.num_entries_) { // ArenaAllocator returns zeroed memory, so entries of buckets_ and @@ -143,7 +144,7 @@ class ValueSet : public ArenaObject<kArenaAllocMisc> { size_t GetNumberOfEntries() const { return num_entries_; } private: - class Node : public ArenaObject<kArenaAllocMisc> { + class Node : public ArenaObject<kArenaAllocGvn> { public: Node(HInstruction* instruction, size_t hash_code, Node* next) : instruction_(instruction), hash_code_(hash_code), next_(next) {} @@ -306,7 +307,7 @@ class GlobalValueNumberer : public ValueObject { : graph_(graph), allocator_(allocator), side_effects_(side_effects), - sets_(allocator, graph->GetBlocks().size(), nullptr) {} + sets_(graph->GetBlocks().size(), nullptr, allocator->Adapter(kArenaAllocGvn)) {} void Run(); @@ -322,14 +323,14 @@ class GlobalValueNumberer : public ValueObject { // ValueSet for blocks. Initially null, but for an individual block they // are allocated and populated by the dominator, and updated by all blocks // in the path from the dominator to the block. 
- GrowableArray<ValueSet*> sets_; + ArenaVector<ValueSet*> sets_; DISALLOW_COPY_AND_ASSIGN(GlobalValueNumberer); }; void GlobalValueNumberer::Run() { DCHECK(side_effects_.HasRun()); - sets_.Put(graph_->GetEntryBlock()->GetBlockId(), new (allocator_) ValueSet(allocator_)); + sets_[graph_->GetEntryBlock()->GetBlockId()] = new (allocator_) ValueSet(allocator_); // Use the reverse post order to ensure the non back-edge predecessors of a block are // visited before the block itself. @@ -348,7 +349,7 @@ void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) { set = new (allocator_) ValueSet(allocator_); } else { HBasicBlock* dominator = block->GetDominator(); - ValueSet* dominator_set = sets_.Get(dominator->GetBlockId()); + ValueSet* dominator_set = sets_[dominator->GetBlockId()]; if (dominator->GetSuccessors().size() == 1) { DCHECK_EQ(dominator->GetSuccessor(0), block); set = dominator_set; @@ -363,7 +364,7 @@ void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) { set->Kill(side_effects_.GetLoopEffects(block)); } else if (predecessors.size() > 1) { for (HBasicBlock* predecessor : predecessors) { - set->IntersectWith(sets_.Get(predecessor->GetBlockId())); + set->IntersectWith(sets_[predecessor->GetBlockId()]); if (set->IsEmpty()) { break; } @@ -372,7 +373,7 @@ void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) { } } - sets_.Put(block->GetBlockId(), set); + sets_[block->GetBlockId()] = set; HInstruction* current = block->GetFirstInstruction(); while (current != nullptr) { diff --git a/compiler/optimizing/induction_var_analysis.cc b/compiler/optimizing/induction_var_analysis.cc index 92c732c0c3..e5123deed6 100644 --- a/compiler/optimizing/induction_var_analysis.cc +++ b/compiler/optimizing/induction_var_analysis.cc @@ -33,17 +33,6 @@ static bool IsLoopInvariant(HLoopInformation* loop, HInstruction* instruction) { } /** - * Returns true if instruction is proper entry-phi-operation for given loop - * (referred to as mu-operation in Gerlek's paper). - */ -static bool IsEntryPhi(HLoopInformation* loop, HInstruction* instruction) { - return - instruction->IsPhi() && - instruction->InputCount() == 2 && - instruction->GetBlock() == loop->GetHeader(); -} - -/** * Since graph traversal may enter a SCC at any position, an initial representation may be rotated, * along dependences, viz. any of (a, b, c, d), (d, a, b, c) (c, d, a, b), (b, c, d, a) assuming * a chain of dependences (mutual independent items may occur in arbitrary order). 
For proper @@ -58,8 +47,9 @@ static void RotateEntryPhiFirst(HLoopInformation* loop, size_t phi_pos = -1; const size_t size = scc->size(); for (size_t i = 0; i < size; i++) { - if (IsEntryPhi(loop, scc->at(i)) && (phi == nullptr || phis.FoundBefore(scc->at(i), phi))) { - phi = scc->at(i); + HInstruction* other = scc->at(i); + if (other->IsLoopHeaderPhi() && (phi == nullptr || phis.FoundBefore(other, phi))) { + phi = other; phi_pos = i; } } @@ -84,11 +74,14 @@ static void RotateEntryPhiFirst(HLoopInformation* loop, HInductionVarAnalysis::HInductionVarAnalysis(HGraph* graph) : HOptimization(graph, kInductionPassName), global_depth_(0), - stack_(graph->GetArena()->Adapter()), - scc_(graph->GetArena()->Adapter()), - map_(std::less<HInstruction*>(), graph->GetArena()->Adapter()), - cycle_(std::less<HInstruction*>(), graph->GetArena()->Adapter()), - induction_(std::less<HLoopInformation*>(), graph->GetArena()->Adapter()) { + stack_(graph->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)), + scc_(graph->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)), + map_(std::less<HInstruction*>(), + graph->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)), + cycle_(std::less<HInstruction*>(), + graph->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)), + induction_(std::less<HLoopInformation*>(), + graph->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)) { } void HInductionVarAnalysis::Run() { @@ -168,7 +161,7 @@ void HInductionVarAnalysis::VisitNode(HLoopInformation* loop, HInstruction* inst } // Classify the SCC. - if (scc_.size() == 1 && !IsEntryPhi(loop, scc_[0])) { + if (scc_.size() == 1 && !scc_[0]->IsLoopHeaderPhi()) { ClassifyTrivial(loop, scc_[0]); } else { ClassifyNonTrivial(loop); @@ -200,10 +193,7 @@ uint32_t HInductionVarAnalysis::VisitDescendant(HLoopInformation* loop, HInstruc void HInductionVarAnalysis::ClassifyTrivial(HLoopInformation* loop, HInstruction* instruction) { InductionInfo* info = nullptr; if (instruction->IsPhi()) { - for (size_t i = 1, count = instruction->InputCount(); i < count; i++) { - info = TransferPhi(LookupInfo(loop, instruction->InputAt(0)), - LookupInfo(loop, instruction->InputAt(i))); - } + info = TransferPhi(loop, instruction, /* input_index */ 0); } else if (instruction->IsAdd()) { info = TransferAddSub(LookupInfo(loop, instruction->InputAt(0)), LookupInfo(loop, instruction->InputAt(1)), kAdd); @@ -241,25 +231,25 @@ void HInductionVarAnalysis::ClassifyNonTrivial(HLoopInformation* loop) { // Rotate proper entry-phi to front. if (size > 1) { - ArenaVector<HInstruction*> other(graph_->GetArena()->Adapter()); + ArenaVector<HInstruction*> other(graph_->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)); RotateEntryPhiFirst(loop, &scc_, &other); } - // Analyze from phi onwards. + // Analyze from entry-phi onwards. HInstruction* phi = scc_[0]; - if (!IsEntryPhi(loop, phi)) { + if (!phi->IsLoopHeaderPhi()) { return; } - HInstruction* external = phi->InputAt(0); - HInstruction* internal = phi->InputAt(1); - InductionInfo* initial = LookupInfo(loop, external); + + // External link should be loop invariant. + InductionInfo* initial = LookupInfo(loop, phi->InputAt(0)); if (initial == nullptr || initial->induction_class != kInvariant) { return; } - // Singleton entry-phi-operation may be a wrap-around induction. + // Singleton is wrap-around induction if all internal links have the same meaning. 
if (size == 1) { - InductionInfo* update = LookupInfo(loop, internal); + InductionInfo* update = TransferPhi(loop, phi, /* input_index */ 1); if (update != nullptr) { AssignInfo(loop, phi, CreateInduction(kWrapAround, initial, update)); } @@ -272,7 +262,7 @@ void HInductionVarAnalysis::ClassifyNonTrivial(HLoopInformation* loop) { HInstruction* instruction = scc_[i]; InductionInfo* update = nullptr; if (instruction->IsPhi()) { - update = SolvePhi(loop, phi, instruction); + update = SolvePhiAllInputs(loop, phi, instruction); } else if (instruction->IsAdd()) { update = SolveAddSub( loop, phi, instruction, instruction->InputAt(0), instruction->InputAt(1), kAdd, true); @@ -286,10 +276,9 @@ void HInductionVarAnalysis::ClassifyNonTrivial(HLoopInformation* loop) { cycle_.Put(instruction, update); } - // Success if the internal link received a meaning. - auto it = cycle_.find(internal); - if (it != cycle_.end()) { - InductionInfo* induction = it->second; + // Success if all internal links received the same temporary meaning. + InductionInfo* induction = SolvePhi(phi, /* input_index */ 1); + if (induction != nullptr) { switch (induction->induction_class) { case kInvariant: // Classify first phi and then the rest of the cycle "on-demand". @@ -329,13 +318,20 @@ HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::RotatePeriodicInduc return CreateInduction(kPeriodic, induction->op_a, RotatePeriodicInduction(induction->op_b, last)); } -HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::TransferPhi(InductionInfo* a, - InductionInfo* b) { - // Transfer over a phi: if both inputs are identical, result is input. - if (InductionEqual(a, b)) { - return a; - } - return nullptr; +HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::TransferPhi(HLoopInformation* loop, + HInstruction* phi, + size_t input_index) { + // Match all phi inputs from input_index onwards exactly. + const size_t count = phi->InputCount(); + DCHECK_LT(input_index, count); + InductionInfo* a = LookupInfo(loop, phi->InputAt(input_index)); + for (size_t i = input_index + 1; i < count; i++) { + InductionInfo* b = LookupInfo(loop, phi->InputAt(i)); + if (!InductionEqual(a, b)) { + return nullptr; + } + } + return a; } HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::TransferAddSub(InductionInfo* a, @@ -421,47 +417,56 @@ HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::TransferNeg(Inducti return nullptr; } -HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::SolvePhi(HLoopInformation* loop, - HInstruction* phi, - HInstruction* instruction) { - // Solve within a cycle over a phi: identical inputs are combined into that input as result. - const size_t count = instruction->InputCount(); - DCHECK_GT(count, 0u); - auto ita = cycle_.find(instruction->InputAt(0)); +HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::SolvePhi(HInstruction* phi, + size_t input_index) { + // Match all phi inputs from input_index onwards exactly. 
+ const size_t count = phi->InputCount(); + DCHECK_LT(input_index, count); + auto ita = cycle_.find(phi->InputAt(input_index)); if (ita != cycle_.end()) { - InductionInfo* a = ita->second; - for (size_t i = 1; i < count; i++) { - auto itb = cycle_.find(instruction->InputAt(i)); - if (itb == cycle_.end() || !HInductionVarAnalysis::InductionEqual(a, itb->second)) { + for (size_t i = input_index + 1; i < count; i++) { + auto itb = cycle_.find(phi->InputAt(i)); + if (itb == cycle_.end() || + !HInductionVarAnalysis::InductionEqual(ita->second, itb->second)) { return nullptr; } } - return a; + return ita->second; } + return nullptr; +} - // Solve within a cycle over another entry-phi: add invariants into a periodic. - if (IsEntryPhi(loop, instruction)) { - InductionInfo* a = LookupInfo(loop, instruction->InputAt(0)); +HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::SolvePhiAllInputs( + HLoopInformation* loop, + HInstruction* entry_phi, + HInstruction* phi) { + // Match all phi inputs. + InductionInfo* match = SolvePhi(phi, /* input_index */ 0); + if (match != nullptr) { + return match; + } + + // Otherwise, try to solve for a periodic seeded from phi onward. + // Only tight multi-statement cycles are considered in order to + // simplify rotating the periodic during the final classification. + if (phi->IsLoopHeaderPhi() && phi->InputCount() == 2) { + InductionInfo* a = LookupInfo(loop, phi->InputAt(0)); if (a != nullptr && a->induction_class == kInvariant) { - if (instruction->InputAt(1) == phi) { - InductionInfo* initial = LookupInfo(loop, phi->InputAt(0)); + if (phi->InputAt(1) == entry_phi) { + InductionInfo* initial = LookupInfo(loop, entry_phi->InputAt(0)); return CreateInduction(kPeriodic, a, initial); } - auto it = cycle_.find(instruction->InputAt(1)); - if (it != cycle_.end()) { - InductionInfo* b = it->second; - if (b->induction_class == kPeriodic) { - return CreateInduction(kPeriodic, a, b); - } + InductionInfo* b = SolvePhi(phi, /* input_index */ 1); + if (b != nullptr && b->induction_class == kPeriodic) { + return CreateInduction(kPeriodic, a, b); } } } - return nullptr; } HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::SolveAddSub(HLoopInformation* loop, - HInstruction* phi, + HInstruction* entry_phi, HInstruction* instruction, HInstruction* x, HInstruction* y, @@ -471,7 +476,7 @@ HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::SolveAddSub(HLoopIn // invariant value, seeded from phi, keeps adding to the stride of the induction. InductionInfo* b = LookupInfo(loop, y); if (b != nullptr && b->induction_class == kInvariant) { - if (x == phi) { + if (x == entry_phi) { return (op == kAdd) ? b : CreateInvariantOp(kNeg, nullptr, b); } auto it = cycle_.find(x); @@ -487,14 +492,15 @@ HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::SolveAddSub(HLoopIn if (op == kAdd) { // Try the other way around for an addition if considered for first time. 
if (is_first_call) { - return SolveAddSub(loop, phi, instruction, y, x, op, false); + return SolveAddSub(loop, entry_phi, instruction, y, x, op, false); } } else if (op == kSub) { - // Solve within a tight cycle for a periodic idiom k = c - k; - if (y == phi && instruction == phi->InputAt(1)) { + // Solve within a tight cycle that is formed by exactly two instructions, + // one phi and one update, for a periodic idiom of the form k = c - k; + if (y == entry_phi && entry_phi->InputCount() == 2 && instruction == entry_phi->InputAt(1)) { InductionInfo* a = LookupInfo(loop, x); if (a != nullptr && a->induction_class == kInvariant) { - InductionInfo* initial = LookupInfo(loop, phi->InputAt(0)); + InductionInfo* initial = LookupInfo(loop, entry_phi->InputAt(0)); return CreateInduction(kPeriodic, CreateInvariantOp(kSub, a, initial), initial); } } @@ -539,42 +545,46 @@ void HInductionVarAnalysis::VisitCondition(HLoopInformation* loop, Primitive::Type type, IfCondition cmp) { if (a->induction_class == kInvariant && b->induction_class == kLinear) { - // Swap conditions (e.g. U > i is same as i < U). + // Swap condition if induction is at right-hand-side (e.g. U > i is same as i < U). switch (cmp) { case kCondLT: VisitCondition(loop, b, a, type, kCondGT); break; case kCondLE: VisitCondition(loop, b, a, type, kCondGE); break; case kCondGT: VisitCondition(loop, b, a, type, kCondLT); break; case kCondGE: VisitCondition(loop, b, a, type, kCondLE); break; + case kCondNE: VisitCondition(loop, b, a, type, kCondNE); break; default: break; } } else if (a->induction_class == kLinear && b->induction_class == kInvariant) { - // Normalize a linear loop control with a constant, nonzero stride: + // Analyze condition with induction at left-hand-side (e.g. i < U). + InductionInfo* lower_expr = a->op_b; + InductionInfo* upper_expr = b; + InductionInfo* stride = a->op_a; + int64_t stride_value = 0; + if (!IsIntAndGet(stride, &stride_value)) { + return; + } + // Rewrite condition i != U into i < U or i > U if end condition is reached exactly. + if (cmp == kCondNE && ((stride_value == +1 && IsTaken(lower_expr, upper_expr, kCondLT)) || + (stride_value == -1 && IsTaken(lower_expr, upper_expr, kCondGT)))) { + cmp = stride_value > 0 ? kCondLT : kCondGT; + } + // Normalize a linear loop control with a nonzero stride: // stride > 0, either i < U or i <= U // stride < 0, either i > U or i >= U - InductionInfo* stride = a->op_a; - InductionInfo* lo_val = a->op_b; - InductionInfo* hi_val = b; - // Analyze the stride thoroughly, since its representation may be compound at this point. 
- InductionVarRange::Value v1 = InductionVarRange::GetMin(stride, nullptr); - InductionVarRange::Value v2 = InductionVarRange::GetMax(stride, nullptr); - if (v1.a_constant == 0 && v2.a_constant == 0 && v1.b_constant == v2.b_constant) { - const int32_t stride_value = v1.b_constant; - if ((stride_value > 0 && (cmp == kCondLT || cmp == kCondLE)) || - (stride_value < 0 && (cmp == kCondGT || cmp == kCondGE))) { - bool is_strict = cmp == kCondLT || cmp == kCondGT; - VisitTripCount(loop, lo_val, hi_val, stride, stride_value, type, is_strict); - } + if ((stride_value > 0 && (cmp == kCondLT || cmp == kCondLE)) || + (stride_value < 0 && (cmp == kCondGT || cmp == kCondGE))) { + VisitTripCount(loop, lower_expr, upper_expr, stride, stride_value, type, cmp); } } } void HInductionVarAnalysis::VisitTripCount(HLoopInformation* loop, - InductionInfo* lo_val, - InductionInfo* hi_val, + InductionInfo* lower_expr, + InductionInfo* upper_expr, InductionInfo* stride, - int32_t stride_value, + int64_t stride_value, Primitive::Type type, - bool is_strict) { + IfCondition cmp) { // Any loop of the general form: // // for (i = L; i <= U; i += S) // S > 0 @@ -586,29 +596,95 @@ void HInductionVarAnalysis::VisitTripCount(HLoopInformation* loop, // for (n = 0; n < TC; n++) // where TC = (U + S - L) / S // .. L + S * n .. // - // NOTE: The TC (trip-count) expression is only valid if the top-test path is taken at - // least once. Otherwise TC is 0. Also, the expression assumes the loop does not - // have any early-exits. Otherwise, TC is an upper bound. + // taking the following into consideration: // - bool cancels = is_strict && std::abs(stride_value) == 1; // compensation cancels conversion? + // (1) Using the same precision, the TC (trip-count) expression should be interpreted as + // an unsigned entity, for example, as in the following loop that uses the full range: + // for (int i = INT_MIN; i < INT_MAX; i++) // TC = UINT_MAX + // (2) The TC is only valid if the loop is taken, otherwise TC = 0, as in: + // for (int i = 12; i < U; i++) // TC = 0 when U >= 12 + // If this cannot be determined at compile-time, the TC is only valid within the + // loop-body proper, not the loop-header unless enforced with an explicit condition. + // (3) The TC is only valid if the loop is finite, otherwise TC has no value, as in: + // for (int i = 0; i <= U; i++) // TC = Inf when U = INT_MAX + // If this cannot be determined at compile-time, the TC is only valid when enforced + // with an explicit condition. + // (4) For loops which early-exits, the TC forms an upper bound, as in: + // for (int i = 0; i < 10 && ....; i++) // TC <= 10 + const bool is_taken = IsTaken(lower_expr, upper_expr, cmp); + const bool is_finite = IsFinite(upper_expr, stride_value, type, cmp); + const bool cancels = (cmp == kCondLT || cmp == kCondGT) && std::abs(stride_value) == 1; if (!cancels) { // Convert exclusive integral inequality into inclusive integral inequality, // viz. condition i < U is i <= U - 1 and condition i > U is i >= U + 1. - if (is_strict) { - const InductionOp op = stride_value > 0 ? kSub : kAdd; - hi_val = CreateInvariantOp(op, hi_val, CreateConstant(1, type)); + if (cmp == kCondLT) { + upper_expr = CreateInvariantOp(kSub, upper_expr, CreateConstant(1, type)); + } else if (cmp == kCondGT) { + upper_expr = CreateInvariantOp(kAdd, upper_expr, CreateConstant(1, type)); } // Compensate for stride. 
- hi_val = CreateInvariantOp(kAdd, hi_val, stride); + upper_expr = CreateInvariantOp(kAdd, upper_expr, stride); } - + InductionInfo* trip_count + = CreateInvariantOp(kDiv, CreateInvariantOp(kSub, upper_expr, lower_expr), stride); // Assign the trip-count expression to the loop control. Clients that use the information - // should be aware that due to the top-test assumption, the expression is only valid in the - // loop-body proper, and not yet in the loop-header. If the loop has any early exits, the - // trip-count forms a conservative upper bound on the number of loop iterations. - InductionInfo* trip_count = - CreateInvariantOp(kDiv, CreateInvariantOp(kSub, hi_val, lo_val), stride); - AssignInfo(loop, loop->GetHeader()->GetLastInstruction(), trip_count); + // should be aware that the expression is only valid under the conditions listed above. + InductionOp tcKind = kTripCountInBodyUnsafe; + if (is_taken && is_finite) { + tcKind = kTripCountInLoop; + } else if (is_finite) { + tcKind = kTripCountInBody; + } else if (is_taken) { + tcKind = kTripCountInLoopUnsafe; + } + AssignInfo(loop, loop->GetHeader()->GetLastInstruction(), CreateTripCount(tcKind, trip_count)); +} + +bool HInductionVarAnalysis::IsTaken(InductionInfo* lower_expr, + InductionInfo* upper_expr, + IfCondition cmp) { + int64_t lower_value; + int64_t upper_value; + if (IsIntAndGet(lower_expr, &lower_value) && IsIntAndGet(upper_expr, &upper_value)) { + switch (cmp) { + case kCondLT: return lower_value < upper_value; + case kCondLE: return lower_value <= upper_value; + case kCondGT: return lower_value > upper_value; + case kCondGE: return lower_value >= upper_value; + case kCondEQ: + case kCondNE: LOG(FATAL) << "CONDITION UNREACHABLE"; + } + } + return false; // not certain, may be untaken +} + +bool HInductionVarAnalysis::IsFinite(InductionInfo* upper_expr, + int64_t stride_value, + Primitive::Type type, + IfCondition cmp) { + const int64_t min = type == Primitive::kPrimInt + ? std::numeric_limits<int32_t>::min() + : std::numeric_limits<int64_t>::min(); + const int64_t max = type == Primitive::kPrimInt + ? std::numeric_limits<int32_t>::max() + : std::numeric_limits<int64_t>::max(); + // Some rules under which it is certain at compile-time that the loop is finite. 
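An editorial aside on where the bounds in the rules below come from: for `i < U` with stride S > 0, the largest i that still passes the test is at most U - 1, so the next value computed is at most U - 1 + S; that increment stays representable iff U - 1 + S <= max, i.e. U <= max - S + 1, which is the `value <= (max - stride_value + 1)` check, and for S == 1 the counter walks through every value and exits at U, so no bound on U is needed. For `i <= U` the largest passing value is U itself, giving U <= max - S, with no stride-one exception because `i <= max` can never fail. The kCondGT/kCondGE cases mirror this at the minimum of the type.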
+ int64_t value; + switch (cmp) { + case kCondLT: + return stride_value == 1 || + (IsIntAndGet(upper_expr, &value) && value <= (max - stride_value + 1)); + case kCondLE: + return (IsIntAndGet(upper_expr, &value) && value <= (max - stride_value)); + case kCondGT: + return stride_value == -1 || + (IsIntAndGet(upper_expr, &value) && value >= (min - stride_value - 1)); + case kCondGE: + return (IsIntAndGet(upper_expr, &value) && value >= (min - stride_value)); + case kCondEQ: + case kCondNE: LOG(FATAL) << "CONDITION UNREACHABLE"; + } + return false; // not certain, may be infinite } void HInductionVarAnalysis::AssignInfo(HLoopInformation* loop, @@ -618,7 +694,8 @@ void HInductionVarAnalysis::AssignInfo(HLoopInformation* loop, if (it == induction_.end()) { it = induction_.Put(loop, ArenaSafeMap<HInstruction*, InductionInfo*>( - std::less<HInstruction*>(), graph_->GetArena()->Adapter())); + std::less<HInstruction*>(), + graph_->GetArena()->Adapter(kArenaAllocInductionVarAnalysis))); } it->second.Put(instruction, info); } @@ -725,13 +802,22 @@ bool HInductionVarAnalysis::InductionEqual(InductionInfo* info1, } bool HInductionVarAnalysis::IsIntAndGet(InductionInfo* info, int64_t* value) { - if (info != nullptr && info->induction_class == kInvariant && info->operation == kFetch) { - DCHECK(info->fetch); - if (info->fetch->IsIntConstant()) { - *value = info->fetch->AsIntConstant()->GetValue(); - return true; - } else if (info->fetch->IsLongConstant()) { - *value = info->fetch->AsLongConstant()->GetValue(); + if (info != nullptr && info->induction_class == kInvariant) { + // A direct constant fetch. + if (info->operation == kFetch) { + DCHECK(info->fetch); + if (info->fetch->IsIntConstant()) { + *value = info->fetch->AsIntConstant()->GetValue(); + return true; + } else if (info->fetch->IsLongConstant()) { + *value = info->fetch->AsLongConstant()->GetValue(); + return true; + } + } + // Use range analysis to resolve compound values. + int32_t range_value; + if (InductionVarRange::GetConstant(info, &range_value)) { + *value = range_value; return true; } } @@ -759,6 +845,10 @@ std::string HInductionVarAnalysis::InductionToString(InductionInfo* info) { inv += std::to_string(info->fetch->GetId()) + ":" + info->fetch->DebugName(); } break; + case kTripCountInLoop: inv += "TC-loop:"; break; + case kTripCountInBody: inv += "TC-body:"; break; + case kTripCountInLoopUnsafe: inv += "TC-loop-unsafe:"; break; + case kTripCountInBodyUnsafe: inv += "TC-body-unsafe:"; break; } inv += InductionToString(info->op_b); return inv + ")"; diff --git a/compiler/optimizing/induction_var_analysis.h b/compiler/optimizing/induction_var_analysis.h index 8eccf925c1..7ab80cd676 100644 --- a/compiler/optimizing/induction_var_analysis.h +++ b/compiler/optimizing/induction_var_analysis.h @@ -56,13 +56,20 @@ class HInductionVarAnalysis : public HOptimization { }; enum InductionOp { - kNop, // no-operation: a true induction + // No-operation: a true induction. + kNop, + // Various invariant operations. kAdd, kSub, kNeg, kMul, kDiv, - kFetch + kFetch, + // Trip counts (valid in full loop or only body proper; unsafe implies loop may be infinite). 
+ kTripCountInLoop, + kTripCountInBody, + kTripCountInLoopUnsafe, + kTripCountInBodyUnsafe }; /** @@ -77,8 +84,10 @@ class HInductionVarAnalysis : public HOptimization { * nop: a, then defined by b * (4) periodic * nop: a, then defined by b (repeated when exhausted) + * (5) trip-count: + * tc: defined by b */ - struct InductionInfo : public ArenaObject<kArenaAllocMisc> { + struct InductionInfo : public ArenaObject<kArenaAllocInductionVarAnalysis> { InductionInfo(InductionClass ic, InductionOp op, InductionInfo* a, @@ -110,6 +119,10 @@ class HInductionVarAnalysis : public HOptimization { return new (graph_->GetArena()) InductionInfo(kInvariant, kFetch, nullptr, nullptr, f); } + InductionInfo* CreateTripCount(InductionOp op, InductionInfo* b) { + return new (graph_->GetArena()) InductionInfo(kInvariant, op, nullptr, b, nullptr); + } + InductionInfo* CreateInduction(InductionClass ic, InductionInfo* a, InductionInfo* b) { DCHECK(a != nullptr && b != nullptr); return new (graph_->GetArena()) InductionInfo(ic, kNop, a, b, nullptr); @@ -121,26 +134,27 @@ class HInductionVarAnalysis : public HOptimization { uint32_t VisitDescendant(HLoopInformation* loop, HInstruction* instruction); void ClassifyTrivial(HLoopInformation* loop, HInstruction* instruction); void ClassifyNonTrivial(HLoopInformation* loop); + InductionInfo* RotatePeriodicInduction(InductionInfo* induction, InductionInfo* last); // Transfer operations. - InductionInfo* TransferPhi(InductionInfo* a, InductionInfo* b); + InductionInfo* TransferPhi(HLoopInformation* loop, HInstruction* phi, size_t input_index); InductionInfo* TransferAddSub(InductionInfo* a, InductionInfo* b, InductionOp op); InductionInfo* TransferMul(InductionInfo* a, InductionInfo* b); InductionInfo* TransferShl(InductionInfo* a, InductionInfo* b, Primitive::Type type); InductionInfo* TransferNeg(InductionInfo* a); // Solvers. - InductionInfo* SolvePhi(HLoopInformation* loop, - HInstruction* phi, - HInstruction* instruction); + InductionInfo* SolvePhi(HInstruction* phi, size_t input_index); + InductionInfo* SolvePhiAllInputs(HLoopInformation* loop, + HInstruction* entry_phi, + HInstruction* phi); InductionInfo* SolveAddSub(HLoopInformation* loop, - HInstruction* phi, + HInstruction* entry_phi, HInstruction* instruction, HInstruction* x, HInstruction* y, InductionOp op, bool is_first_call); - InductionInfo* RotatePeriodicInduction(InductionInfo* induction, InductionInfo* last); // Trip count information. void VisitControl(HLoopInformation* loop); @@ -150,12 +164,17 @@ class HInductionVarAnalysis : public HOptimization { Primitive::Type type, IfCondition cmp); void VisitTripCount(HLoopInformation* loop, - InductionInfo* lo_val, - InductionInfo* hi_val, + InductionInfo* lower_expr, + InductionInfo* upper_expr, InductionInfo* stride, - int32_t stride_value, + int64_t stride_value, Primitive::Type type, - bool is_strict); + IfCondition cmp); + bool IsTaken(InductionInfo* lower_expr, InductionInfo* upper_expr, IfCondition cmp); + bool IsFinite(InductionInfo* upper_expr, + int64_t stride_value, + Primitive::Type type, + IfCondition cmp); // Assign and lookup. 
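The four trip-count kinds above, together with the comment block in VisitTripCount, determine when a client may rely on the trip count without adding its own runtime test. A minimal standalone decoder (a sketch, not ART code; the short names mirror kTripCountInLoop and friends) spells out the encoding:

  // What each trip-count kind guarantees without any extra runtime test.
  enum TripCountKind { kInLoop, kInBody, kInLoopUnsafe, kInBodyUnsafe };

  struct Guarantees {
    bool loop_is_taken;   // the TC is also valid in the loop header, not just the body
    bool loop_is_finite;  // the TC is an actual value, not "infinity"
  };

  Guarantees Decode(TripCountKind kind) {
    switch (kind) {
      case kInLoop:       return {true,  true};
      case kInBody:       return {false, true};
      case kInLoopUnsafe: return {true,  false};
      case kInBodyUnsafe: return {false, false};
    }
    return {false, false};
  }

Either *Unsafe kind means caveat (3) applies: the loop may be infinite, so the trip count may only be used behind an explicit finiteness test supplied by the client.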
void AssignInfo(HLoopInformation* loop, HInstruction* instruction, InductionInfo* info); diff --git a/compiler/optimizing/induction_var_analysis_test.cc b/compiler/optimizing/induction_var_analysis_test.cc index fca1ca55e5..20492e7152 100644 --- a/compiler/optimizing/induction_var_analysis_test.cc +++ b/compiler/optimizing/induction_var_analysis_test.cc @@ -20,6 +20,7 @@ #include "builder.h" #include "gtest/gtest.h" #include "induction_var_analysis.h" +#include "induction_var_range.h" #include "nodes.h" #include "optimizing_unit_test.h" @@ -233,7 +234,8 @@ TEST_F(InductionVarAnalysisTest, FindBasicInduction) { EXPECT_STREQ("((1) * i + (1))", GetInductionInfo(increment_[0], 0).c_str()); // Trip-count. - EXPECT_STREQ("(100)", GetInductionInfo(loop_header_[0]->GetLastInstruction(), 0).c_str()); + EXPECT_STREQ("(TC-loop:(100))", + GetInductionInfo(loop_header_[0]->GetLastInstruction(), 0).c_str()); } TEST_F(InductionVarAnalysisTest, FindDerivedInduction) { @@ -388,7 +390,7 @@ TEST_F(InductionVarAnalysisTest, FindSecondOrderWrapAroundInduction) { HInstruction* store = InsertArrayStore(induc_, 0); InsertLocalStore(induc_, InsertLocalLoad(tmp_, 0), 0); HInstruction *sub = InsertInstruction( - new (&allocator_) HSub(Primitive::kPrimInt, constant100_, InsertLocalLoad(basic_[0], 0)), 0); + new (&allocator_) HSub(Primitive::kPrimInt, constant100_, InsertLocalLoad(basic_[0], 0)), 0); InsertLocalStore(tmp_, sub, 0); PerformInductionVarAnalysis(); @@ -412,16 +414,16 @@ TEST_F(InductionVarAnalysisTest, FindWrapAroundDerivedInduction) { new (&allocator_) HAdd(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant100_), 0); InsertLocalStore(tmp_, add, 0); HInstruction *sub = InsertInstruction( - new (&allocator_) HSub(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant100_), 0); + new (&allocator_) HSub(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant100_), 0); InsertLocalStore(tmp_, sub, 0); HInstruction *mul = InsertInstruction( - new (&allocator_) HMul(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant100_), 0); + new (&allocator_) HMul(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant100_), 0); InsertLocalStore(tmp_, mul, 0); HInstruction *shl = InsertInstruction( - new (&allocator_) HShl(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant1_), 0); + new (&allocator_) HShl(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant1_), 0); InsertLocalStore(tmp_, shl, 0); HInstruction *neg = InsertInstruction( - new (&allocator_) HNeg(Primitive::kPrimInt, InsertLocalLoad(induc_, 0)), 0); + new (&allocator_) HNeg(Primitive::kPrimInt, InsertLocalLoad(induc_, 0)), 0); InsertLocalStore(tmp_, neg, 0); InsertLocalStore( induc_, @@ -471,7 +473,7 @@ TEST_F(InductionVarAnalysisTest, FindIdiomaticPeriodicInduction) { BuildLoopNest(1); HInstruction* store = InsertArrayStore(induc_, 0); HInstruction *sub = InsertInstruction( - new (&allocator_) HSub(Primitive::kPrimInt, constant1_, InsertLocalLoad(induc_, 0)), 0); + new (&allocator_) HSub(Primitive::kPrimInt, constant1_, InsertLocalLoad(induc_, 0)), 0); InsertLocalStore(induc_, sub, 0); PerformInductionVarAnalysis(); @@ -497,19 +499,19 @@ TEST_F(InductionVarAnalysisTest, FindDerivedPeriodicInduction) { HSub(Primitive::kPrimInt, constant1_, InsertLocalLoad(induc_, 0)), 0), 0); // Derived expressions. 
HInstruction *add = InsertInstruction( - new (&allocator_) HAdd(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant100_), 0); + new (&allocator_) HAdd(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant100_), 0); InsertLocalStore(tmp_, add, 0); HInstruction *sub = InsertInstruction( - new (&allocator_) HSub(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant100_), 0); + new (&allocator_) HSub(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant100_), 0); InsertLocalStore(tmp_, sub, 0); HInstruction *mul = InsertInstruction( - new (&allocator_) HMul(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant100_), 0); + new (&allocator_) HMul(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant100_), 0); InsertLocalStore(tmp_, mul, 0); HInstruction *shl = InsertInstruction( - new (&allocator_) HShl(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant1_), 0); + new (&allocator_) HShl(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant1_), 0); InsertLocalStore(tmp_, shl, 0); HInstruction *neg = InsertInstruction( - new (&allocator_) HNeg(Primitive::kPrimInt, InsertLocalLoad(induc_, 0)), 0); + new (&allocator_) HNeg(Primitive::kPrimInt, InsertLocalLoad(induc_, 0)), 0); InsertLocalStore(tmp_, neg, 0); PerformInductionVarAnalysis(); @@ -520,6 +522,36 @@ TEST_F(InductionVarAnalysisTest, FindDerivedPeriodicInduction) { EXPECT_STREQ("periodic(( - (1)), (0))", GetInductionInfo(neg, 0).c_str()); } +TEST_F(InductionVarAnalysisTest, FindRange) { + // Setup: + // for (int i = 0; i < 100; i++) { + // k = i << 1; + // k = k + 1; + // a[k] = 0; + // } + BuildLoopNest(1); + HInstruction *shl = InsertInstruction( + new (&allocator_) HShl(Primitive::kPrimInt, InsertLocalLoad(basic_[0], 0), constant1_), 0); + InsertLocalStore(induc_, shl, 0); + HInstruction *add = InsertInstruction( + new (&allocator_) HAdd(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant1_), 0); + InsertLocalStore(induc_, add, 0); + HInstruction* store = InsertArrayStore(induc_, 0); + PerformInductionVarAnalysis(); + + EXPECT_STREQ("((2) * i + (1))", GetInductionInfo(store->InputAt(1), 0).c_str()); + + InductionVarRange range(iva_); + InductionVarRange::Value v_min = range.GetMinInduction(store, store->InputAt(1)); + InductionVarRange::Value v_max = range.GetMaxInduction(store, store->InputAt(1)); + ASSERT_TRUE(v_min.is_known); + EXPECT_EQ(0, v_min.a_constant); + EXPECT_EQ(1, v_min.b_constant); + ASSERT_TRUE(v_max.is_known); + EXPECT_EQ(0, v_max.a_constant); + EXPECT_EQ(199, v_max.b_constant); +} + TEST_F(InductionVarAnalysisTest, FindDeepLoopInduction) { // Setup: // k = 0; @@ -550,7 +582,8 @@ TEST_F(InductionVarAnalysisTest, FindDeepLoopInduction) { } EXPECT_STREQ("((1) * i + (1))", GetInductionInfo(increment_[d], d).c_str()); // Trip-count. - EXPECT_STREQ("(100)", GetInductionInfo(loop_header_[d]->GetLastInstruction(), d).c_str()); + EXPECT_STREQ("(TC-loop:(100))", + GetInductionInfo(loop_header_[d]->GetLastInstruction(), d).c_str()); } } diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc index 311042756f..db12819060 100644 --- a/compiler/optimizing/induction_var_range.cc +++ b/compiler/optimizing/induction_var_range.cc @@ -14,15 +14,15 @@ * limitations under the License. */ -#include <limits.h> - #include "induction_var_range.h" +#include <limits> + namespace art { /** Returns true if 64-bit constant fits in 32-bit constant. 
*/ static bool CanLongValueFitIntoInt(int64_t c) { - return INT_MIN <= c && c <= INT_MAX; + return std::numeric_limits<int32_t>::min() <= c && c <= std::numeric_limits<int32_t>::max(); } /** Returns true if 32-bit addition can be done safely. */ @@ -86,49 +86,36 @@ InductionVarRange::InductionVarRange(HInductionVarAnalysis* induction_analysis) InductionVarRange::Value InductionVarRange::GetMinInduction(HInstruction* context, HInstruction* instruction) { - HLoopInformation* loop = context->GetBlock()->GetLoopInformation(); - if (loop != nullptr) { - return GetMin(induction_analysis_->LookupInfo(loop, instruction), GetTripCount(loop, context)); - } - return Value(); + return GetInduction(context, instruction, /* is_min */ true); } InductionVarRange::Value InductionVarRange::GetMaxInduction(HInstruction* context, HInstruction* instruction) { - HLoopInformation* loop = context->GetBlock()->GetLoopInformation(); - if (loop != nullptr) { - return SimplifyMax( - GetMax(induction_analysis_->LookupInfo(loop, instruction), GetTripCount(loop, context))); - } - return Value(); + return SimplifyMax(GetInduction(context, instruction, /* is_min */ false)); } // // Private class methods. // -HInductionVarAnalysis::InductionInfo* InductionVarRange::GetTripCount(HLoopInformation* loop, - HInstruction* context) { - // The trip-count expression is only valid when the top-test is taken at least once, - // that means, when the analyzed context appears outside the loop header itself. - // Early-exit loops are okay, since in those cases, the trip-count is conservative. - // - // TODO: deal with runtime safety issues on TCs - // - if (context->GetBlock() != loop->GetHeader()) { - HInductionVarAnalysis::InductionInfo* trip = - induction_analysis_->LookupInfo(loop, loop->GetHeader()->GetLastInstruction()); - if (trip != nullptr) { - // Wrap the trip-count representation in its own unusual NOP node, so that range analysis - // is able to determine the [0, TC - 1] interval without having to construct constants. - return induction_analysis_->CreateInvariantOp(HInductionVarAnalysis::kNop, trip, trip); - } +InductionVarRange::Value InductionVarRange::GetInduction(HInstruction* context, + HInstruction* instruction, + bool is_min) { + HLoopInformation* loop = context->GetBlock()->GetLoopInformation(); // closest enveloping loop + if (loop != nullptr) { + HBasicBlock* header = loop->GetHeader(); + bool in_body = context->GetBlock() != header; + return GetVal(induction_analysis_->LookupInfo(loop, instruction), + induction_analysis_->LookupInfo(loop, header->GetLastInstruction()), + in_body, + is_min); } - return nullptr; + return Value(); } InductionVarRange::Value InductionVarRange::GetFetch(HInstruction* instruction, HInductionVarAnalysis::InductionInfo* trip, + bool in_body, bool is_min) { // Detect constants and chase the fetch a bit deeper into the HIR tree, so that it becomes // more likely range analysis will compare the same instructions as terminal nodes. 
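A tiny trace (editorial illustration) of how the fetch-chasing above keeps bounds symbolic instead of giving up on non-constant operands; Value(instr, a, b) is the a * instr + b form used throughout this file:

  GetFetch(n + 4)                        // n is not a constant
    = AddValue(GetFetch(n), Value(4))
    = AddValue(Value(n, 1, 0), Value(4))
    = Value(n, 1, 4)                     // the symbolic bound 1 * n + 4

so a later comparison against a bound expressed in terms of the same n lands on the same terminal node.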
@@ -137,86 +124,68 @@ InductionVarRange::Value InductionVarRange::GetFetch(HInstruction* instruction, return Value(value); } else if (instruction->IsAdd()) { if (IsIntAndGet(instruction->InputAt(0), &value)) { - return AddValue(Value(value), GetFetch(instruction->InputAt(1), trip, is_min)); + return AddValue(Value(value), GetFetch(instruction->InputAt(1), trip, in_body, is_min)); } else if (IsIntAndGet(instruction->InputAt(1), &value)) { - return AddValue(GetFetch(instruction->InputAt(0), trip, is_min), Value(value)); + return AddValue(GetFetch(instruction->InputAt(0), trip, in_body, is_min), Value(value)); } } else if (is_min) { - // Special case for finding minimum: minimum of trip-count is 1. - if (trip != nullptr && instruction == trip->op_b->fetch) { + // Special case for finding minimum: minimum of trip-count in loop-body is 1. + if (trip != nullptr && in_body && instruction == trip->op_b->fetch) { return Value(1); } } return Value(instruction, 1, 0); } -InductionVarRange::Value InductionVarRange::GetMin(HInductionVarAnalysis::InductionInfo* info, - HInductionVarAnalysis::InductionInfo* trip) { - if (info != nullptr) { - switch (info->induction_class) { - case HInductionVarAnalysis::kInvariant: - // Invariants. - switch (info->operation) { - case HInductionVarAnalysis::kNop: // normalized: 0 - DCHECK_EQ(info->op_a, info->op_b); - return Value(0); - case HInductionVarAnalysis::kAdd: - return AddValue(GetMin(info->op_a, trip), GetMin(info->op_b, trip)); - case HInductionVarAnalysis::kSub: // second max! - return SubValue(GetMin(info->op_a, trip), GetMax(info->op_b, trip)); - case HInductionVarAnalysis::kNeg: // second max! - return SubValue(Value(0), GetMax(info->op_b, trip)); - case HInductionVarAnalysis::kMul: - return GetMul(info->op_a, info->op_b, trip, true); - case HInductionVarAnalysis::kDiv: - return GetDiv(info->op_a, info->op_b, trip, true); - case HInductionVarAnalysis::kFetch: - return GetFetch(info->fetch, trip, true); - } - break; - case HInductionVarAnalysis::kLinear: - // Minimum over linear induction a * i + b, for normalized 0 <= i < TC. - return AddValue(GetMul(info->op_a, trip, trip, true), GetMin(info->op_b, trip)); - case HInductionVarAnalysis::kWrapAround: - case HInductionVarAnalysis::kPeriodic: - // Minimum over all values in the wrap-around/periodic. - return MinValue(GetMin(info->op_a, trip), GetMin(info->op_b, trip)); - } - } - return Value(); -} - -InductionVarRange::Value InductionVarRange::GetMax(HInductionVarAnalysis::InductionInfo* info, - HInductionVarAnalysis::InductionInfo* trip) { +InductionVarRange::Value InductionVarRange::GetVal(HInductionVarAnalysis::InductionInfo* info, + HInductionVarAnalysis::InductionInfo* trip, + bool in_body, + bool is_min) { if (info != nullptr) { switch (info->induction_class) { case HInductionVarAnalysis::kInvariant: // Invariants. switch (info->operation) { - case HInductionVarAnalysis::kNop: // normalized: TC - 1 - DCHECK_EQ(info->op_a, info->op_b); - return SubValue(GetMax(info->op_b, trip), Value(1)); case HInductionVarAnalysis::kAdd: - return AddValue(GetMax(info->op_a, trip), GetMax(info->op_b, trip)); - case HInductionVarAnalysis::kSub: // second min! - return SubValue(GetMax(info->op_a, trip), GetMin(info->op_b, trip)); - case HInductionVarAnalysis::kNeg: // second min! - return SubValue(Value(0), GetMin(info->op_b, trip)); + return AddValue(GetVal(info->op_a, trip, in_body, is_min), + GetVal(info->op_b, trip, in_body, is_min)); + case HInductionVarAnalysis::kSub: // second reversed! 
+ return SubValue(GetVal(info->op_a, trip, in_body, is_min), + GetVal(info->op_b, trip, in_body, !is_min)); + case HInductionVarAnalysis::kNeg: // second reversed! + return SubValue(Value(0), + GetVal(info->op_b, trip, in_body, !is_min)); case HInductionVarAnalysis::kMul: - return GetMul(info->op_a, info->op_b, trip, false); + return GetMul(info->op_a, info->op_b, trip, in_body, is_min); case HInductionVarAnalysis::kDiv: - return GetDiv(info->op_a, info->op_b, trip, false); + return GetDiv(info->op_a, info->op_b, trip, in_body, is_min); case HInductionVarAnalysis::kFetch: - return GetFetch(info->fetch, trip, false); + return GetFetch(info->fetch, trip, in_body, is_min); + case HInductionVarAnalysis::kTripCountInLoop: + if (!in_body) { + return is_min ? Value(0) + : GetVal(info->op_b, trip, in_body, is_min); // one extra! + } + FALLTHROUGH_INTENDED; + case HInductionVarAnalysis::kTripCountInBody: + if (in_body) { + return is_min ? Value(0) + : SubValue(GetVal(info->op_b, trip, in_body, is_min), Value(1)); + } + break; + default: + break; } break; case HInductionVarAnalysis::kLinear: - // Maximum over linear induction a * i + b, for normalized 0 <= i < TC. - return AddValue(GetMul(info->op_a, trip, trip, false), GetMax(info->op_b, trip)); + // Linear induction a * i + b, for normalized 0 <= i < TC. + return AddValue(GetMul(info->op_a, trip, trip, in_body, is_min), + GetVal(info->op_b, trip, in_body, is_min)); case HInductionVarAnalysis::kWrapAround: case HInductionVarAnalysis::kPeriodic: - // Maximum over all values in the wrap-around/periodic. - return MaxValue(GetMax(info->op_a, trip), GetMax(info->op_b, trip)); + // Merge values in the wrap-around/periodic. + return MergeVal(GetVal(info->op_a, trip, in_body, is_min), + GetVal(info->op_b, trip, in_body, is_min), is_min); } } return Value(); @@ -225,11 +194,12 @@ InductionVarRange::Value InductionVarRange::GetMax(HInductionVarAnalysis::Induct InductionVarRange::Value InductionVarRange::GetMul(HInductionVarAnalysis::InductionInfo* info1, HInductionVarAnalysis::InductionInfo* info2, HInductionVarAnalysis::InductionInfo* trip, + bool in_body, bool is_min) { - Value v1_min = GetMin(info1, trip); - Value v1_max = GetMax(info1, trip); - Value v2_min = GetMin(info2, trip); - Value v2_max = GetMax(info2, trip); + Value v1_min = GetVal(info1, trip, in_body, /* is_min */ true); + Value v1_max = GetVal(info1, trip, in_body, /* is_min */ false); + Value v2_min = GetVal(info2, trip, in_body, /* is_min */ true); + Value v2_max = GetVal(info2, trip, in_body, /* is_min */ false); if (v1_min.is_known && v1_min.a_constant == 0 && v1_min.b_constant >= 0) { // Positive range vs. positive or negative range. 
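A worked instance of the kLinear and trip-count cases above (not part of the patch), matching the new FindRange test further down: for `for (int i = 0; i < 100; i++) { a[2*i+1] = 0; }` the store index is the linear induction 2 * i + 1 with a kTripCountInLoop node of 100.

  in the body:   i in [0, TC - 1] = [0, 99]   =>  2 * i + 1 in [1, 199]
  in the header: i in [0, TC]     = [0, 100]  (the test is evaluated one extra time)

The "one extra" header evaluation is exactly why kTripCountInLoop returns TC itself when in_body is false, while in the body the maximum is TC - 1.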
if (v2_min.is_known && v2_min.a_constant == 0 && v2_min.b_constant >= 0) { @@ -255,11 +225,12 @@ InductionVarRange::Value InductionVarRange::GetMul(HInductionVarAnalysis::Induct InductionVarRange::Value InductionVarRange::GetDiv(HInductionVarAnalysis::InductionInfo* info1, HInductionVarAnalysis::InductionInfo* info2, HInductionVarAnalysis::InductionInfo* trip, + bool in_body, bool is_min) { - Value v1_min = GetMin(info1, trip); - Value v1_max = GetMax(info1, trip); - Value v2_min = GetMin(info2, trip); - Value v2_max = GetMax(info2, trip); + Value v1_min = GetVal(info1, trip, in_body, /* is_min */ true); + Value v1_max = GetVal(info1, trip, in_body, /* is_min */ false); + Value v2_min = GetVal(info2, trip, in_body, /* is_min */ true); + Value v2_max = GetVal(info2, trip, in_body, /* is_min */ false); if (v1_min.is_known && v1_min.a_constant == 0 && v1_min.b_constant >= 0) { // Positive range vs. positive or negative range. if (v2_min.is_known && v2_min.a_constant == 0 && v2_min.b_constant >= 0) { @@ -282,6 +253,16 @@ InductionVarRange::Value InductionVarRange::GetDiv(HInductionVarAnalysis::Induct return Value(); } +bool InductionVarRange::GetConstant(HInductionVarAnalysis::InductionInfo* info, int32_t *value) { + Value v_min = GetVal(info, nullptr, false, /* is_min */ true); + Value v_max = GetVal(info, nullptr, false, /* is_min */ false); + if (v_min.a_constant == 0 && v_max.a_constant == 0 && v_min.b_constant == v_max.b_constant) { + *value = v_min.b_constant; + return true; + } + return false; +} + InductionVarRange::Value InductionVarRange::AddValue(Value v1, Value v2) { if (v1.is_known && v2.is_known && IsSafeAdd(v1.b_constant, v2.b_constant)) { const int32_t b = v1.b_constant + v2.b_constant; @@ -334,19 +315,12 @@ InductionVarRange::Value InductionVarRange::DivValue(Value v1, Value v2) { return Value(); } -InductionVarRange::Value InductionVarRange::MinValue(Value v1, Value v2) { - if (v1.is_known && v2.is_known) { - if (v1.instruction == v2.instruction && v1.a_constant == v2.a_constant) { - return Value(v1.instruction, v1.a_constant, std::min(v1.b_constant, v2.b_constant)); - } - } - return Value(); -} - -InductionVarRange::Value InductionVarRange::MaxValue(Value v1, Value v2) { +InductionVarRange::Value InductionVarRange::MergeVal(Value v1, Value v2, bool is_min) { if (v1.is_known && v2.is_known) { if (v1.instruction == v2.instruction && v1.a_constant == v2.a_constant) { - return Value(v1.instruction, v1.a_constant, std::max(v1.b_constant, v2.b_constant)); + return Value(v1.instruction, v1.a_constant, + is_min ? std::min(v1.b_constant, v2.b_constant) + : std::max(v1.b_constant, v2.b_constant)); } } return Value(); diff --git a/compiler/optimizing/induction_var_range.h b/compiler/optimizing/induction_var_range.h index 96cbd46279..dbdd2eedac 100644 --- a/compiler/optimizing/induction_var_range.h +++ b/compiler/optimizing/induction_var_range.h @@ -73,31 +73,34 @@ class InductionVarRange { // Private helper methods. 
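The new GetConstant above is what lets IsIntAndGet in the analysis resolve compound invariants (an editorial aside): a node counts as the constant c exactly when the minimum and maximum walks agree on c. For example, an invariant built as (100) - (1) is not a kFetch constant, yet

  min((100) - (1)) = 100 - 1 = 99
  max((100) - (1)) = 100 - 1 = 99   =>  GetConstant yields 99

whereas for a genuine range such as [1, 2] the two walks disagree and GetConstant declines, which is what the new GetConstant unit test below exercises.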
// - HInductionVarAnalysis::InductionInfo* GetTripCount(HLoopInformation* loop, HInstruction* context); + Value GetInduction(HInstruction* context, HInstruction* instruction, bool is_min); static Value GetFetch(HInstruction* instruction, HInductionVarAnalysis::InductionInfo* trip, + bool in_body, bool is_min); - - static Value GetMin(HInductionVarAnalysis::InductionInfo* info, - HInductionVarAnalysis::InductionInfo* trip); - static Value GetMax(HInductionVarAnalysis::InductionInfo* info, - HInductionVarAnalysis::InductionInfo* trip); + static Value GetVal(HInductionVarAnalysis::InductionInfo* info, + HInductionVarAnalysis::InductionInfo* trip, + bool in_body, + bool is_min); static Value GetMul(HInductionVarAnalysis::InductionInfo* info1, HInductionVarAnalysis::InductionInfo* info2, HInductionVarAnalysis::InductionInfo* trip, + bool in_body, bool is_min); static Value GetDiv(HInductionVarAnalysis::InductionInfo* info1, HInductionVarAnalysis::InductionInfo* info2, HInductionVarAnalysis::InductionInfo* trip, + bool in_body, bool is_min); + static bool GetConstant(HInductionVarAnalysis::InductionInfo* info, int32_t *value); + static Value AddValue(Value v1, Value v2); static Value SubValue(Value v1, Value v2); static Value MulValue(Value v1, Value v2); static Value DivValue(Value v1, Value v2); - static Value MinValue(Value v1, Value v2); - static Value MaxValue(Value v1, Value v2); + static Value MergeVal(Value v1, Value v2, bool is_min); /** Results of prior induction variable analysis. */ HInductionVarAnalysis *induction_analysis_; diff --git a/compiler/optimizing/induction_var_range_test.cc b/compiler/optimizing/induction_var_range_test.cc index c8abe36119..4497a884d9 100644 --- a/compiler/optimizing/induction_var_range_test.cc +++ b/compiler/optimizing/induction_var_range_test.cc @@ -14,8 +14,6 @@ * limitations under the License. */ -#include <limits.h> - #include "base/arena_allocator.h" #include "builder.h" #include "gtest/gtest.h" @@ -87,8 +85,7 @@ class InductionVarRangeTest : public testing::Test { /** Constructs a trip-count. */ HInductionVarAnalysis::InductionInfo* CreateTripCount(int32_t tc) { - HInductionVarAnalysis::InductionInfo* trip = CreateConst(tc); - return CreateInvariant('@', trip, trip); + return iva_->CreateTripCount(HInductionVarAnalysis::kTripCountInLoop, CreateConst(tc)); } /** Constructs a linear a * i + b induction. 
*/ @@ -114,32 +111,36 @@ class InductionVarRangeTest : public testing::Test { Value GetMin(HInductionVarAnalysis::InductionInfo* info, HInductionVarAnalysis::InductionInfo* induc) { - return InductionVarRange::GetMin(info, induc); + return InductionVarRange::GetVal(info, induc, /* in_body */ true, /* is_min */ true); } Value GetMax(HInductionVarAnalysis::InductionInfo* info, HInductionVarAnalysis::InductionInfo* induc) { - return InductionVarRange::GetMax(info, induc); + return InductionVarRange::GetVal(info, induc, /* in_body */ true, /* is_min */ false); } Value GetMul(HInductionVarAnalysis::InductionInfo* info1, HInductionVarAnalysis::InductionInfo* info2, bool is_min) { - return InductionVarRange::GetMul(info1, info2, nullptr, is_min); + return InductionVarRange::GetMul(info1, info2, nullptr, /* in_body */ true, is_min); } Value GetDiv(HInductionVarAnalysis::InductionInfo* info1, HInductionVarAnalysis::InductionInfo* info2, bool is_min) { - return InductionVarRange::GetDiv(info1, info2, nullptr, is_min); + return InductionVarRange::GetDiv(info1, info2, nullptr, /* in_body */ true, is_min); + } + + bool GetConstant(HInductionVarAnalysis::InductionInfo* info, int32_t* value) { + return InductionVarRange::GetConstant(info, value); } Value AddValue(Value v1, Value v2) { return InductionVarRange::AddValue(v1, v2); } Value SubValue(Value v1, Value v2) { return InductionVarRange::SubValue(v1, v2); } Value MulValue(Value v1, Value v2) { return InductionVarRange::MulValue(v1, v2); } Value DivValue(Value v1, Value v2) { return InductionVarRange::DivValue(v1, v2); } - Value MinValue(Value v1, Value v2) { return InductionVarRange::MinValue(v1, v2); } - Value MaxValue(Value v1, Value v2) { return InductionVarRange::MaxValue(v1, v2); } + Value MinValue(Value v1, Value v2) { return InductionVarRange::MergeVal(v1, v2, true); } + Value MaxValue(Value v1, Value v2) { return InductionVarRange::MergeVal(v1, v2, false); } // General building fields. 
ArenaPool pool_; @@ -281,6 +282,13 @@ TEST_F(InductionVarRangeTest, GetDivMax) { ExpectEqual(Value(500), GetDiv(CreateRange(-1000, -40), CreateRange(-4, -2), false)); } +TEST_F(InductionVarRangeTest, GetConstant) { + int32_t value; + ASSERT_TRUE(GetConstant(CreateConst(12345), &value)); + EXPECT_EQ(12345, value); + EXPECT_FALSE(GetConstant(CreateRange(1, 2), &value)); +} + TEST_F(InductionVarRangeTest, AddValue) { ExpectEqual(Value(110), AddValue(Value(10), Value(100))); ExpectEqual(Value(-5), AddValue(Value(&x_, 1, -4), Value(&x_, -1, -1))); @@ -288,8 +296,9 @@ TEST_F(InductionVarRangeTest, AddValue) { ExpectEqual(Value(), AddValue(Value(&x_, 1, 5), Value(&y_, 1, -7))); ExpectEqual(Value(&x_, 1, 23), AddValue(Value(&x_, 1, 20), Value(3))); ExpectEqual(Value(&y_, 1, 5), AddValue(Value(55), Value(&y_, 1, -50))); - ExpectEqual(Value(INT_MAX), AddValue(Value(INT_MAX - 5), Value(5))); - ExpectEqual(Value(), AddValue(Value(INT_MAX - 5), Value(6))); // unsafe + const int32_t max_value = std::numeric_limits<int32_t>::max(); + ExpectEqual(Value(max_value), AddValue(Value(max_value - 5), Value(5))); + ExpectEqual(Value(), AddValue(Value(max_value - 5), Value(6))); // unsafe } TEST_F(InductionVarRangeTest, SubValue) { @@ -299,8 +308,9 @@ TEST_F(InductionVarRangeTest, SubValue) { ExpectEqual(Value(), SubValue(Value(&x_, 1, 5), Value(&y_, 1, -7))); ExpectEqual(Value(&x_, 1, 17), SubValue(Value(&x_, 1, 20), Value(3))); ExpectEqual(Value(&y_, -4, 105), SubValue(Value(55), Value(&y_, 4, -50))); - ExpectEqual(Value(INT_MIN), SubValue(Value(INT_MIN + 5), Value(5))); - ExpectEqual(Value(), SubValue(Value(INT_MIN + 5), Value(6))); // unsafe + const int32_t min_value = std::numeric_limits<int32_t>::min(); + ExpectEqual(Value(min_value), SubValue(Value(min_value + 5), Value(5))); + ExpectEqual(Value(), SubValue(Value(min_value + 5), Value(6))); // unsafe } TEST_F(InductionVarRangeTest, MulValue) { diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index 039029aa52..0b65c564f7 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -247,12 +247,14 @@ bool HInliner::TryInline(HInvoke* invoke_instruction) { return false; } - uint16_t class_def_idx = resolved_method->GetDeclaringClass()->GetDexClassDefIndex(); - if (!compiler_driver_->IsMethodVerifiedWithoutFailures( - resolved_method->GetDexMethodIndex(), class_def_idx, *resolved_method->GetDexFile())) { - VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file) - << " couldn't be verified, so it cannot be inlined"; - return false; + if (!resolved_method->GetDeclaringClass()->IsVerified()) { + uint16_t class_def_idx = resolved_method->GetDeclaringClass()->GetDexClassDefIndex(); + if (!compiler_driver_->IsMethodVerifiedWithoutFailures( + resolved_method->GetDexMethodIndex(), class_def_idx, *resolved_method->GetDexFile())) { + VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file) + << " couldn't be verified, so it cannot be inlined"; + return false; + } } if (invoke_instruction->IsInvokeStaticOrDirect() && diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index 0ac26de674..86a3ad98b4 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -71,7 +71,8 @@ class InstructionSimplifierVisitor : public HGraphVisitor { void VisitXor(HXor* instruction) OVERRIDE; void VisitInstanceOf(HInstanceOf* instruction) OVERRIDE; void VisitFakeString(HFakeString* fake_string) 
OVERRIDE; - bool IsDominatedByInputNullCheck(HInstruction* instr); + + bool CanEnsureNotNullAt(HInstruction* instr, HInstruction* at) const; OptimizingCompilerStats* stats_; bool simplification_occurred_ = false; @@ -187,14 +188,18 @@ void InstructionSimplifierVisitor::VisitNullCheck(HNullCheck* null_check) { } } -bool InstructionSimplifierVisitor::IsDominatedByInputNullCheck(HInstruction* instr) { - HInstruction* input = instr->InputAt(0); +bool InstructionSimplifierVisitor::CanEnsureNotNullAt(HInstruction* input, HInstruction* at) const { + if (!input->CanBeNull()) { + return true; + } + for (HUseIterator<HInstruction*> it(input->GetUses()); !it.Done(); it.Advance()) { HInstruction* use = it.Current()->GetUser(); - if (use->IsNullCheck() && use->StrictlyDominates(instr)) { + if (use->IsNullCheck() && use->StrictlyDominates(at)) { return true; } } + return false; } @@ -211,7 +216,11 @@ static bool TypeCheckHasKnownOutcome(HLoadClass* klass, HInstruction* object, bo } ReferenceTypeInfo class_rti = klass->GetLoadedClassRTI(); - DCHECK(class_rti.IsValid() && class_rti.IsExact()); + if (!class_rti.IsValid()) { + // Happens when the loaded class is unresolved. + return false; + } + DCHECK(class_rti.IsExact()); if (class_rti.IsSupertypeOf(obj_rti)) { *outcome = true; return true; @@ -231,7 +240,7 @@ static bool TypeCheckHasKnownOutcome(HLoadClass* klass, HInstruction* object, bo void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) { HInstruction* object = check_cast->InputAt(0); - if (!object->CanBeNull() || IsDominatedByInputNullCheck(check_cast)) { + if (CanEnsureNotNullAt(object, check_cast)) { check_cast->ClearMustDoNullCheck(); } @@ -267,7 +276,7 @@ void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) { void InstructionSimplifierVisitor::VisitInstanceOf(HInstanceOf* instruction) { HInstruction* object = instruction->InputAt(0); bool can_be_null = true; - if (!object->CanBeNull() || IsDominatedByInputNullCheck(instruction)) { + if (CanEnsureNotNullAt(object, instruction)) { can_be_null = false; instruction->ClearMustDoNullCheck(); } @@ -305,14 +314,14 @@ void InstructionSimplifierVisitor::VisitInstanceOf(HInstanceOf* instruction) { void InstructionSimplifierVisitor::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { if ((instruction->GetValue()->GetType() == Primitive::kPrimNot) - && !instruction->GetValue()->CanBeNull()) { + && CanEnsureNotNullAt(instruction->GetValue(), instruction)) { instruction->ClearValueCanBeNull(); } } void InstructionSimplifierVisitor::VisitStaticFieldSet(HStaticFieldSet* instruction) { if ((instruction->GetValue()->GetType() == Primitive::kPrimNot) - && !instruction->GetValue()->CanBeNull()) { + && CanEnsureNotNullAt(instruction->GetValue(), instruction)) { instruction->ClearValueCanBeNull(); } } @@ -426,19 +435,41 @@ void InstructionSimplifierVisitor::VisitArraySet(HArraySet* instruction) { HInstruction* value = instruction->GetValue(); if (value->GetType() != Primitive::kPrimNot) return; + if (CanEnsureNotNullAt(value, instruction)) { + instruction->ClearValueCanBeNull(); + } + if (value->IsArrayGet()) { if (value->AsArrayGet()->GetArray() == instruction->GetArray()) { // If the code is just swapping elements in the array, no need for a type check. 
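An illustration of what the generalized helper above buys (editorial aside, not part of the patch): the old IsDominatedByInputNullCheck only asked the question for an instruction's own first input, while CanEnsureNotNullAt asks it for an arbitrary value at an arbitrary point. So when a reference is first dereferenced, say by a field read that the builder guards with a NullCheck, and later stored into a field or an array, that NullCheck strictly dominates the store, so the store can clear its value-can-be-null flag, as the field-set and array-set changes in this hunk now do.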
instruction->ClearNeedsTypeCheck(); + return; } } if (value->IsNullConstant()) { instruction->ClearNeedsTypeCheck(); + return; } - if (!value->CanBeNull()) { - instruction->ClearValueCanBeNull(); + ScopedObjectAccess soa(Thread::Current()); + ReferenceTypeInfo array_rti = instruction->GetArray()->GetReferenceTypeInfo(); + ReferenceTypeInfo value_rti = value->GetReferenceTypeInfo(); + if (!array_rti.IsValid()) { + return; + } + + if (value_rti.IsValid() && array_rti.CanArrayHold(value_rti)) { + instruction->ClearNeedsTypeCheck(); + return; + } + + if (array_rti.IsObjectArray()) { + if (array_rti.IsExact()) { + instruction->ClearNeedsTypeCheck(); + return; + } + instruction->SetStaticTypeOfArrayIsObjectArray(); } } @@ -502,14 +533,45 @@ void InstructionSimplifierVisitor::VisitAnd(HAnd* instruction) { HConstant* input_cst = instruction->GetConstantRight(); HInstruction* input_other = instruction->GetLeastConstantLeft(); - if ((input_cst != nullptr) && AreAllBitsSet(input_cst)) { - // Replace code looking like - // AND dst, src, 0xFFF...FF - // with - // src - instruction->ReplaceWith(input_other); - instruction->GetBlock()->RemoveInstruction(instruction); - return; + if (input_cst != nullptr) { + int64_t value = Int64FromConstant(input_cst); + if (value == -1) { + // Replace code looking like + // AND dst, src, 0xFFF...FF + // with + // src + instruction->ReplaceWith(input_other); + instruction->GetBlock()->RemoveInstruction(instruction); + RecordSimplification(); + return; + } + // Eliminate And from UShr+And if the And-mask contains all the bits that + // can be non-zero after UShr. Transform Shr+And to UShr if the And-mask + // precisely clears the shifted-in sign bits. + if ((input_other->IsUShr() || input_other->IsShr()) && input_other->InputAt(1)->IsConstant()) { + size_t reg_bits = (instruction->GetResultType() == Primitive::kPrimLong) ? 64 : 32; + size_t shift = Int64FromConstant(input_other->InputAt(1)->AsConstant()) & (reg_bits - 1); + size_t num_tail_bits_set = CTZ(value + 1); + if ((num_tail_bits_set >= reg_bits - shift) && input_other->IsUShr()) { + // This AND clears only bits known to be clear, for example "(x >>> 24) & 0xff". + instruction->ReplaceWith(input_other); + instruction->GetBlock()->RemoveInstruction(instruction); + RecordSimplification(); + return; + } else if ((num_tail_bits_set == reg_bits - shift) && IsPowerOfTwo(value + 1) && + input_other->HasOnlyOneNonEnvironmentUse()) { + DCHECK(input_other->IsShr()); // For UShr, we would have taken the branch above. + // Replace SHR+AND with USHR, for example "(x >> 24) & 0xff" -> "x >>> 24". + HUShr* ushr = new (GetGraph()->GetArena()) HUShr(instruction->GetType(), + input_other->InputAt(0), + input_other->InputAt(1), + input_other->GetDexPc()); + instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, ushr); + input_other->GetBlock()->RemoveInstruction(input_other); + RecordSimplification(); + return; + } + } } // We assume that GVN has run before, so we only perform a pointer comparison. 
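The UShr/Shr-plus-And rewrite above rests on two bit-level identities; a small standalone check follows (not part of the patch, and assuming the usual arithmetic behaviour of `>>` on negative 32-bit values, which the targets ART cares about provide):

  #include <cassert>
  #include <cstdint>

  int main() {
    const int32_t values[] = {0, 1, -1, 0x12345678, INT32_MIN, INT32_MAX};
    for (int32_t x : values) {
      uint32_t ux = static_cast<uint32_t>(x);
      // "(x >>> 24) & 0xff": the mask covers every bit that can be non-zero
      // after the unsigned shift, so the And is redundant.
      assert(((ux >> 24) & 0xff) == (ux >> 24));
      // "(x >> 24) & 0xff": the mask clears exactly the shifted-in sign bits,
      // which is what a single unsigned shift produces directly.
      assert(static_cast<uint32_t>((x >> 24) & 0xff) == (ux >> 24));
    }
    return 0;
  }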
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc index b71fdb8f1d..95646222ef 100644 --- a/compiler/optimizing/intrinsics.cc +++ b/compiler/optimizing/intrinsics.cc @@ -90,7 +90,7 @@ static Primitive::Type GetType(uint64_t data, bool is_op_size) { } static Intrinsics GetIntrinsic(InlineMethod method, InstructionSet instruction_set) { - if (instruction_set == kMips || instruction_set == kMips64) { + if (instruction_set == kMips) { return Intrinsics::kNone; } switch (method.opcode) { diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc new file mode 100644 index 0000000000..b60905d682 --- /dev/null +++ b/compiler/optimizing/intrinsics_mips64.cc @@ -0,0 +1,821 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "intrinsics_mips64.h" + +#include "arch/mips64/instruction_set_features_mips64.h" +#include "art_method.h" +#include "code_generator_mips64.h" +#include "entrypoints/quick/quick_entrypoints.h" +#include "intrinsics.h" +#include "mirror/array-inl.h" +#include "mirror/string.h" +#include "thread.h" +#include "utils/mips64/assembler_mips64.h" +#include "utils/mips64/constants_mips64.h" + +namespace art { + +namespace mips64 { + +IntrinsicLocationsBuilderMIPS64::IntrinsicLocationsBuilderMIPS64(CodeGeneratorMIPS64* codegen) + : arena_(codegen->GetGraph()->GetArena()) { +} + +Mips64Assembler* IntrinsicCodeGeneratorMIPS64::GetAssembler() { + return reinterpret_cast<Mips64Assembler*>(codegen_->GetAssembler()); +} + +ArenaAllocator* IntrinsicCodeGeneratorMIPS64::GetAllocator() { + return codegen_->GetGraph()->GetArena(); +} + +bool IntrinsicLocationsBuilderMIPS64::TryDispatch(HInvoke* invoke) { + Dispatch(invoke); + LocationSummary* res = invoke->GetLocations(); + return res != nullptr && res->Intrinsified(); +} + +#define __ assembler-> + +static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresRegister()); +} + +static void MoveFPToInt(LocationSummary* locations, bool is64bit, Mips64Assembler* assembler) { + FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>(); + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + + if (is64bit) { + __ Dmfc1(out, in); + } else { + __ Mfc1(out, in); + } +} + +// long java.lang.Double.doubleToRawLongBits(double) +void IntrinsicLocationsBuilderMIPS64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { + CreateFPToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { + MoveFPToInt(invoke->GetLocations(), true, GetAssembler()); +} + +// int java.lang.Float.floatToRawIntBits(float) +void IntrinsicLocationsBuilderMIPS64::VisitFloatFloatToRawIntBits(HInvoke* invoke) { + 
CreateFPToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitFloatFloatToRawIntBits(HInvoke* invoke) { + MoveFPToInt(invoke->GetLocations(), false, GetAssembler()); +} + +static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresFpuRegister()); +} + +static void MoveIntToFP(LocationSummary* locations, bool is64bit, Mips64Assembler* assembler) { + GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>(); + FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); + + if (is64bit) { + __ Dmtc1(in, out); + } else { + __ Mtc1(in, out); + } +} + +// double java.lang.Double.longBitsToDouble(long) +void IntrinsicLocationsBuilderMIPS64::VisitDoubleLongBitsToDouble(HInvoke* invoke) { + CreateIntToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitDoubleLongBitsToDouble(HInvoke* invoke) { + MoveIntToFP(invoke->GetLocations(), true, GetAssembler()); +} + +// float java.lang.Float.intBitsToFloat(int) +void IntrinsicLocationsBuilderMIPS64::VisitFloatIntBitsToFloat(HInvoke* invoke) { + CreateIntToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitFloatIntBitsToFloat(HInvoke* invoke) { + MoveIntToFP(invoke->GetLocations(), false, GetAssembler()); +} + +static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +static void GenReverseBytes(LocationSummary* locations, + Primitive::Type type, + Mips64Assembler* assembler) { + GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>(); + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + + switch (type) { + case Primitive::kPrimShort: + __ Dsbh(out, in); + __ Seh(out, out); + break; + case Primitive::kPrimInt: + __ Rotr(out, in, 16); + __ Wsbh(out, out); + break; + case Primitive::kPrimLong: + __ Dsbh(out, in); + __ Dshd(out, out); + break; + default: + LOG(FATAL) << "Unexpected size for reverse-bytes: " << type; + UNREACHABLE(); + } +} + +// int java.lang.Integer.reverseBytes(int) +void IntrinsicLocationsBuilderMIPS64::VisitIntegerReverseBytes(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitIntegerReverseBytes(HInvoke* invoke) { + GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler()); +} + +// long java.lang.Long.reverseBytes(long) +void IntrinsicLocationsBuilderMIPS64::VisitLongReverseBytes(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitLongReverseBytes(HInvoke* invoke) { + GenReverseBytes(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler()); +} + +// short java.lang.Short.reverseBytes(short) +void IntrinsicLocationsBuilderMIPS64::VisitShortReverseBytes(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitShortReverseBytes(HInvoke* invoke) { + GenReverseBytes(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler()); +} + +static void GenCountZeroes(LocationSummary* locations, bool is64bit, Mips64Assembler* assembler) { + GpuRegister in = 
locations->InAt(0).AsRegister<GpuRegister>(); + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + + if (is64bit) { + __ Dclz(out, in); + } else { + __ Clz(out, in); + } +} + +// int java.lang.Integer.numberOfLeadingZeros(int i) +void IntrinsicLocationsBuilderMIPS64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { + GenCountZeroes(invoke->GetLocations(), false, GetAssembler()); +} + +// int java.lang.Long.numberOfLeadingZeros(long i) +void IntrinsicLocationsBuilderMIPS64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { + GenCountZeroes(invoke->GetLocations(), true, GetAssembler()); +} + +static void GenReverse(LocationSummary* locations, + Primitive::Type type, + Mips64Assembler* assembler) { + DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong); + + GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>(); + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + + if (type == Primitive::kPrimInt) { + __ Rotr(out, in, 16); + __ Wsbh(out, out); + __ Bitswap(out, out); + } else { + __ Dsbh(out, in); + __ Dshd(out, out); + __ Dbitswap(out, out); + } +} + +// int java.lang.Integer.reverse(int) +void IntrinsicLocationsBuilderMIPS64::VisitIntegerReverse(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitIntegerReverse(HInvoke* invoke) { + GenReverse(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler()); +} + +// long java.lang.Long.reverse(long) +void IntrinsicLocationsBuilderMIPS64::VisitLongReverse(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitLongReverse(HInvoke* invoke) { + GenReverse(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler()); +} + +static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); +} + +static void MathAbsFP(LocationSummary* locations, bool is64bit, Mips64Assembler* assembler) { + FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>(); + FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); + + if (is64bit) { + __ AbsD(out, in); + } else { + __ AbsS(out, in); + } +} + +// double java.lang.Math.abs(double) +void IntrinsicLocationsBuilderMIPS64::VisitMathAbsDouble(HInvoke* invoke) { + CreateFPToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMathAbsDouble(HInvoke* invoke) { + MathAbsFP(invoke->GetLocations(), true, GetAssembler()); +} + +// float java.lang.Math.abs(float) +void IntrinsicLocationsBuilderMIPS64::VisitMathAbsFloat(HInvoke* invoke) { + CreateFPToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMathAbsFloat(HInvoke* invoke) { + MathAbsFP(invoke->GetLocations(), false, GetAssembler()); +} + +static void CreateIntToInt(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), 
Location::kNoOutputOverlap); +} + +static void GenAbsInteger(LocationSummary* locations, bool is64bit, Mips64Assembler* assembler) { + GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>(); + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + + if (is64bit) { + __ Dsra32(AT, in, 31); + __ Xor(out, in, AT); + __ Dsubu(out, out, AT); + } else { + __ Sra(AT, in, 31); + __ Xor(out, in, AT); + __ Subu(out, out, AT); + } +} + +// int java.lang.Math.abs(int) +void IntrinsicLocationsBuilderMIPS64::VisitMathAbsInt(HInvoke* invoke) { + CreateIntToInt(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMathAbsInt(HInvoke* invoke) { + GenAbsInteger(invoke->GetLocations(), false, GetAssembler()); +} + +// long java.lang.Math.abs(long) +void IntrinsicLocationsBuilderMIPS64::VisitMathAbsLong(HInvoke* invoke) { + CreateIntToInt(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMathAbsLong(HInvoke* invoke) { + GenAbsInteger(invoke->GetLocations(), true, GetAssembler()); +} + +static void GenMinMaxFP(LocationSummary* locations, + bool is_min, + bool is_double, + Mips64Assembler* assembler) { + FpuRegister lhs = locations->InAt(0).AsFpuRegister<FpuRegister>(); + FpuRegister rhs = locations->InAt(1).AsFpuRegister<FpuRegister>(); + FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); + + if (is_double) { + if (is_min) { + __ MinD(out, lhs, rhs); + } else { + __ MaxD(out, lhs, rhs); + } + } else { + if (is_min) { + __ MinS(out, lhs, rhs); + } else { + __ MaxS(out, lhs, rhs); + } + } +} + +static void CreateFPFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); +} + +// double java.lang.Math.min(double, double) +void IntrinsicLocationsBuilderMIPS64::VisitMathMinDoubleDouble(HInvoke* invoke) { + CreateFPFPToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMathMinDoubleDouble(HInvoke* invoke) { + GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler()); +} + +// float java.lang.Math.min(float, float) +void IntrinsicLocationsBuilderMIPS64::VisitMathMinFloatFloat(HInvoke* invoke) { + CreateFPFPToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMathMinFloatFloat(HInvoke* invoke) { + GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler()); +} + +// double java.lang.Math.max(double, double) +void IntrinsicLocationsBuilderMIPS64::VisitMathMaxDoubleDouble(HInvoke* invoke) { + CreateFPFPToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMathMaxDoubleDouble(HInvoke* invoke) { + GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler()); +} + +// float java.lang.Math.max(float, float) +void IntrinsicLocationsBuilderMIPS64::VisitMathMaxFloatFloat(HInvoke* invoke) { + CreateFPFPToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMathMaxFloatFloat(HInvoke* invoke) { + GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler()); +} + +static void GenMinMax(LocationSummary* locations, + bool is_min, + Mips64Assembler* assembler) { + GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>(); + GpuRegister rhs = locations->InAt(1).AsRegister<GpuRegister>(); + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + + // Some 
architectures, such as ARM and MIPS (prior to r6), have a + // conditional move instruction which only changes the target + // (output) register if the condition is true (MIPS prior to r6 had + // MOVF, MOVT, and MOVZ). The SELEQZ and SELNEZ instructions always + // change the target (output) register. If the condition is true the + // output register gets the contents of the "rs" register; otherwise, + // the output register is set to zero. One consequence of this is + // that to implement something like "rd = c==0 ? rs : rt" MIPS64r6 + // needs to use a pair of SELEQZ/SELNEZ instructions. After + // executing this pair of instructions one of the output registers + // from the pair will necessarily contain zero. Then the code ORs the + // output registers from the SELEQZ/SELNEZ instructions to get the + // final result. + // + // The initial test to see if the output register is same as the + // first input register is needed to make sure that value in the + // first input register isn't clobbered before we've finished + // computing the output value. The logic in the corresponding else + // clause performs the same task but makes sure the second input + // register isn't clobbered in the event that it's the same register + // as the output register; the else clause also handles the case + // where the output register is distinct from both the first, and the + // second input registers. + if (out == lhs) { + __ Slt(AT, rhs, lhs); + if (is_min) { + __ Seleqz(out, lhs, AT); + __ Selnez(AT, rhs, AT); + } else { + __ Selnez(out, lhs, AT); + __ Seleqz(AT, rhs, AT); + } + } else { + __ Slt(AT, lhs, rhs); + if (is_min) { + __ Seleqz(out, rhs, AT); + __ Selnez(AT, lhs, AT); + } else { + __ Selnez(out, rhs, AT); + __ Seleqz(AT, lhs, AT); + } + } + __ Or(out, out, AT); +} + +static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +// int java.lang.Math.min(int, int) +void IntrinsicLocationsBuilderMIPS64::VisitMathMinIntInt(HInvoke* invoke) { + CreateIntIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMathMinIntInt(HInvoke* invoke) { + GenMinMax(invoke->GetLocations(), true, GetAssembler()); +} + +// long java.lang.Math.min(long, long) +void IntrinsicLocationsBuilderMIPS64::VisitMathMinLongLong(HInvoke* invoke) { + CreateIntIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMathMinLongLong(HInvoke* invoke) { + GenMinMax(invoke->GetLocations(), true, GetAssembler()); +} + +// int java.lang.Math.max(int, int) +void IntrinsicLocationsBuilderMIPS64::VisitMathMaxIntInt(HInvoke* invoke) { + CreateIntIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMathMaxIntInt(HInvoke* invoke) { + GenMinMax(invoke->GetLocations(), false, GetAssembler()); +} + +// long java.lang.Math.max(long, long) +void IntrinsicLocationsBuilderMIPS64::VisitMathMaxLongLong(HInvoke* invoke) { + CreateIntIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMathMaxLongLong(HInvoke* invoke) { + GenMinMax(invoke->GetLocations(), false, GetAssembler()); +} + +// double java.lang.Math.sqrt(double) +void IntrinsicLocationsBuilderMIPS64::VisitMathSqrt(HInvoke* invoke) { + 
CreateFPToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMathSqrt(HInvoke* invoke) { + LocationSummary* locations = invoke->GetLocations(); + Mips64Assembler* assembler = GetAssembler(); + FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>(); + FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); + + __ SqrtD(out, in); +} + +static void CreateFPToFP(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); +} + +// double java.lang.Math.rint(double) +void IntrinsicLocationsBuilderMIPS64::VisitMathRint(HInvoke* invoke) { + CreateFPToFP(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMathRint(HInvoke* invoke) { + LocationSummary* locations = invoke->GetLocations(); + Mips64Assembler* assembler = GetAssembler(); + FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>(); + FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); + + __ RintD(out, in); +} + +// double java.lang.Math.floor(double) +void IntrinsicLocationsBuilderMIPS64::VisitMathFloor(HInvoke* invoke) { + CreateFPToFP(arena_, invoke); +} + +const constexpr uint16_t kFPLeaveUnchanged = kPositiveZero | + kPositiveInfinity | + kNegativeZero | + kNegativeInfinity | + kQuietNaN | + kSignalingNaN; + +void IntrinsicCodeGeneratorMIPS64::VisitMathFloor(HInvoke* invoke) { + LocationSummary* locations = invoke->GetLocations(); + Mips64Assembler* assembler = GetAssembler(); + FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>(); + FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); + + Label done; + + // double floor(double in) { + // if in.isNaN || in.isInfinite || in.isZero { + // return in; + // } + __ ClassD(out, in); + __ Dmfc1(AT, out); + __ Andi(AT, AT, kFPLeaveUnchanged); // +0.0 | +Inf | -0.0 | -Inf | qNaN | sNaN + __ MovD(out, in); + __ Bnezc(AT, &done); + + // Long outLong = floor(in); + // if outLong == Long.MAX_VALUE { + // // floor() has almost certainly returned a value which + // // can't be successfully represented as a signed 64-bit + // // number. Java expects that the input value will be + // // returned in these cases. + // // There is also a small probability that floor(in) + // // correctly truncates the input value to Long.MAX_VALUE. In + // // that case, this exception handling code still does the + // // correct thing. 
+ // return in; + // } + __ FloorLD(out, in); + __ Dmfc1(AT, out); + __ MovD(out, in); + __ LoadConst64(TMP, kPrimLongMax); + __ Beqc(AT, TMP, &done); + + // double out = outLong; + // return out; + __ Dmtc1(AT, out); + __ Cvtdl(out, out); + __ Bind(&done); + // } +} + +// double java.lang.Math.ceil(double) +void IntrinsicLocationsBuilderMIPS64::VisitMathCeil(HInvoke* invoke) { + CreateFPToFP(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMathCeil(HInvoke* invoke) { + LocationSummary* locations = invoke->GetLocations(); + Mips64Assembler* assembler = GetAssembler(); + FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>(); + FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); + + Label done; + + // double ceil(double in) { + // if in.isNaN || in.isInfinite || in.isZero { + // return in; + // } + __ ClassD(out, in); + __ Dmfc1(AT, out); + __ Andi(AT, AT, kFPLeaveUnchanged); // +0.0 | +Inf | -0.0 | -Inf | qNaN | sNaN + __ MovD(out, in); + __ Bnezc(AT, &done); + + // Long outLong = ceil(in); + // if outLong == Long.MAX_VALUE { + // // ceil() has almost certainly returned a value which + // // can't be successfully represented as a signed 64-bit + // // number. Java expects that the input value will be + // // returned in these cases. + // // There is also a small probability that ceil(in) + // // correctly rounds up the input value to Long.MAX_VALUE. In + // // that case, this exception handling code still does the + // // correct thing. + // return in; + // } + __ CeilLD(out, in); + __ Dmfc1(AT, out); + __ MovD(out, in); + __ LoadConst64(TMP, kPrimLongMax); + __ Beqc(AT, TMP, &done); + + // double out = outLong; + // return out; + __ Dmtc1(AT, out); + __ Cvtdl(out, out); + __ Bind(&done); + // } +} + +// byte libcore.io.Memory.peekByte(long address) +void IntrinsicLocationsBuilderMIPS64::VisitMemoryPeekByte(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMemoryPeekByte(HInvoke* invoke) { + Mips64Assembler* assembler = GetAssembler(); + GpuRegister adr = invoke->GetLocations()->InAt(0).AsRegister<GpuRegister>(); + GpuRegister out = invoke->GetLocations()->Out().AsRegister<GpuRegister>(); + + __ Lb(out, adr, 0); +} + +// short libcore.io.Memory.peekShort(long address) +void IntrinsicLocationsBuilderMIPS64::VisitMemoryPeekShortNative(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMemoryPeekShortNative(HInvoke* invoke) { + Mips64Assembler* assembler = GetAssembler(); + GpuRegister adr = invoke->GetLocations()->InAt(0).AsRegister<GpuRegister>(); + GpuRegister out = invoke->GetLocations()->Out().AsRegister<GpuRegister>(); + + __ Lh(out, adr, 0); +} + +// int libcore.io.Memory.peekInt(long address) +void IntrinsicLocationsBuilderMIPS64::VisitMemoryPeekIntNative(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMemoryPeekIntNative(HInvoke* invoke) { + Mips64Assembler* assembler = GetAssembler(); + GpuRegister adr = invoke->GetLocations()->InAt(0).AsRegister<GpuRegister>(); + GpuRegister out = invoke->GetLocations()->Out().AsRegister<GpuRegister>(); + + __ Lw(out, adr, 0); +} + +// long libcore.io.Memory.peekLong(long address) +void IntrinsicLocationsBuilderMIPS64::VisitMemoryPeekLongNative(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMemoryPeekLongNative(HInvoke* invoke) { + Mips64Assembler* assembler = GetAssembler(); + 
GpuRegister adr = invoke->GetLocations()->InAt(0).AsRegister<GpuRegister>(); + GpuRegister out = invoke->GetLocations()->Out().AsRegister<GpuRegister>(); + + __ Ld(out, adr, 0); +} + +static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); +} + +// void libcore.io.Memory.pokeByte(long address, byte value) +void IntrinsicLocationsBuilderMIPS64::VisitMemoryPokeByte(HInvoke* invoke) { + CreateIntIntToVoidLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMemoryPokeByte(HInvoke* invoke) { + Mips64Assembler* assembler = GetAssembler(); + GpuRegister adr = invoke->GetLocations()->InAt(0).AsRegister<GpuRegister>(); + GpuRegister val = invoke->GetLocations()->InAt(1).AsRegister<GpuRegister>(); + + __ Sb(val, adr, 0); +} + +// void libcore.io.Memory.pokeShort(long address, short value) +void IntrinsicLocationsBuilderMIPS64::VisitMemoryPokeShortNative(HInvoke* invoke) { + CreateIntIntToVoidLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMemoryPokeShortNative(HInvoke* invoke) { + Mips64Assembler* assembler = GetAssembler(); + GpuRegister adr = invoke->GetLocations()->InAt(0).AsRegister<GpuRegister>(); + GpuRegister val = invoke->GetLocations()->InAt(1).AsRegister<GpuRegister>(); + + __ Sh(val, adr, 0); +} + +// void libcore.io.Memory.pokeInt(long address, int value) +void IntrinsicLocationsBuilderMIPS64::VisitMemoryPokeIntNative(HInvoke* invoke) { + CreateIntIntToVoidLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMemoryPokeIntNative(HInvoke* invoke) { + Mips64Assembler* assembler = GetAssembler(); + GpuRegister adr = invoke->GetLocations()->InAt(0).AsRegister<GpuRegister>(); + GpuRegister val = invoke->GetLocations()->InAt(1).AsRegister<GpuRegister>(); + + __ Sw(val, adr, 0); +} + +// void libcore.io.Memory.pokeLong(long address, long value) +void IntrinsicLocationsBuilderMIPS64::VisitMemoryPokeLongNative(HInvoke* invoke) { + CreateIntIntToVoidLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMemoryPokeLongNative(HInvoke* invoke) { + Mips64Assembler* assembler = GetAssembler(); + GpuRegister adr = invoke->GetLocations()->InAt(0).AsRegister<GpuRegister>(); + GpuRegister val = invoke->GetLocations()->InAt(1).AsRegister<GpuRegister>(); + + __ Sd(val, adr, 0); +} + +// Thread java.lang.Thread.currentThread() +void IntrinsicLocationsBuilderMIPS64::VisitThreadCurrentThread(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetOut(Location::RequiresRegister()); +} + +void IntrinsicCodeGeneratorMIPS64::VisitThreadCurrentThread(HInvoke* invoke) { + Mips64Assembler* assembler = GetAssembler(); + GpuRegister out = invoke->GetLocations()->Out().AsRegister<GpuRegister>(); + + __ LoadFromOffset(kLoadUnsignedWord, + out, + TR, + Thread::PeerOffset<kMips64PointerSize>().Int32Value()); +} + +// Unimplemented intrinsics.
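+// Note: each UNIMPLEMENTED_INTRINSIC(Name) entry below expands to empty Visit##Name bodies for both the locations builder and the code generator, so these intrinsics are not marked as intrinsified and the corresponding calls fall back to the regular invoke path.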
+ +#define UNIMPLEMENTED_INTRINSIC(Name) \ +void IntrinsicLocationsBuilderMIPS64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \ +} \ +void IntrinsicCodeGeneratorMIPS64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \ +} + +UNIMPLEMENTED_INTRINSIC(MathRoundDouble) +UNIMPLEMENTED_INTRINSIC(MathRoundFloat) + +UNIMPLEMENTED_INTRINSIC(UnsafeGet) +UNIMPLEMENTED_INTRINSIC(UnsafeGetVolatile) +UNIMPLEMENTED_INTRINSIC(UnsafeGetLong) +UNIMPLEMENTED_INTRINSIC(UnsafeGetLongVolatile) +UNIMPLEMENTED_INTRINSIC(UnsafeGetObject) +UNIMPLEMENTED_INTRINSIC(UnsafeGetObjectVolatile) +UNIMPLEMENTED_INTRINSIC(UnsafePut) +UNIMPLEMENTED_INTRINSIC(UnsafePutOrdered) +UNIMPLEMENTED_INTRINSIC(UnsafePutVolatile) +UNIMPLEMENTED_INTRINSIC(UnsafePutObject) +UNIMPLEMENTED_INTRINSIC(UnsafePutObjectOrdered) +UNIMPLEMENTED_INTRINSIC(UnsafePutObjectVolatile) +UNIMPLEMENTED_INTRINSIC(UnsafePutLong) +UNIMPLEMENTED_INTRINSIC(UnsafePutLongOrdered) +UNIMPLEMENTED_INTRINSIC(UnsafePutLongVolatile) +UNIMPLEMENTED_INTRINSIC(UnsafeCASInt) +UNIMPLEMENTED_INTRINSIC(UnsafeCASLong) +UNIMPLEMENTED_INTRINSIC(UnsafeCASObject) +UNIMPLEMENTED_INTRINSIC(StringCharAt) +UNIMPLEMENTED_INTRINSIC(StringCompareTo) +UNIMPLEMENTED_INTRINSIC(StringEquals) +UNIMPLEMENTED_INTRINSIC(StringIndexOf) +UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter) +UNIMPLEMENTED_INTRINSIC(StringNewStringFromBytes) +UNIMPLEMENTED_INTRINSIC(StringNewStringFromChars) +UNIMPLEMENTED_INTRINSIC(StringNewStringFromString) +UNIMPLEMENTED_INTRINSIC(LongRotateLeft) +UNIMPLEMENTED_INTRINSIC(LongRotateRight) +UNIMPLEMENTED_INTRINSIC(LongNumberOfTrailingZeros) +UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft) +UNIMPLEMENTED_INTRINSIC(IntegerRotateRight) +UNIMPLEMENTED_INTRINSIC(IntegerNumberOfTrailingZeros) + +UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent) +UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck) +UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar) + +#undef UNIMPLEMENTED_INTRINSIC + +#undef __ + +} // namespace mips64 +} // namespace art diff --git a/compiler/optimizing/intrinsics_mips64.h b/compiler/optimizing/intrinsics_mips64.h new file mode 100644 index 0000000000..1481d24c9e --- /dev/null +++ b/compiler/optimizing/intrinsics_mips64.h @@ -0,0 +1,83 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_MIPS64_H_ +#define ART_COMPILER_OPTIMIZING_INTRINSICS_MIPS64_H_ + +#include "intrinsics.h" + +namespace art { + +class ArenaAllocator; +class HInvokeStaticOrDirect; +class HInvokeVirtual; + +namespace mips64 { + +class CodeGeneratorMIPS64; +class Mips64Assembler; + +class IntrinsicLocationsBuilderMIPS64 FINAL : public IntrinsicVisitor { + public: + explicit IntrinsicLocationsBuilderMIPS64(CodeGeneratorMIPS64* codegen); + + // Define visitor methods. 
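+  // (One Visit##Name declaration is generated below for every entry in intrinsics_list.h.)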
+ +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache) \ + void Visit ## Name(HInvoke* invoke) OVERRIDE; +#include "intrinsics_list.h" +INTRINSICS_LIST(OPTIMIZING_INTRINSICS) +#undef INTRINSICS_LIST +#undef OPTIMIZING_INTRINSICS + + // Check whether an invoke is an intrinsic, and if so, create a location summary. Returns whether + // a corresponding LocationSummary with the intrinsified_ flag set was generated and attached to + // the invoke. + bool TryDispatch(HInvoke* invoke); + + private: + ArenaAllocator* arena_; + + DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderMIPS64); +}; + +class IntrinsicCodeGeneratorMIPS64 FINAL : public IntrinsicVisitor { + public: + explicit IntrinsicCodeGeneratorMIPS64(CodeGeneratorMIPS64* codegen) : codegen_(codegen) {} + + // Define visitor methods. + +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache) \ + void Visit ## Name(HInvoke* invoke) OVERRIDE; +#include "intrinsics_list.h" +INTRINSICS_LIST(OPTIMIZING_INTRINSICS) +#undef INTRINSICS_LIST +#undef OPTIMIZING_INTRINSICS + + private: + Mips64Assembler* GetAssembler(); + + ArenaAllocator* GetAllocator(); + + CodeGeneratorMIPS64* codegen_; + + DISALLOW_COPY_AND_ASSIGN(IntrinsicCodeGeneratorMIPS64); +}; + +} // namespace mips64 +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_INTRINSICS_MIPS64_H_ diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc index d14dfc190f..ebdf7a2f65 100644 --- a/compiler/optimizing/locations.cc +++ b/compiler/optimizing/locations.cc @@ -23,18 +23,15 @@ namespace art { LocationSummary::LocationSummary(HInstruction* instruction, CallKind call_kind, bool intrinsified) - : inputs_(instruction->GetBlock()->GetGraph()->GetArena(), instruction->InputCount()), - temps_(instruction->GetBlock()->GetGraph()->GetArena(), 0), + : inputs_(instruction->InputCount(), + instruction->GetBlock()->GetGraph()->GetArena()->Adapter(kArenaAllocLocationSummary)), + temps_(instruction->GetBlock()->GetGraph()->GetArena()->Adapter(kArenaAllocLocationSummary)), output_overlaps_(Location::kOutputOverlap), call_kind_(call_kind), stack_mask_(nullptr), register_mask_(0), live_registers_(), intrinsified_(intrinsified) { - inputs_.SetSize(instruction->InputCount()); - for (size_t i = 0; i < instruction->InputCount(); ++i) { - inputs_.Put(i, Location()); - } instruction->SetLocations(this); if (NeedsSafepoint()) { diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h index 2162ab928b..de4fb7e201 100644 --- a/compiler/optimizing/locations.h +++ b/compiler/optimizing/locations.h @@ -17,11 +17,11 @@ #ifndef ART_COMPILER_OPTIMIZING_LOCATIONS_H_ #define ART_COMPILER_OPTIMIZING_LOCATIONS_H_ +#include "base/arena_containers.h" #include "base/arena_object.h" #include "base/bit_field.h" #include "base/bit_vector.h" #include "base/value_object.h" -#include "utils/growable_array.h" namespace art { @@ -468,7 +468,7 @@ static constexpr bool kIntrinsified = true; * The intent is to have the code for generating the instruction independent of * register allocation. A register allocator just has to provide a LocationSummary. 
*/ -class LocationSummary : public ArenaObject<kArenaAllocMisc> { +class LocationSummary : public ArenaObject<kArenaAllocLocationSummary> { public: enum CallKind { kNoCall, @@ -481,15 +481,17 @@ class LocationSummary : public ArenaObject<kArenaAllocMisc> { bool intrinsified = false); void SetInAt(uint32_t at, Location location) { - inputs_.Put(at, location); + DCHECK_LT(at, GetInputCount()); + inputs_[at] = location; } Location InAt(uint32_t at) const { - return inputs_.Get(at); + DCHECK_LT(at, GetInputCount()); + return inputs_[at]; } size_t GetInputCount() const { - return inputs_.Size(); + return inputs_.size(); } void SetOut(Location location, Location::OutputOverlap overlaps = Location::kOutputOverlap) { @@ -508,23 +510,25 @@ class LocationSummary : public ArenaObject<kArenaAllocMisc> { } void AddTemp(Location location) { - temps_.Add(location); + temps_.push_back(location); } Location GetTemp(uint32_t at) const { - return temps_.Get(at); + DCHECK_LT(at, GetTempCount()); + return temps_[at]; } void SetTempAt(uint32_t at, Location location) { - DCHECK(temps_.Get(at).IsUnallocated() || temps_.Get(at).IsInvalid()); - temps_.Put(at, location); + DCHECK_LT(at, GetTempCount()); + DCHECK(temps_[at].IsUnallocated() || temps_[at].IsInvalid()); + temps_[at] = location; } size_t GetTempCount() const { - return temps_.Size(); + return temps_.size(); } - bool HasTemps() const { return !temps_.IsEmpty(); } + bool HasTemps() const { return !temps_.empty(); } Location Out() const { return output_; } @@ -576,7 +580,7 @@ class LocationSummary : public ArenaObject<kArenaAllocMisc> { } bool IsFixedInput(uint32_t input_index) const { - Location input = inputs_.Get(input_index); + Location input = inputs_[input_index]; return input.IsRegister() || input.IsFpuRegister() || input.IsPair() @@ -593,8 +597,8 @@ class LocationSummary : public ArenaObject<kArenaAllocMisc> { } private: - GrowableArray<Location> inputs_; - GrowableArray<Location> temps_; + ArenaVector<Location> inputs_; + ArenaVector<Location> temps_; // Whether the output overlaps with any of the inputs. If it overlaps, then it cannot // share the same register as the inputs. Location::OutputOverlap output_overlaps_; diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 012858920f..989970fb49 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -20,8 +20,8 @@ #include "ssa_builder.h" #include "base/bit_vector-inl.h" #include "base/bit_utils.h" +#include "base/stl_util.h" #include "mirror/class-inl.h" -#include "utils/growable_array.h" #include "scoped_thread_state_change.h" namespace art { @@ -32,8 +32,41 @@ void HGraph::AddBlock(HBasicBlock* block) { } void HGraph::FindBackEdges(ArenaBitVector* visited) { + // "visited" must be empty on entry, it's an output argument for all visited (i.e. live) blocks. + DCHECK_EQ(visited->GetHighestBitSet(), -1); + + // Nodes that we're currently visiting, indexed by block id. ArenaBitVector visiting(arena_, blocks_.size(), false); - VisitBlockForBackEdges(entry_block_, visited, &visiting); + // Number of successors visited from a given node, indexed by block id. + ArenaVector<size_t> successors_visited(blocks_.size(), 0u, arena_->Adapter()); + // Stack of nodes that we're currently visiting (same as marked in "visiting" above). 
+ ArenaVector<HBasicBlock*> worklist(arena_->Adapter()); + constexpr size_t kDefaultWorklistSize = 8; + worklist.reserve(kDefaultWorklistSize); + visited->SetBit(entry_block_->GetBlockId()); + visiting.SetBit(entry_block_->GetBlockId()); + worklist.push_back(entry_block_); + + while (!worklist.empty()) { + HBasicBlock* current = worklist.back(); + uint32_t current_id = current->GetBlockId(); + if (successors_visited[current_id] == current->GetSuccessors().size()) { + visiting.ClearBit(current_id); + worklist.pop_back(); + } else { + DCHECK_LT(successors_visited[current_id], current->GetSuccessors().size()); + HBasicBlock* successor = current->GetSuccessors()[successors_visited[current_id]++]; + uint32_t successor_id = successor->GetBlockId(); + if (visiting.IsBitSet(successor_id)) { + DCHECK(ContainsElement(worklist, successor)); + successor->AddBackEdge(current); + } else if (!visited->IsBitSet(successor_id)) { + visited->SetBit(successor_id); + visiting.SetBit(successor_id); + worklist.push_back(successor); + } + } + } } static void RemoveAsUser(HInstruction* instruction) { @@ -79,24 +112,6 @@ void HGraph::RemoveDeadBlocks(const ArenaBitVector& visited) { } } -void HGraph::VisitBlockForBackEdges(HBasicBlock* block, - ArenaBitVector* visited, - ArenaBitVector* visiting) { - int id = block->GetBlockId(); - if (visited->IsBitSet(id)) return; - - visited->SetBit(id); - visiting->SetBit(id); - for (HBasicBlock* successor : block->GetSuccessors()) { - if (visiting->IsBitSet(successor->GetBlockId())) { - successor->AddBackEdge(block); - } else { - VisitBlockForBackEdges(successor, visited, visiting); - } - } - visiting->ClearBit(id); -} - void HGraph::BuildDominatorTree() { // (1) Simplify the CFG so that catch blocks have only exceptional incoming // edges. This invariant simplifies building SSA form because Phis cannot @@ -141,10 +156,43 @@ void HBasicBlock::ClearDominanceInformation() { void HGraph::ComputeDominanceInformation() { DCHECK(reverse_post_order_.empty()); reverse_post_order_.reserve(blocks_.size()); - ArenaVector<size_t> visits(blocks_.size(), 0u, arena_->Adapter()); reverse_post_order_.push_back(entry_block_); - for (HBasicBlock* successor : entry_block_->GetSuccessors()) { - VisitBlockForDominatorTree(successor, entry_block_, &visits); + + // Number of visits of a given node, indexed by block id. + ArenaVector<size_t> visits(blocks_.size(), 0u, arena_->Adapter()); + // Number of successors visited from a given node, indexed by block id. + ArenaVector<size_t> successors_visited(blocks_.size(), 0u, arena_->Adapter()); + // Nodes for which we need to visit successors. + ArenaVector<HBasicBlock*> worklist(arena_->Adapter()); + constexpr size_t kDefaultWorklistSize = 8; + worklist.reserve(kDefaultWorklistSize); + worklist.push_back(entry_block_); + + while (!worklist.empty()) { + HBasicBlock* current = worklist.back(); + uint32_t current_id = current->GetBlockId(); + if (successors_visited[current_id] == current->GetSuccessors().size()) { + worklist.pop_back(); + } else { + DCHECK_LT(successors_visited[current_id], current->GetSuccessors().size()); + HBasicBlock* successor = current->GetSuccessors()[successors_visited[current_id]++]; + + if (successor->GetDominator() == nullptr) { + successor->SetDominator(current); + } else { + successor->SetDominator(FindCommonDominator(successor->GetDominator(), current)); + } + + // Once all the forward edges have been visited, we know the immediate + // dominator of the block. We can then start visiting its successors. 
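+      // Back edges are deliberately excluded from the predecessor count below, so a block inside a loop becomes ready as soon as all of its forward predecessors have been processed.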
+ DCHECK_LT(successor->GetBlockId(), visits.size()); + if (++visits[successor->GetBlockId()] == + successor->GetPredecessors().size() - successor->NumberOfBackEdges()) { + successor->GetDominator()->AddDominatedBlock(successor); + reverse_post_order_.push_back(successor); + worklist.push_back(successor); + } + } } } @@ -166,28 +214,6 @@ HBasicBlock* HGraph::FindCommonDominator(HBasicBlock* first, HBasicBlock* second return nullptr; } -void HGraph::VisitBlockForDominatorTree(HBasicBlock* block, - HBasicBlock* predecessor, - ArenaVector<size_t>* visits) { - if (block->GetDominator() == nullptr) { - block->SetDominator(predecessor); - } else { - block->SetDominator(FindCommonDominator(block->GetDominator(), predecessor)); - } - - // Once all the forward edges have been visited, we know the immediate - // dominator of the block. We can then start visiting its successors. - DCHECK_LT(block->GetBlockId(), visits->size()); - if (++(*visits)[block->GetBlockId()] == - block->GetPredecessors().size() - block->NumberOfBackEdges()) { - block->GetDominator()->AddDominatedBlock(block); - reverse_post_order_.push_back(block); - for (HBasicBlock* successor : block->GetSuccessors()) { - VisitBlockForDominatorTree(successor, block, visits); - } - } -} - void HGraph::TransformToSsa() { DCHECK(!reverse_post_order_.empty()); SsaBuilder ssa_builder(this); @@ -1143,6 +1169,23 @@ HBasicBlock* HBasicBlock::SplitBefore(HInstruction* cursor) { return new_block; } +HBasicBlock* HBasicBlock::CreateImmediateDominator() { + DCHECK(!graph_->IsInSsaForm()) << "Support for SSA form not implemented"; + DCHECK(!IsCatchBlock()) << "Support for updating try/catch information not implemented."; + + HBasicBlock* new_block = new (GetGraph()->GetArena()) HBasicBlock(GetGraph(), GetDexPc()); + + for (HBasicBlock* predecessor : GetPredecessors()) { + new_block->predecessors_.push_back(predecessor); + predecessor->successors_[predecessor->GetSuccessorIndexOf(this)] = new_block; + } + predecessors_.clear(); + AddPredecessor(new_block); + + GetGraph()->AddBlock(new_block); + return new_block; +} + HBasicBlock* HBasicBlock::SplitAfter(HInstruction* cursor) { DCHECK(!cursor->IsControlFlow()); DCHECK_NE(instructions_.last_instruction_, cursor); @@ -1188,6 +1231,15 @@ const HTryBoundary* HBasicBlock::ComputeTryEntryOfSuccessors() const { } } +bool HBasicBlock::HasThrowingInstructions() const { + for (HInstructionIterator it(GetInstructions()); !it.Done(); it.Advance()) { + if (it.Current()->CanThrow()) { + return true; + } + } + return false; +} + static bool HasOnlyOneInstruction(const HBasicBlock& block) { return block.GetPhis().IsEmpty() && !block.GetInstructions().IsEmpty() diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 52f6e232ea..489f71de74 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -35,7 +35,6 @@ #include "offsets.h" #include "primitive.h" #include "utils/arena_bit_vector.h" -#include "utils/growable_array.h" namespace art { @@ -370,13 +369,7 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { void SetHasTryCatch(bool value) { has_try_catch_ = value; } private: - void VisitBlockForDominatorTree(HBasicBlock* block, - HBasicBlock* predecessor, - ArenaVector<size_t>* visits); void FindBackEdges(ArenaBitVector* visited); - void VisitBlockForBackEdges(HBasicBlock* block, - ArenaBitVector* visited, - ArenaBitVector* visiting); void RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visited) const; void RemoveDeadBlocks(const ArenaBitVector& visited); @@ 
-825,11 +818,17 @@ class HBasicBlock : public ArenaObject<kArenaAllocBasicBlock> { return EndsWithTryBoundary() ? 1 : GetSuccessors().size(); } + // Create a new block between this block and its predecessors. The new block + // is added to the graph, all predecessor edges are relinked to it and an edge + // is created to `this`. Returns the new empty block. Reverse post order or + // loop and try/catch information are not updated. + HBasicBlock* CreateImmediateDominator(); + // Split the block into two blocks just before `cursor`. Returns the newly // created, latter block. Note that this method will add the block to the // graph, create a Goto at the end of the former block and will create an edge // between the blocks. It will not, however, update the reverse post order or - // loop information. + // loop and try/catch information. HBasicBlock* SplitBefore(HInstruction* cursor); // Split the block into two blocks just after `cursor`. Returns the newly @@ -940,6 +939,8 @@ class HBasicBlock : public ArenaObject<kArenaAllocBasicBlock> { // the appropriate try entry will be returned. const HTryBoundary* ComputeTryEntryOfSuccessors() const; + bool HasThrowingInstructions() const; + // Returns whether this block dominates the blocked passed as parameter. bool Dominates(HBasicBlock* block) const; @@ -949,7 +950,6 @@ class HBasicBlock : public ArenaObject<kArenaAllocBasicBlock> { void SetLifetimeStart(size_t start) { lifetime_start_ = start; } void SetLifetimeEnd(size_t end) { lifetime_end_ = end; } - bool EndsWithControlFlowInstruction() const; bool EndsWithIf() const; bool EndsWithTryBoundary() const; @@ -1067,6 +1067,10 @@ class HLoopInformationOutwardIterator : public ValueObject { M(Shr, BinaryOperation) \ M(StaticFieldGet, Instruction) \ M(StaticFieldSet, Instruction) \ + M(UnresolvedInstanceFieldGet, Instruction) \ + M(UnresolvedInstanceFieldSet, Instruction) \ + M(UnresolvedStaticFieldGet, Instruction) \ + M(UnresolvedStaticFieldSet, Instruction) \ M(StoreLocal, Instruction) \ M(Sub, BinaryOperation) \ M(SuspendCheck, Instruction) \ @@ -1644,17 +1648,34 @@ class ReferenceTypeInfo : ValueObject { bool IsValid() const SHARED_REQUIRES(Locks::mutator_lock_) { return IsValidHandle(type_handle_); } + bool IsExact() const { return is_exact_; } bool IsObjectClass() const SHARED_REQUIRES(Locks::mutator_lock_) { DCHECK(IsValid()); return GetTypeHandle()->IsObjectClass(); } + + bool IsObjectArray() const SHARED_REQUIRES(Locks::mutator_lock_) { + DCHECK(IsValid()); + return IsArrayClass() && GetTypeHandle()->GetComponentType()->IsObjectClass(); + } + bool IsInterface() const SHARED_REQUIRES(Locks::mutator_lock_) { DCHECK(IsValid()); return GetTypeHandle()->IsInterface(); } + bool IsArrayClass() const SHARED_REQUIRES(Locks::mutator_lock_) { + return GetTypeHandle()->IsArrayClass(); + } + + bool CanArrayHold(ReferenceTypeInfo rti) const SHARED_REQUIRES(Locks::mutator_lock_) { + if (!IsExact()) return false; + if (!IsArrayClass()) return false; + return GetTypeHandle()->GetComponentType()->IsAssignableFrom(rti.GetTypeHandle().Get()); + } + Handle<mirror::Class> GetTypeHandle() const { return type_handle_; } bool IsSupertypeOf(ReferenceTypeInfo rti) const SHARED_REQUIRES(Locks::mutator_lock_) { @@ -2222,7 +2243,9 @@ class HIntConstant : public HConstant { public: int32_t GetValue() const { return value_; } - uint64_t GetValueAsUint64() const OVERRIDE { return static_cast<uint64_t>(value_); } + uint64_t GetValueAsUint64() const OVERRIDE { + return static_cast<uint64_t>(static_cast<uint32_t>(value_)); + } 
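+  // (The inner uint32_t cast above zero-extends the 32-bit value; casting the signed int32_t directly to uint64_t would sign-extend negative constants.)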
bool InstructionDataEquals(HInstruction* other) const OVERRIDE { DCHECK(other->IsIntConstant()); @@ -2408,7 +2431,9 @@ class HCurrentMethod : public HExpression<0> { // will be the block containing the next Dex opcode. class HPackedSwitch : public HTemplateInstruction<1> { public: - HPackedSwitch(int32_t start_value, int32_t num_entries, HInstruction* input, + HPackedSwitch(int32_t start_value, + uint32_t num_entries, + HInstruction* input, uint32_t dex_pc = kNoDexPc) : HTemplateInstruction(SideEffects::None(), dex_pc), start_value_(start_value), @@ -2420,7 +2445,7 @@ class HPackedSwitch : public HTemplateInstruction<1> { int32_t GetStartValue() const { return start_value_; } - int32_t GetNumEntries() const { return num_entries_; } + uint32_t GetNumEntries() const { return num_entries_; } HBasicBlock* GetDefaultBlock() const { // Last entry is the default block. @@ -2429,8 +2454,8 @@ class HPackedSwitch : public HTemplateInstruction<1> { DECLARE_INSTRUCTION(PackedSwitch); private: - int32_t start_value_; - int32_t num_entries_; + const int32_t start_value_; + const uint32_t num_entries_; DISALLOW_COPY_AND_ASSIGN(HPackedSwitch); }; @@ -4310,7 +4335,8 @@ class HArraySet : public HTemplateInstruction<3> { SideEffectsForArchRuntimeCalls(value->GetType())), dex_pc), expected_component_type_(expected_component_type), needs_type_check_(value->GetType() == Primitive::kPrimNot), - value_can_be_null_(true) { + value_can_be_null_(true), + static_type_of_array_is_object_array_(false) { SetRawInputAt(0, array); SetRawInputAt(1, index); SetRawInputAt(2, value); @@ -4339,8 +4365,13 @@ class HArraySet : public HTemplateInstruction<3> { value_can_be_null_ = false; } + void SetStaticTypeOfArrayIsObjectArray() { + static_type_of_array_is_object_array_ = true; + } + bool GetValueCanBeNull() const { return value_can_be_null_; } bool NeedsTypeCheck() const { return needs_type_check_; } + bool StaticTypeOfArrayIsObjectArray() const { return static_type_of_array_is_object_array_; } HInstruction* GetArray() const { return InputAt(0); } HInstruction* GetIndex() const { return InputAt(1); } @@ -4367,6 +4398,9 @@ class HArraySet : public HTemplateInstruction<3> { const Primitive::Type expected_component_type_; bool needs_type_check_; bool value_can_be_null_; + // Cached information for the reference_type_info_ so that codegen + // does not need to inspect the static type. + bool static_type_of_array_is_object_array_; DISALLOW_COPY_AND_ASSIGN(HArraySet); }; @@ -4479,12 +4513,14 @@ class HLoadClass : public HExpression<1> { uint16_t type_index, const DexFile& dex_file, bool is_referrers_class, - uint32_t dex_pc) + uint32_t dex_pc, + bool needs_access_check) : HExpression(Primitive::kPrimNot, SideEffectsForArchRuntimeCalls(), dex_pc), type_index_(type_index), dex_file_(dex_file), is_referrers_class_(is_referrers_class), generate_clinit_check_(false), + needs_access_check_(needs_access_check), loaded_class_rti_(ReferenceTypeInfo::CreateInvalid()) { SetRawInputAt(0, current_method); } @@ -4504,19 +4540,22 @@ class HLoadClass : public HExpression<1> { bool NeedsEnvironment() const OVERRIDE { // Will call runtime and load the class if the class is not loaded yet. // TODO: finer grain decision. 
- return !is_referrers_class_; + return !is_referrers_class_ || needs_access_check_; } bool MustGenerateClinitCheck() const { return generate_clinit_check_; } - void SetMustGenerateClinitCheck(bool generate_clinit_check) { generate_clinit_check_ = generate_clinit_check; } bool CanCallRuntime() const { - return MustGenerateClinitCheck() || !is_referrers_class_; + return MustGenerateClinitCheck() || !is_referrers_class_ || needs_access_check_; + } + + bool NeedsAccessCheck() const { + return needs_access_check_; } bool CanThrow() const OVERRIDE { @@ -4552,6 +4591,7 @@ class HLoadClass : public HExpression<1> { // Whether this instruction must generate the initialization check. // Used for code generation. bool generate_clinit_check_; + bool needs_access_check_; ReferenceTypeInfo loaded_class_rti_; @@ -4705,6 +4745,112 @@ class HStaticFieldSet : public HTemplateInstruction<2> { DISALLOW_COPY_AND_ASSIGN(HStaticFieldSet); }; +class HUnresolvedInstanceFieldGet : public HExpression<1> { + public: + HUnresolvedInstanceFieldGet(HInstruction* obj, + Primitive::Type field_type, + uint32_t field_index, + uint32_t dex_pc) + : HExpression(field_type, SideEffects::AllExceptGCDependency(), dex_pc), + field_index_(field_index) { + SetRawInputAt(0, obj); + } + + bool NeedsEnvironment() const OVERRIDE { return true; } + bool CanThrow() const OVERRIDE { return true; } + + Primitive::Type GetFieldType() const { return GetType(); } + uint32_t GetFieldIndex() const { return field_index_; } + + DECLARE_INSTRUCTION(UnresolvedInstanceFieldGet); + + private: + const uint32_t field_index_; + + DISALLOW_COPY_AND_ASSIGN(HUnresolvedInstanceFieldGet); +}; + +class HUnresolvedInstanceFieldSet : public HTemplateInstruction<2> { + public: + HUnresolvedInstanceFieldSet(HInstruction* obj, + HInstruction* value, + Primitive::Type field_type, + uint32_t field_index, + uint32_t dex_pc) + : HTemplateInstruction(SideEffects::AllExceptGCDependency(), dex_pc), + field_type_(field_type), + field_index_(field_index) { + DCHECK_EQ(field_type, value->GetType()); + SetRawInputAt(0, obj); + SetRawInputAt(1, value); + } + + bool NeedsEnvironment() const OVERRIDE { return true; } + bool CanThrow() const OVERRIDE { return true; } + + Primitive::Type GetFieldType() const { return field_type_; } + uint32_t GetFieldIndex() const { return field_index_; } + + DECLARE_INSTRUCTION(UnresolvedInstanceFieldSet); + + private: + const Primitive::Type field_type_; + const uint32_t field_index_; + + DISALLOW_COPY_AND_ASSIGN(HUnresolvedInstanceFieldSet); +}; + +class HUnresolvedStaticFieldGet : public HExpression<0> { + public: + HUnresolvedStaticFieldGet(Primitive::Type field_type, + uint32_t field_index, + uint32_t dex_pc) + : HExpression(field_type, SideEffects::AllExceptGCDependency(), dex_pc), + field_index_(field_index) { + } + + bool NeedsEnvironment() const OVERRIDE { return true; } + bool CanThrow() const OVERRIDE { return true; } + + Primitive::Type GetFieldType() const { return GetType(); } + uint32_t GetFieldIndex() const { return field_index_; } + + DECLARE_INSTRUCTION(UnresolvedStaticFieldGet); + + private: + const uint32_t field_index_; + + DISALLOW_COPY_AND_ASSIGN(HUnresolvedStaticFieldGet); +}; + +class HUnresolvedStaticFieldSet : public HTemplateInstruction<1> { + public: + HUnresolvedStaticFieldSet(HInstruction* value, + Primitive::Type field_type, + uint32_t field_index, + uint32_t dex_pc) + : HTemplateInstruction(SideEffects::AllExceptGCDependency(), dex_pc), + field_type_(field_type), + field_index_(field_index) { + 
DCHECK_EQ(field_type, value->GetType()); + SetRawInputAt(0, value); + } + + bool NeedsEnvironment() const OVERRIDE { return true; } + bool CanThrow() const OVERRIDE { return true; } + + Primitive::Type GetFieldType() const { return field_type_; } + uint32_t GetFieldIndex() const { return field_index_; } + + DECLARE_INSTRUCTION(UnresolvedStaticFieldSet); + + private: + const Primitive::Type field_type_; + const uint32_t field_index_; + + DISALLOW_COPY_AND_ASSIGN(HUnresolvedStaticFieldSet); +}; + // Implement the move-exception DEX instruction. class HLoadException : public HExpression<0> { public: @@ -4757,6 +4903,7 @@ class HThrow : public HTemplateInstruction<1> { * or `HCheckCast`. */ enum class TypeCheckKind { + kUnresolvedCheck, // Check against an unresolved type. kExactCheck, // Can do a single class compare. kClassHierarchyCheck, // Can just walk the super class chain. kAbstractClassCheck, // Can just walk the super class chain, starting one up. @@ -5053,7 +5200,10 @@ static constexpr size_t kDefaultNumberOfMoves = 4; class HParallelMove : public HTemplateInstruction<0> { public: explicit HParallelMove(ArenaAllocator* arena, uint32_t dex_pc = kNoDexPc) - : HTemplateInstruction(SideEffects::None(), dex_pc), moves_(arena, kDefaultNumberOfMoves) {} + : HTemplateInstruction(SideEffects::None(), dex_pc), + moves_(arena->Adapter(kArenaAllocMoveOperands)) { + moves_.reserve(kDefaultNumberOfMoves); + } void AddMove(Location source, Location destination, @@ -5063,15 +5213,15 @@ class HParallelMove : public HTemplateInstruction<0> { DCHECK(destination.IsValid()); if (kIsDebugBuild) { if (instruction != nullptr) { - for (size_t i = 0, e = moves_.Size(); i < e; ++i) { - if (moves_.Get(i).GetInstruction() == instruction) { + for (const MoveOperands& move : moves_) { + if (move.GetInstruction() == instruction) { // Special case the situation where the move is for the spill slot // of the instruction. 
if ((GetPrevious() == instruction) || ((GetPrevious() == nullptr) && instruction->IsPhi() && instruction->GetBlock() == GetBlock())) { - DCHECK_NE(destination.GetKind(), moves_.Get(i).GetDestination().GetKind()) + DCHECK_NE(destination.GetKind(), move.GetDestination().GetKind()) << "Doing parallel moves for the same instruction."; } else { DCHECK(false) << "Doing parallel moves for the same instruction."; @@ -5079,26 +5229,27 @@ class HParallelMove : public HTemplateInstruction<0> { } } } - for (size_t i = 0, e = moves_.Size(); i < e; ++i) { - DCHECK(!destination.OverlapsWith(moves_.Get(i).GetDestination())) + for (const MoveOperands& move : moves_) { + DCHECK(!destination.OverlapsWith(move.GetDestination())) << "Overlapped destination for two moves in a parallel move: " - << moves_.Get(i).GetSource() << " ==> " << moves_.Get(i).GetDestination() << " and " + << move.GetSource() << " ==> " << move.GetDestination() << " and " << source << " ==> " << destination; } } - moves_.Add(MoveOperands(source, destination, type, instruction)); + moves_.emplace_back(source, destination, type, instruction); } - MoveOperands* MoveOperandsAt(size_t index) const { - return moves_.GetRawStorage() + index; + MoveOperands* MoveOperandsAt(size_t index) { + DCHECK_LT(index, moves_.size()); + return &moves_[index]; } - size_t NumMoves() const { return moves_.Size(); } + size_t NumMoves() const { return moves_.size(); } DECLARE_INSTRUCTION(ParallelMove); private: - GrowableArray<MoveOperands> moves_; + ArenaVector<MoveOperands> moves_; DISALLOW_COPY_AND_ASSIGN(HParallelMove); }; diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index a2b613194f..5177b9a794 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -310,9 +310,6 @@ class OptimizingCompiler FINAL : public Compiler { std::unique_ptr<std::ostream> visualizer_output_; - // Delegate to Quick in case the optimizing compiler cannot compile a method. - std::unique_ptr<Compiler> delegate_; - DISALLOW_COPY_AND_ASSIGN(OptimizingCompiler); }; @@ -321,12 +318,9 @@ static const int kMaximumCompilationTimeBeforeWarning = 100; /* ms */ OptimizingCompiler::OptimizingCompiler(CompilerDriver* driver) : Compiler(driver, kMaximumCompilationTimeBeforeWarning), run_optimizations_( - (driver->GetCompilerOptions().GetCompilerFilter() != CompilerOptions::kTime) - && !driver->GetCompilerOptions().GetDebuggable()), - delegate_(Create(driver, Compiler::Kind::kQuick)) {} + driver->GetCompilerOptions().GetCompilerFilter() != CompilerOptions::kTime) {} void OptimizingCompiler::Init() { - delegate_->Init(); // Enable C1visualizer output. Must be done in Init() because the compiler // driver is not fully initialized when passed to the compiler's constructor. 
CompilerDriver* driver = GetCompilerDriver(); @@ -345,7 +339,6 @@ void OptimizingCompiler::Init() { } void OptimizingCompiler::UnInit() const { - delegate_->UnInit(); } OptimizingCompiler::~OptimizingCompiler() { @@ -354,8 +347,7 @@ OptimizingCompiler::~OptimizingCompiler() { } } -void OptimizingCompiler::InitCompilationUnit(CompilationUnit& cu) const { - delegate_->InitCompilationUnit(cu); +void OptimizingCompiler::InitCompilationUnit(CompilationUnit& cu ATTRIBUTE_UNUSED) const { } bool OptimizingCompiler::CanCompileMethod(uint32_t method_idx ATTRIBUTE_UNUSED, @@ -575,12 +567,6 @@ CompiledMethod* OptimizingCompiler::CompileOptimized(HGraph* graph, CompilerDriver* compiler_driver, const DexCompilationUnit& dex_compilation_unit, PassObserver* pass_observer) const { - if (graph->HasTryCatch() && graph->IsDebuggable()) { - // TODO: b/24054676, stop creating catch phis eagerly to avoid special cases like phis without - // inputs. - return nullptr; - } - ScopedObjectAccess soa(Thread::Current()); StackHandleScopeCollection handles(soa.Self()); soa.Self()->TransitionFromRunnableToSuspended(kNative); @@ -836,8 +822,12 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite return compiled_method; } -static bool HasOnlyUnresolvedFailures(const VerifiedMethod* verified_method) { - uint32_t unresolved_mask = verifier::VerifyError::VERIFY_ERROR_NO_CLASS; +static bool CanHandleVerificationFailure(const VerifiedMethod* verified_method) { + // For access errors the compiler will use the unresolved helpers (e.g. HInvokeUnresolved). + uint32_t unresolved_mask = verifier::VerifyError::VERIFY_ERROR_NO_CLASS + | verifier::VerifyError::VERIFY_ERROR_ACCESS_CLASS + | verifier::VerifyError::VERIFY_ERROR_ACCESS_FIELD + | verifier::VerifyError::VERIFY_ERROR_ACCESS_METHOD; return (verified_method->GetEncounteredVerificationFailures() & (~unresolved_mask)) == 0; } @@ -854,7 +844,7 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, const VerifiedMethod* verified_method = compiler_driver->GetVerifiedMethod(&dex_file, method_idx); DCHECK(!verified_method->HasRuntimeThrow()); if (compiler_driver->IsMethodVerifiedWithoutFailures(method_idx, class_def_idx, dex_file) - || HasOnlyUnresolvedFailures(verified_method)) { + || CanHandleVerificationFailure(verified_method)) { method = TryCompile(code_item, access_flags, invoke_type, class_def_idx, method_idx, jclass_loader, dex_file, dex_cache); } else { @@ -865,15 +855,6 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, } } - if (method != nullptr) { - return method; - } - method = delegate_->Compile(code_item, access_flags, invoke_type, class_def_idx, method_idx, - jclass_loader, dex_file, dex_cache); - - if (method != nullptr) { - MaybeRecordStat(MethodCompilationStat::kCompiledQuick); - } return method; } diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h index c7701b70ad..6375cf1a56 100644 --- a/compiler/optimizing/optimizing_compiler_stats.h +++ b/compiler/optimizing/optimizing_compiler_stats.h @@ -29,11 +29,12 @@ enum MethodCompilationStat { kAttemptCompilation = 0, kCompiledBaseline, kCompiledOptimized, - kCompiledQuick, kInlinedInvoke, kInstructionSimplifications, kInstructionSimplificationsArch, kUnresolvedMethod, + kUnresolvedField, + kUnresolvedFieldNotAFastAccess, kNotCompiledBranchOutsideMethodCode, kNotCompiledCannotBuildSSA, kNotCompiledCantAccesType, @@ -45,7 +46,6 @@ enum MethodCompilationStat { 
kNotCompiledPathological, kNotCompiledSpaceFilter, kNotCompiledUnhandledInstruction, - kNotCompiledUnresolvedField, kNotCompiledUnsupportedIsa, kNotCompiledVerifyAtRuntime, kNotOptimizedDisabled, @@ -73,14 +73,11 @@ class OptimizingCompilerStats { compile_stats_[kCompiledBaseline] * 100 / compile_stats_[kAttemptCompilation]; size_t optimized_percent = compile_stats_[kCompiledOptimized] * 100 / compile_stats_[kAttemptCompilation]; - size_t quick_percent = - compile_stats_[kCompiledQuick] * 100 / compile_stats_[kAttemptCompilation]; std::ostringstream oss; oss << "Attempted compilation of " << compile_stats_[kAttemptCompilation] << " methods: "; oss << unoptimized_percent << "% (" << compile_stats_[kCompiledBaseline] << ") unoptimized, "; oss << optimized_percent << "% (" << compile_stats_[kCompiledOptimized] << ") optimized, "; - oss << quick_percent << "% (" << compile_stats_[kCompiledQuick] << ") quick."; LOG(INFO) << oss.str(); @@ -99,11 +96,12 @@ class OptimizingCompilerStats { case kAttemptCompilation : return "kAttemptCompilation"; case kCompiledBaseline : return "kCompiledBaseline"; case kCompiledOptimized : return "kCompiledOptimized"; - case kCompiledQuick : return "kCompiledQuick"; case kInlinedInvoke : return "kInlinedInvoke"; case kInstructionSimplifications: return "kInstructionSimplifications"; case kInstructionSimplificationsArch: return "kInstructionSimplificationsArch"; case kUnresolvedMethod : return "kUnresolvedMethod"; + case kUnresolvedField : return "kUnresolvedField"; + case kUnresolvedFieldNotAFastAccess : return "kUnresolvedFieldNotAFastAccess"; case kNotCompiledBranchOutsideMethodCode: return "kNotCompiledBranchOutsideMethodCode"; case kNotCompiledCannotBuildSSA : return "kNotCompiledCannotBuildSSA"; case kNotCompiledCantAccesType : return "kNotCompiledCantAccesType"; @@ -115,7 +113,6 @@ class OptimizingCompilerStats { case kNotCompiledPathological : return "kNotCompiledPathological"; case kNotCompiledSpaceFilter : return "kNotCompiledSpaceFilter"; case kNotCompiledUnhandledInstruction : return "kNotCompiledUnhandledInstruction"; - case kNotCompiledUnresolvedField : return "kNotCompiledUnresolvedField"; case kNotCompiledUnsupportedIsa : return "kNotCompiledUnsupportedIsa"; case kNotCompiledVerifyAtRuntime : return "kNotCompiledVerifyAtRuntime"; case kNotOptimizedDisabled : return "kNotOptimizedDisabled"; diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc index f9d812f6a6..fce776920d 100644 --- a/compiler/optimizing/parallel_move_resolver.cc +++ b/compiler/optimizing/parallel_move_resolver.cc @@ -16,6 +16,8 @@ #include <iostream> #include "parallel_move_resolver.h" + +#include "base/stl_util.h" #include "nodes.h" namespace art { @@ -28,19 +30,19 @@ void ParallelMoveResolver::BuildInitialMoveList(HParallelMove* parallel_move) { for (size_t i = 0; i < parallel_move->NumMoves(); ++i) { MoveOperands* move = parallel_move->MoveOperandsAt(i); if (!move->IsRedundant()) { - moves_.Add(move); + moves_.push_back(move); } } } void ParallelMoveResolverWithSwap::EmitNativeCode(HParallelMove* parallel_move) { - DCHECK(moves_.IsEmpty()); + DCHECK(moves_.empty()); // Build up a worklist of moves. BuildInitialMoveList(parallel_move); // Move stack/stack slot to take advantage of a free register on constrained machines. 
- for (size_t i = 0; i < moves_.Size(); ++i) { - const MoveOperands& move = *moves_.Get(i); + for (size_t i = 0; i < moves_.size(); ++i) { + const MoveOperands& move = *moves_[i]; // Ignore constants and moves already eliminated. if (move.IsEliminated() || move.GetSource().IsConstant()) { continue; @@ -52,8 +54,8 @@ void ParallelMoveResolverWithSwap::EmitNativeCode(HParallelMove* parallel_move) } } - for (size_t i = 0; i < moves_.Size(); ++i) { - const MoveOperands& move = *moves_.Get(i); + for (size_t i = 0; i < moves_.size(); ++i) { + const MoveOperands& move = *moves_[i]; // Skip constants to perform them last. They don't block other moves // and skipping such moves with register destinations keeps those // registers free for the whole algorithm. @@ -63,8 +65,8 @@ void ParallelMoveResolverWithSwap::EmitNativeCode(HParallelMove* parallel_move) } // Perform the moves with constant sources. - for (size_t i = 0; i < moves_.Size(); ++i) { - MoveOperands* move = moves_.Get(i); + for (size_t i = 0; i < moves_.size(); ++i) { + MoveOperands* move = moves_[i]; if (!move->IsEliminated()) { DCHECK(move->GetSource().IsConstant()); EmitMove(i); @@ -73,7 +75,7 @@ void ParallelMoveResolverWithSwap::EmitNativeCode(HParallelMove* parallel_move) } } - moves_.Reset(); + moves_.clear(); } Location LowOf(Location location) { @@ -123,7 +125,8 @@ MoveOperands* ParallelMoveResolverWithSwap::PerformMove(size_t index) { // which means that a call to PerformMove could change any source operand // in the move graph. - MoveOperands* move = moves_.Get(index); + DCHECK_LT(index, moves_.size()); + MoveOperands* move = moves_[index]; DCHECK(!move->IsPending()); if (move->IsRedundant()) { // Because we swap register pairs first, following, un-pending @@ -143,8 +146,8 @@ MoveOperands* ParallelMoveResolverWithSwap::PerformMove(size_t index) { // as this one's destination blocks this one so recursively perform all // such moves. MoveOperands* required_swap = nullptr; - for (size_t i = 0; i < moves_.Size(); ++i) { - const MoveOperands& other_move = *moves_.Get(i); + for (size_t i = 0; i < moves_.size(); ++i) { + const MoveOperands& other_move = *moves_[i]; if (other_move.Blocks(destination) && !other_move.IsPending()) { // Though PerformMove can change any source operand in the move graph, // calling `PerformMove` cannot create a blocking move via a swap @@ -163,7 +166,7 @@ MoveOperands* ParallelMoveResolverWithSwap::PerformMove(size_t index) { // at the next moves. Swapping is not blocked by anything, it just // updates other moves' source. break; - } else if (required_swap == moves_.Get(i)) { + } else if (required_swap == moves_[i]) { // If `other_move` was swapped, we iterate again to find a new // potential cycle. required_swap = nullptr; @@ -171,7 +174,7 @@ MoveOperands* ParallelMoveResolverWithSwap::PerformMove(size_t index) { } else if (required_swap != nullptr) { // A move is required to swap. We walk back the cycle to find the // move by just returning from this `PerformMove`.
- moves_.Get(index)->ClearPending(destination); + moves_[index]->ClearPending(destination); return required_swap; } } @@ -197,14 +200,13 @@ MoveOperands* ParallelMoveResolverWithSwap::PerformMove(size_t index) { DCHECK_EQ(required_swap, move); do_swap = true; } else { - for (size_t i = 0; i < moves_.Size(); ++i) { - const MoveOperands& other_move = *moves_.Get(i); - if (other_move.Blocks(destination)) { - DCHECK(other_move.IsPending()); - if (!move->Is64BitMove() && other_move.Is64BitMove()) { + for (MoveOperands* other_move : moves_) { + if (other_move->Blocks(destination)) { + DCHECK(other_move->IsPending()); + if (!move->Is64BitMove() && other_move->Is64BitMove()) { // We swap 64bits moves before swapping 32bits moves. Go back from the // cycle by returning the move that must be swapped. - return moves_.Get(i); + return other_move; } do_swap = true; break; @@ -220,12 +222,11 @@ MoveOperands* ParallelMoveResolverWithSwap::PerformMove(size_t index) { Location source = move->GetSource(); Location swap_destination = move->GetDestination(); move->Eliminate(); - for (size_t i = 0; i < moves_.Size(); ++i) { - const MoveOperands& other_move = *moves_.Get(i); - if (other_move.Blocks(source)) { - UpdateSourceOf(moves_.Get(i), source, swap_destination); - } else if (other_move.Blocks(swap_destination)) { - UpdateSourceOf(moves_.Get(i), swap_destination, source); + for (MoveOperands* other_move : moves_) { + if (other_move->Blocks(source)) { + UpdateSourceOf(other_move, source, swap_destination); + } else if (other_move->Blocks(swap_destination)) { + UpdateSourceOf(other_move, swap_destination, source); } } // If the swap was required because of a 64bits move in the middle of a cycle, @@ -242,14 +243,14 @@ MoveOperands* ParallelMoveResolverWithSwap::PerformMove(size_t index) { } bool ParallelMoveResolverWithSwap::IsScratchLocation(Location loc) { - for (size_t i = 0; i < moves_.Size(); ++i) { - if (moves_.Get(i)->Blocks(loc)) { + for (MoveOperands* move : moves_) { + if (move->Blocks(loc)) { return false; } } - for (size_t i = 0; i < moves_.Size(); ++i) { - if (moves_.Get(i)->GetDestination().Equals(loc)) { + for (MoveOperands* move : moves_) { + if (move->GetDestination().Equals(loc)) { return true; } } @@ -302,8 +303,8 @@ ParallelMoveResolverWithSwap::ScratchRegisterScope::~ScratchRegisterScope() { void ParallelMoveResolverNoSwap::EmitNativeCode(HParallelMove* parallel_move) { DCHECK_EQ(GetNumberOfPendingMoves(), 0u); - DCHECK(moves_.IsEmpty()); - DCHECK(scratches_.IsEmpty()); + DCHECK(moves_.empty()); + DCHECK(scratches_.empty()); // Backend dependent initialization. PrepareForEmitNativeCode(); @@ -311,8 +312,8 @@ void ParallelMoveResolverNoSwap::EmitNativeCode(HParallelMove* parallel_move) { // Build up a worklist of moves. BuildInitialMoveList(parallel_move); - for (size_t i = 0; i < moves_.Size(); ++i) { - const MoveOperands& move = *moves_.Get(i); + for (size_t i = 0; i < moves_.size(); ++i) { + const MoveOperands& move = *moves_[i]; // Skip constants to perform them last. They don't block other moves and // skipping such moves with register destinations keeps those registers // free for the whole algorithm. @@ -324,8 +325,8 @@ void ParallelMoveResolverNoSwap::EmitNativeCode(HParallelMove* parallel_move) { // Perform the moves with constant sources and register destinations with UpdateMoveSource() // to reduce the number of literal loads. Stack destinations are skipped since we won't benefit // from changing the constant sources to stack locations.
- for (size_t i = 0; i < moves_.Size(); ++i) { - MoveOperands* move = moves_.Get(i); + for (size_t i = 0; i < moves_.size(); ++i) { + MoveOperands* move = moves_[i]; Location destination = move->GetDestination(); if (!move->IsEliminated() && !destination.IsStackSlot() && !destination.IsDoubleStackSlot()) { Location source = move->GetSource(); @@ -344,8 +345,8 @@ void ParallelMoveResolverNoSwap::EmitNativeCode(HParallelMove* parallel_move) { } // Perform the rest of the moves. - for (size_t i = 0; i < moves_.Size(); ++i) { - MoveOperands* move = moves_.Get(i); + for (size_t i = 0; i < moves_.size(); ++i) { + MoveOperands* move = moves_[i]; if (!move->IsEliminated()) { EmitMove(i); move->Eliminate(); @@ -358,19 +359,18 @@ void ParallelMoveResolverNoSwap::EmitNativeCode(HParallelMove* parallel_move) { // Backend dependent cleanup. FinishEmitNativeCode(); - moves_.Reset(); - scratches_.Reset(); + moves_.clear(); + scratches_.clear(); } Location ParallelMoveResolverNoSwap::GetScratchLocation(Location::Kind kind) { - for (size_t i = 0; i < scratches_.Size(); ++i) { - Location loc = scratches_.Get(i); + for (Location loc : scratches_) { if (loc.GetKind() == kind && !IsBlockedByMoves(loc)) { return loc; } } - for (size_t i = 0; i < moves_.Size(); ++i) { - Location loc = moves_.Get(i)->GetDestination(); + for (MoveOperands* move : moves_) { + Location loc = move->GetDestination(); if (loc.GetKind() == kind && !IsBlockedByMoves(loc)) { return loc; } @@ -380,18 +380,18 @@ Location ParallelMoveResolverNoSwap::GetScratchLocation(Location::Kind kind) { void ParallelMoveResolverNoSwap::AddScratchLocation(Location loc) { if (kIsDebugBuild) { - for (size_t i = 0; i < scratches_.Size(); ++i) { - DCHECK(!loc.Equals(scratches_.Get(i))); + for (Location scratch : scratches_) { + CHECK(!loc.Equals(scratch)); } } - scratches_.Add(loc); + scratches_.push_back(loc); } void ParallelMoveResolverNoSwap::RemoveScratchLocation(Location loc) { DCHECK(!IsBlockedByMoves(loc)); - for (size_t i = 0; i < scratches_.Size(); ++i) { - if (loc.Equals(scratches_.Get(i))) { - scratches_.DeleteAt(i); + for (auto it = scratches_.begin(), end = scratches_.end(); it != end; ++it) { + if (loc.Equals(*it)) { + scratches_.erase(it); break; } } @@ -406,7 +406,8 @@ void ParallelMoveResolverNoSwap::PerformMove(size_t index) { // we will update source operand in the move graph to reduce dependencies in // the graph. - MoveOperands* move = moves_.Get(index); + DCHECK_LT(index, moves_.size()); + MoveOperands* move = moves_[index]; DCHECK(!move->IsPending()); DCHECK(!move->IsEliminated()); if (move->IsRedundant()) { @@ -433,8 +434,8 @@ void ParallelMoveResolverNoSwap::PerformMove(size_t index) { // dependencies. Any unperformed, unpending move with a source the same // as this one's destination blocks this one so recursively perform all // such moves. - for (size_t i = 0; i < moves_.Size(); ++i) { - const MoveOperands& other_move = *moves_.Get(i); + for (size_t i = 0; i < moves_.size(); ++i) { + const MoveOperands& other_move = *moves_[i]; if (other_move.Blocks(destination) && !other_move.IsPending()) { PerformMove(i); } @@ -490,8 +491,11 @@ void ParallelMoveResolverNoSwap::PerformMove(size_t index) { move->Eliminate(); UpdateMoveSource(pending_source, pending_destination); // Free any unblocked locations in the scratch location list. - for (size_t i = 0; i < scratches_.Size(); ++i) { - Location scratch = scratches_.Get(i); + // Note: Fetch size() on each iteration because scratches_ can be modified inside the loop. 
+ // FIXME: If FreeScratchLocation() removes the location from scratches_, + // we skip the next location. This happens for arm64. + for (size_t i = 0; i < scratches_.size(); ++i) { + Location scratch = scratches_[i]; // Only scratch overlapping with performed move source can be unblocked. if (scratch.OverlapsWith(pending_source) && !IsBlockedByMoves(scratch)) { FreeScratchLocation(pending_source); @@ -512,8 +516,7 @@ void ParallelMoveResolverNoSwap::UpdateMoveSource(Location from, Location to) { // This is not something we must do, but we can use fewer scratch locations with // this trick. For example, we can avoid using additional scratch locations for // moves (0 -> 1), (1 -> 2), (1 -> 0). - for (size_t i = 0; i < moves_.Size(); ++i) { - MoveOperands* move = moves_.Get(i); + for (MoveOperands* move : moves_) { if (move->GetSource().Equals(from)) { move->SetSource(to); } @@ -522,16 +525,15 @@ void ParallelMoveResolverNoSwap::UpdateMoveSource(Location from, Location to) { void ParallelMoveResolverNoSwap::AddPendingMove(Location source, Location destination, Primitive::Type type) { - pending_moves_.Add(new (allocator_) MoveOperands(source, destination, type, nullptr)); + pending_moves_.push_back(new (allocator_) MoveOperands(source, destination, type, nullptr)); } void ParallelMoveResolverNoSwap::DeletePendingMove(MoveOperands* move) { - pending_moves_.Delete(move); + RemoveElement(pending_moves_, move); } MoveOperands* ParallelMoveResolverNoSwap::GetUnblockedPendingMove(Location loc) { - for (size_t i = 0; i < pending_moves_.Size(); ++i) { - MoveOperands* move = pending_moves_.Get(i); + for (MoveOperands* move : pending_moves_) { Location destination = move->GetDestination(); // Only moves with destination overlapping with input loc can be unblocked. if (destination.OverlapsWith(loc) && !IsBlockedByMoves(destination)) { @@ -542,13 +544,13 @@ MoveOperands* ParallelMoveResolverNoSwap::GetUnblockedPendingMove(Location loc) } bool ParallelMoveResolverNoSwap::IsBlockedByMoves(Location loc) { - for (size_t i = 0; i < pending_moves_.Size(); ++i) { - if (pending_moves_.Get(i)->Blocks(loc)) { + for (MoveOperands* move : pending_moves_) { + if (move->Blocks(loc)) { return true; } } - for (size_t i = 0; i < moves_.Size(); ++i) { - if (moves_.Get(i)->Blocks(loc)) { + for (MoveOperands* move : moves_) { + if (move->Blocks(loc)) { return true; } } @@ -558,7 +560,7 @@ bool ParallelMoveResolverNoSwap::IsBlockedByMoves(Location loc) { // So far it is only used for debugging purposes to make sure all pending moves // have been performed. size_t ParallelMoveResolverNoSwap::GetNumberOfPendingMoves() { - return pending_moves_.Size(); + return pending_moves_.size(); } } // namespace art diff --git a/compiler/optimizing/parallel_move_resolver.h b/compiler/optimizing/parallel_move_resolver.h index 9ede91013e..4278861690 100644 --- a/compiler/optimizing/parallel_move_resolver.h +++ b/compiler/optimizing/parallel_move_resolver.h @@ -17,8 +17,8 @@ #ifndef ART_COMPILER_OPTIMIZING_PARALLEL_MOVE_RESOLVER_H_ #define ART_COMPILER_OPTIMIZING_PARALLEL_MOVE_RESOLVER_H_ +#include "base/arena_containers.h" #include "base/value_object.h" -#include "utils/growable_array.h" #include "locations.h" #include "primitive.h" @@ -31,7 +31,10 @@ class MoveOperands; // have their own subclass that implements corresponding virtual functions. 
class ParallelMoveResolver : public ValueObject { public: - explicit ParallelMoveResolver(ArenaAllocator* allocator) : moves_(allocator, 32) {} + explicit ParallelMoveResolver(ArenaAllocator* allocator) + : moves_(allocator->Adapter(kArenaAllocParallelMoveResolver)) { + moves_.reserve(32); + } virtual ~ParallelMoveResolver() {} // Resolve a set of parallel moves, emitting assembler instructions. @@ -41,7 +44,7 @@ class ParallelMoveResolver : public ValueObject { // Build the initial list of moves. void BuildInitialMoveList(HParallelMove* parallel_move); - GrowableArray<MoveOperands*> moves_; + ArenaVector<MoveOperands*> moves_; private: DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolver); @@ -120,8 +123,13 @@ class ParallelMoveResolverWithSwap : public ParallelMoveResolver { class ParallelMoveResolverNoSwap : public ParallelMoveResolver { public: explicit ParallelMoveResolverNoSwap(ArenaAllocator* allocator) - : ParallelMoveResolver(allocator), scratches_(allocator, 32), - pending_moves_(allocator, 8), allocator_(allocator) {} + : ParallelMoveResolver(allocator), + scratches_(allocator->Adapter(kArenaAllocParallelMoveResolver)), + pending_moves_(allocator->Adapter(kArenaAllocParallelMoveResolver)), + allocator_(allocator) { + scratches_.reserve(32); + pending_moves_.reserve(8); + } virtual ~ParallelMoveResolverNoSwap() {} // Resolve a set of parallel moves, emitting assembler instructions. @@ -160,7 +168,7 @@ class ParallelMoveResolverNoSwap : public ParallelMoveResolver { void RemoveScratchLocation(Location loc); // List of scratch locations. - GrowableArray<Location> scratches_; + ArenaVector<Location> scratches_; private: // Perform the move at the given index in `moves_` (possibly requiring other moves to satisfy @@ -183,7 +191,7 @@ class ParallelMoveResolverNoSwap : public ParallelMoveResolver { size_t GetNumberOfPendingMoves(); // Additional pending moves which might be added to resolve dependency cycle. - GrowableArray<MoveOperands*> pending_moves_; + ArenaVector<MoveOperands*> pending_moves_; // Used to allocate pending MoveOperands. 
ArenaAllocator* const allocator_; diff --git a/compiler/optimizing/parallel_move_test.cc b/compiler/optimizing/parallel_move_test.cc index f8f70105cf..da91cb811d 100644 --- a/compiler/optimizing/parallel_move_test.cc +++ b/compiler/optimizing/parallel_move_test.cc @@ -56,7 +56,8 @@ class TestParallelMoveResolverWithSwap : public ParallelMoveResolverWithSwap { : ParallelMoveResolverWithSwap(allocator) {} void EmitMove(size_t index) OVERRIDE { - MoveOperands* move = moves_.Get(index); + DCHECK_LT(index, moves_.size()); + MoveOperands* move = moves_[index]; if (!message_.str().empty()) { message_ << " "; } @@ -68,7 +69,8 @@ class TestParallelMoveResolverWithSwap : public ParallelMoveResolverWithSwap { } void EmitSwap(size_t index) OVERRIDE { - MoveOperands* move = moves_.Get(index); + DCHECK_LT(index, moves_.size()); + MoveOperands* move = moves_[index]; if (!message_.str().empty()) { message_ << " "; } @@ -127,7 +129,8 @@ class TestParallelMoveResolverNoSwap : public ParallelMoveResolverNoSwap { void FreeScratchLocation(Location loc ATTRIBUTE_UNUSED) OVERRIDE {} void EmitMove(size_t index) OVERRIDE { - MoveOperands* move = moves_.Get(index); + DCHECK_LT(index, moves_.size()); + MoveOperands* move = moves_[index]; if (!message_.str().empty()) { message_ << " "; } diff --git a/compiler/optimizing/primitive_type_propagation.cc b/compiler/optimizing/primitive_type_propagation.cc index af93438c9a..c98f43e461 100644 --- a/compiler/optimizing/primitive_type_propagation.cc +++ b/compiler/optimizing/primitive_type_propagation.cc @@ -108,8 +108,9 @@ void PrimitiveTypePropagation::VisitBasicBlock(HBasicBlock* block) { } void PrimitiveTypePropagation::ProcessWorklist() { - while (!worklist_.IsEmpty()) { - HPhi* instruction = worklist_.Pop(); + while (!worklist_.empty()) { + HPhi* instruction = worklist_.back(); + worklist_.pop_back(); if (UpdateType(instruction)) { AddDependentInstructionsToWorklist(instruction); } @@ -118,7 +119,7 @@ void PrimitiveTypePropagation::ProcessWorklist() { void PrimitiveTypePropagation::AddToWorklist(HPhi* instruction) { DCHECK(instruction->IsLive()); - worklist_.Add(instruction); + worklist_.push_back(instruction); } void PrimitiveTypePropagation::AddDependentInstructionsToWorklist(HInstruction* instruction) { diff --git a/compiler/optimizing/primitive_type_propagation.h b/compiler/optimizing/primitive_type_propagation.h index 6d370ed2ab..212fcfc69f 100644 --- a/compiler/optimizing/primitive_type_propagation.h +++ b/compiler/optimizing/primitive_type_propagation.h @@ -17,6 +17,7 @@ #ifndef ART_COMPILER_OPTIMIZING_PRIMITIVE_TYPE_PROPAGATION_H_ #define ART_COMPILER_OPTIMIZING_PRIMITIVE_TYPE_PROPAGATION_H_ +#include "base/arena_containers.h" #include "nodes.h" namespace art { @@ -25,7 +26,9 @@ namespace art { class PrimitiveTypePropagation : public ValueObject { public: explicit PrimitiveTypePropagation(HGraph* graph) - : graph_(graph), worklist_(graph->GetArena(), kDefaultWorklistSize) {} + : graph_(graph), worklist_(graph->GetArena()->Adapter(kArenaAllocPrimitiveTypePropagation)) { + worklist_.reserve(kDefaultWorklistSize); + } void Run(); @@ -37,7 +40,7 @@ class PrimitiveTypePropagation : public ValueObject { bool UpdateType(HPhi* phi); HGraph* const graph_; - GrowableArray<HPhi*> worklist_; + ArenaVector<HPhi*> worklist_; static constexpr size_t kDefaultWorklistSize = 8; diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc index a88c5431c5..f7a7e420bb 100644 --- 
a/compiler/optimizing/reference_type_propagation.cc +++ b/compiler/optimizing/reference_type_propagation.cc @@ -27,7 +27,7 @@ class RTPVisitor : public HGraphDelegateVisitor { public: RTPVisitor(HGraph* graph, StackHandleScopeCollection* handles, - GrowableArray<HInstruction*>* worklist, + ArenaVector<HInstruction*>* worklist, ReferenceTypeInfo::TypeHandle object_class_handle, ReferenceTypeInfo::TypeHandle class_class_handle, ReferenceTypeInfo::TypeHandle string_class_handle, @@ -52,6 +52,8 @@ class RTPVisitor : public HGraphDelegateVisitor { void SetClassAsTypeInfo(HInstruction* instr, mirror::Class* klass, bool is_exact); void VisitInstanceFieldGet(HInstanceFieldGet* instr) OVERRIDE; void VisitStaticFieldGet(HStaticFieldGet* instr) OVERRIDE; + void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* instr) OVERRIDE; + void VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet* instr) OVERRIDE; void VisitInvoke(HInvoke* instr) OVERRIDE; void VisitArrayGet(HArrayGet* instr) OVERRIDE; void VisitCheckCast(HCheckCast* instr) OVERRIDE; @@ -68,7 +70,7 @@ class RTPVisitor : public HGraphDelegateVisitor { ReferenceTypeInfo::TypeHandle class_class_handle_; ReferenceTypeInfo::TypeHandle string_class_handle_; ReferenceTypeInfo::TypeHandle throwable_class_handle_; - GrowableArray<HInstruction*>* worklist_; + ArenaVector<HInstruction*>* worklist_; static constexpr size_t kDefaultWorklistSize = 8; }; @@ -78,7 +80,8 @@ ReferenceTypePropagation::ReferenceTypePropagation(HGraph* graph, const char* name) : HOptimization(graph, name), handles_(handles), - worklist_(graph->GetArena(), kDefaultWorklistSize) { + worklist_(graph->GetArena()->Adapter(kArenaAllocReferenceTypePropagation)) { + worklist_.reserve(kDefaultWorklistSize); // Mutator lock is required for NewHandle, but annotalysis ignores constructors. ScopedObjectAccess soa(Thread::Current()); ClassLinker* linker = Runtime::Current()->GetClassLinker(); @@ -118,8 +121,9 @@ void ReferenceTypePropagation::Run() { if (instr->IsBoundType()) { DCHECK(instr->AsBoundType()->GetUpperBound().IsValid()); } else if (instr->IsLoadClass()) { - DCHECK(instr->AsLoadClass()->GetReferenceTypeInfo().IsExact()); - DCHECK(instr->AsLoadClass()->GetLoadedClassRTI().IsValid()); + HLoadClass* cls = instr->AsLoadClass(); + DCHECK(cls->GetReferenceTypeInfo().IsExact()); + DCHECK(!cls->GetLoadedClassRTI().IsValid() || cls->GetLoadedClassRTI().IsExact()); } else if (instr->IsNullCheck()) { DCHECK(instr->GetReferenceTypeInfo().IsEqual(instr->InputAt(0)->GetReferenceTypeInfo())) << "NullCheck " << instr->GetReferenceTypeInfo() @@ -165,6 +169,7 @@ static HBoundType* CreateBoundType(ArenaAllocator* arena, SHARED_REQUIRES(Locks::mutator_lock_) { ReferenceTypeInfo obj_rti = obj->GetReferenceTypeInfo(); ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI(); + DCHECK(class_rti.IsValid()); HBoundType* bound_type = new (arena) HBoundType(obj, class_rti, upper_can_be_null); // Narrow the type as much as possible. if (class_rti.GetTypeHandle()->CannotBeAssignedFromOtherTypes()) { @@ -313,6 +318,15 @@ void ReferenceTypePropagation::BoundTypeForIfInstanceOf(HBasicBlock* block) { return; } + HLoadClass* load_class = instanceOf->InputAt(1)->AsLoadClass(); + ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI(); + { + ScopedObjectAccess soa(Thread::Current()); + if (!class_rti.IsValid()) { + // We have loaded an unresolved class. Don't bother bounding the type. + return; + } + } // We only need to bound the type if we have uses in the relevant block.
// So start with null and create the HBoundType lazily, only if it's needed. HBoundType* bound_type = nullptr; @@ -333,8 +347,6 @@ void ReferenceTypePropagation::BoundTypeForIfInstanceOf(HBasicBlock* block) { if (instanceOfTrueBlock->Dominates(user->GetBlock())) { if (bound_type == nullptr) { ScopedObjectAccess soa(Thread::Current()); - HLoadClass* load_class = instanceOf->InputAt(1)->AsLoadClass(); - ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI(); HInstruction* insert_point = instanceOfTrueBlock->GetFirstInstruction(); if (ShouldCreateBoundType(insert_point, obj, class_rti, nullptr, instanceOfTrueBlock)) { bound_type = CreateBoundType( @@ -449,6 +461,22 @@ void RTPVisitor::VisitStaticFieldGet(HStaticFieldGet* instr) { UpdateFieldAccessTypeInfo(instr, instr->GetFieldInfo()); } +void RTPVisitor::VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* instr) { + // TODO: Use descriptor to get the actual type. + if (instr->GetFieldType() == Primitive::kPrimNot) { + instr->SetReferenceTypeInfo( + ReferenceTypeInfo::Create(object_class_handle_, /* is_exact */ false)); + } +} + +void RTPVisitor::VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet* instr) { + // TODO: Use descriptor to get the actual type. + if (instr->GetFieldType() == Primitive::kPrimNot) { + instr->SetReferenceTypeInfo( + ReferenceTypeInfo::Create(object_class_handle_, /* is_exact */ false)); + } +} + void RTPVisitor::VisitLoadClass(HLoadClass* instr) { ScopedObjectAccess soa(Thread::Current()); mirror::DexCache* dex_cache = @@ -456,10 +484,10 @@ void RTPVisitor::VisitLoadClass(HLoadClass* instr) { // Get type from dex cache assuming it was populated by the verifier. mirror::Class* resolved_class = dex_cache->GetResolvedType(instr->GetTypeIndex()); // TODO: investigating why we are still getting unresolved classes: b/22821472. - ReferenceTypeInfo::TypeHandle handle = (resolved_class != nullptr) - ? handles_->NewHandle(resolved_class) - : object_class_handle_; - instr->SetLoadedClassRTI(ReferenceTypeInfo::Create(handle, /* is_exact */ true)); + if (resolved_class != nullptr) { + instr->SetLoadedClassRTI(ReferenceTypeInfo::Create( + handles_->NewHandle(resolved_class), /* is_exact */ true)); + } instr->SetReferenceTypeInfo(ReferenceTypeInfo::Create(class_class_handle_, /* is_exact */ true)); } @@ -498,6 +526,15 @@ void RTPVisitor::VisitFakeString(HFakeString* instr) { } void RTPVisitor::VisitCheckCast(HCheckCast* check_cast) { + HLoadClass* load_class = check_cast->InputAt(1)->AsLoadClass(); + ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI(); + { + ScopedObjectAccess soa(Thread::Current()); + if (!class_rti.IsValid()) { + // We have loaded an unresolved class. Don't bother bounding the type.
+ return; + } + } HInstruction* obj = check_cast->InputAt(0); HBoundType* bound_type = nullptr; for (HUseIterator<HInstruction*> it(obj->GetUses()); !it.Done(); it.Advance()) { @@ -505,8 +542,6 @@ void RTPVisitor::VisitCheckCast(HCheckCast* check_cast) { if (check_cast->StrictlyDominates(user)) { if (bound_type == nullptr) { ScopedObjectAccess soa(Thread::Current()); - HLoadClass* load_class = check_cast->InputAt(1)->AsLoadClass(); - ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI(); if (ShouldCreateBoundType(check_cast->GetNext(), obj, class_rti, check_cast, nullptr)) { bound_type = CreateBoundType( GetGraph()->GetArena(), @@ -649,7 +684,7 @@ void RTPVisitor::VisitArrayGet(HArrayGet* instr) { ScopedObjectAccess soa(Thread::Current()); UpdateArrayGet(instr, handles_, object_class_handle_); if (!instr->GetReferenceTypeInfo().IsValid()) { - worklist_->Add(instr); + worklist_->push_back(instr); } } @@ -718,8 +753,9 @@ bool ReferenceTypePropagation::UpdateNullability(HInstruction* instr) { } void ReferenceTypePropagation::ProcessWorklist() { - while (!worklist_.IsEmpty()) { - HInstruction* instruction = worklist_.Pop(); + while (!worklist_.empty()) { + HInstruction* instruction = worklist_.back(); + worklist_.pop_back(); if (UpdateNullability(instruction) || UpdateReferenceTypeInfo(instruction)) { AddDependentInstructionsToWorklist(instruction); } @@ -729,7 +765,7 @@ void ReferenceTypePropagation::ProcessWorklist() { void ReferenceTypePropagation::AddToWorklist(HInstruction* instruction) { DCHECK_EQ(instruction->GetType(), Primitive::kPrimNot) << instruction->DebugName() << ":" << instruction->GetType(); - worklist_.Add(instruction); + worklist_.push_back(instruction); } void ReferenceTypePropagation::AddDependentInstructionsToWorklist(HInstruction* instruction) { diff --git a/compiler/optimizing/reference_type_propagation.h b/compiler/optimizing/reference_type_propagation.h index 62f6ab80b3..5493601adc 100644 --- a/compiler/optimizing/reference_type_propagation.h +++ b/compiler/optimizing/reference_type_propagation.h @@ -17,6 +17,7 @@ #ifndef ART_COMPILER_OPTIMIZING_REFERENCE_TYPE_PROPAGATION_H_ #define ART_COMPILER_OPTIMIZING_REFERENCE_TYPE_PROPAGATION_H_ +#include "base/arena_containers.h" #include "driver/dex_compilation_unit.h" #include "handle_scope-inl.h" #include "nodes.h" @@ -57,7 +58,7 @@ class ReferenceTypePropagation : public HOptimization { StackHandleScopeCollection* handles_; - GrowableArray<HInstruction*> worklist_; + ArenaVector<HInstruction*> worklist_; ReferenceTypeInfo::TypeHandle object_class_handle_; ReferenceTypeInfo::TypeHandle class_class_handle_; diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index 9594e3b8e1..9cdb89b7b3 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -43,21 +43,21 @@ RegisterAllocator::RegisterAllocator(ArenaAllocator* allocator, : allocator_(allocator), codegen_(codegen), liveness_(liveness), - unhandled_core_intervals_(allocator, 0), - unhandled_fp_intervals_(allocator, 0), + unhandled_core_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)), + unhandled_fp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)), unhandled_(nullptr), - handled_(allocator, 0), - active_(allocator, 0), - inactive_(allocator, 0), - physical_core_register_intervals_(allocator, codegen->GetNumberOfCoreRegisters()), - physical_fp_register_intervals_(allocator, codegen->GetNumberOfFloatingPointRegisters()), - temp_intervals_(allocator, 
4), - int_spill_slots_(allocator, kDefaultNumberOfSpillSlots), - long_spill_slots_(allocator, kDefaultNumberOfSpillSlots), - float_spill_slots_(allocator, kDefaultNumberOfSpillSlots), - double_spill_slots_(allocator, kDefaultNumberOfSpillSlots), + handled_(allocator->Adapter(kArenaAllocRegisterAllocator)), + active_(allocator->Adapter(kArenaAllocRegisterAllocator)), + inactive_(allocator->Adapter(kArenaAllocRegisterAllocator)), + physical_core_register_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)), + physical_fp_register_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)), + temp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)), + int_spill_slots_(allocator->Adapter(kArenaAllocRegisterAllocator)), + long_spill_slots_(allocator->Adapter(kArenaAllocRegisterAllocator)), + float_spill_slots_(allocator->Adapter(kArenaAllocRegisterAllocator)), + double_spill_slots_(allocator->Adapter(kArenaAllocRegisterAllocator)), catch_phi_spill_slots_(0), - safepoints_(allocator, 0), + safepoints_(allocator->Adapter(kArenaAllocRegisterAllocator)), processing_core_registers_(false), number_of_registers_(-1), registers_array_(nullptr), @@ -66,10 +66,16 @@ RegisterAllocator::RegisterAllocator(ArenaAllocator* allocator, reserved_out_slots_(0), maximum_number_of_live_core_registers_(0), maximum_number_of_live_fp_registers_(0) { + temp_intervals_.reserve(4); + int_spill_slots_.reserve(kDefaultNumberOfSpillSlots); + long_spill_slots_.reserve(kDefaultNumberOfSpillSlots); + float_spill_slots_.reserve(kDefaultNumberOfSpillSlots); + double_spill_slots_.reserve(kDefaultNumberOfSpillSlots); + static constexpr bool kIsBaseline = false; codegen->SetupBlockedRegisters(kIsBaseline); - physical_core_register_intervals_.SetSize(codegen->GetNumberOfCoreRegisters()); - physical_fp_register_intervals_.SetSize(codegen->GetNumberOfFloatingPointRegisters()); + physical_core_register_intervals_.resize(codegen->GetNumberOfCoreRegisters(), nullptr); + physical_fp_register_intervals_.resize(codegen->GetNumberOfFloatingPointRegisters(), nullptr); // Always reserve for the current method and the graph's max out registers. // TODO: compute it instead. // ArtMethod* takes 2 vregs for 64 bits. @@ -129,17 +135,17 @@ void RegisterAllocator::BlockRegister(Location location, size_t start, size_t en int reg = location.reg(); DCHECK(location.IsRegister() || location.IsFpuRegister()); LiveInterval* interval = location.IsRegister() - ? physical_core_register_intervals_.Get(reg) - : physical_fp_register_intervals_.Get(reg); + ? physical_core_register_intervals_[reg] + : physical_fp_register_intervals_[reg]; Primitive::Type type = location.IsRegister() ? 
Primitive::kPrimInt : Primitive::kPrimFloat; if (interval == nullptr) { interval = LiveInterval::MakeFixedInterval(allocator_, reg, type); if (location.IsRegister()) { - physical_core_register_intervals_.Put(reg, interval); + physical_core_register_intervals_[reg] = interval; } else { - physical_fp_register_intervals_.Put(reg, interval); + physical_fp_register_intervals_[reg] = interval; } } DCHECK(interval->GetRegister() == reg); @@ -181,37 +187,37 @@ void RegisterAllocator::AllocateRegistersInternal() { } number_of_registers_ = codegen_->GetNumberOfCoreRegisters(); - registers_array_ = allocator_->AllocArray<size_t>(number_of_registers_); + registers_array_ = allocator_->AllocArray<size_t>(number_of_registers_, + kArenaAllocRegisterAllocator); processing_core_registers_ = true; unhandled_ = &unhandled_core_intervals_; - for (size_t i = 0, e = physical_core_register_intervals_.Size(); i < e; ++i) { - LiveInterval* fixed = physical_core_register_intervals_.Get(i); + for (LiveInterval* fixed : physical_core_register_intervals_) { if (fixed != nullptr) { // Fixed interval is added to inactive_ instead of unhandled_. // It's also the only type of inactive interval whose start position // can be after the current interval during linear scan. // Fixed interval is never split and never moves to unhandled_. - inactive_.Add(fixed); + inactive_.push_back(fixed); } } LinearScan(); - inactive_.Reset(); - active_.Reset(); - handled_.Reset(); + inactive_.clear(); + active_.clear(); + handled_.clear(); number_of_registers_ = codegen_->GetNumberOfFloatingPointRegisters(); - registers_array_ = allocator_->AllocArray<size_t>(number_of_registers_); + registers_array_ = allocator_->AllocArray<size_t>(number_of_registers_, + kArenaAllocRegisterAllocator); processing_core_registers_ = false; unhandled_ = &unhandled_fp_intervals_; - for (size_t i = 0, e = physical_fp_register_intervals_.Size(); i < e; ++i) { - LiveInterval* fixed = physical_fp_register_intervals_.Get(i); + for (LiveInterval* fixed : physical_fp_register_intervals_) { if (fixed != nullptr) { // Fixed interval is added to inactive_ instead of unhandled_. // It's also the only type of inactive interval whose start position // can be after the current interval during linear scan. // Fixed interval is never split and never moves to unhandled_. 
- inactive_.Add(fixed); + inactive_.push_back(fixed); } } LinearScan(); @@ -236,24 +242,24 @@ void RegisterAllocator::ProcessInstruction(HInstruction* instruction) { case Location::kRequiresRegister: { LiveInterval* interval = LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimInt); - temp_intervals_.Add(interval); + temp_intervals_.push_back(interval); interval->AddTempUse(instruction, i); - unhandled_core_intervals_.Add(interval); + unhandled_core_intervals_.push_back(interval); break; } case Location::kRequiresFpuRegister: { LiveInterval* interval = LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimDouble); - temp_intervals_.Add(interval); + temp_intervals_.push_back(interval); interval->AddTempUse(instruction, i); if (codegen_->NeedsTwoRegisters(Primitive::kPrimDouble)) { interval->AddHighInterval(/* is_temp */ true); LiveInterval* high = interval->GetHighInterval(); - temp_intervals_.Add(high); - unhandled_fp_intervals_.Add(high); + temp_intervals_.push_back(high); + unhandled_fp_intervals_.push_back(high); } - unhandled_fp_intervals_.Add(interval); + unhandled_fp_intervals_.push_back(interval); break; } @@ -276,7 +282,7 @@ void RegisterAllocator::ProcessInstruction(HInstruction* instruction) { instruction->GetBlock()->RemoveInstruction(instruction); return; } - safepoints_.Add(instruction); + safepoints_.push_back(instruction); if (locations->OnlyCallsOnSlowPath()) { // We add a synthesized range at this position to record the live registers // at this position. Ideally, we could just update the safepoints when locations @@ -310,28 +316,28 @@ void RegisterAllocator::ProcessInstruction(HInstruction* instruction) { LiveInterval* current = instruction->GetLiveInterval(); if (current == nullptr) return; - GrowableArray<LiveInterval*>& unhandled = core_register + ArenaVector<LiveInterval*>& unhandled = core_register ? unhandled_core_intervals_ : unhandled_fp_intervals_; - DCHECK(unhandled.IsEmpty() || current->StartsBeforeOrAt(unhandled.Peek())); + DCHECK(unhandled.empty() || current->StartsBeforeOrAt(unhandled.back())); if (codegen_->NeedsTwoRegisters(current->GetType())) { current->AddHighInterval(); } - for (size_t safepoint_index = safepoints_.Size(); safepoint_index > 0; --safepoint_index) { - HInstruction* safepoint = safepoints_.Get(safepoint_index - 1); + for (size_t safepoint_index = safepoints_.size(); safepoint_index > 0; --safepoint_index) { + HInstruction* safepoint = safepoints_[safepoint_index - 1u]; size_t safepoint_position = safepoint->GetLifetimePosition(); // Test that safepoints are ordered in the optimal way. - DCHECK(safepoint_index == safepoints_.Size() - || safepoints_.Get(safepoint_index)->GetLifetimePosition() < safepoint_position); + DCHECK(safepoint_index == safepoints_.size() || + safepoints_[safepoint_index]->GetLifetimePosition() < safepoint_position); if (safepoint_position == current->GetStart()) { // The safepoint is for this instruction, so the location of the instruction // does not need to be saved. - DCHECK_EQ(safepoint_index, safepoints_.Size()); + DCHECK_EQ(safepoint_index, safepoints_.size()); DCHECK_EQ(safepoint, instruction); continue; } else if (current->IsDeadAt(safepoint_position)) { @@ -437,34 +443,26 @@ class AllRangesIterator : public ValueObject { bool RegisterAllocator::ValidateInternal(bool log_fatal_on_failure) const { // To simplify unit testing, we eagerly create the array of intervals, and // call the helper method. 
- GrowableArray<LiveInterval*> intervals(allocator_, 0); + ArenaVector<LiveInterval*> intervals(allocator_->Adapter(kArenaAllocRegisterAllocator)); for (size_t i = 0; i < liveness_.GetNumberOfSsaValues(); ++i) { HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i); if (ShouldProcess(processing_core_registers_, instruction->GetLiveInterval())) { - intervals.Add(instruction->GetLiveInterval()); + intervals.push_back(instruction->GetLiveInterval()); } } - if (processing_core_registers_) { - for (size_t i = 0, e = physical_core_register_intervals_.Size(); i < e; ++i) { - LiveInterval* fixed = physical_core_register_intervals_.Get(i); - if (fixed != nullptr) { - intervals.Add(fixed); - } - } - } else { - for (size_t i = 0, e = physical_fp_register_intervals_.Size(); i < e; ++i) { - LiveInterval* fixed = physical_fp_register_intervals_.Get(i); - if (fixed != nullptr) { - intervals.Add(fixed); - } + const ArenaVector<LiveInterval*>* physical_register_intervals = processing_core_registers_ + ? &physical_core_register_intervals_ + : &physical_fp_register_intervals_; + for (LiveInterval* fixed : *physical_register_intervals) { + if (fixed != nullptr) { + intervals.push_back(fixed); } } - for (size_t i = 0, e = temp_intervals_.Size(); i < e; ++i) { - LiveInterval* temp = temp_intervals_.Get(i); + for (LiveInterval* temp : temp_intervals_) { if (ShouldProcess(processing_core_registers_, temp)) { - intervals.Add(temp); + intervals.push_back(temp); } } @@ -472,7 +470,7 @@ bool RegisterAllocator::ValidateInternal(bool log_fatal_on_failure) const { allocator_, processing_core_registers_, log_fatal_on_failure); } -bool RegisterAllocator::ValidateIntervals(const GrowableArray<LiveInterval*>& intervals, +bool RegisterAllocator::ValidateIntervals(const ArenaVector<LiveInterval*>& intervals, size_t number_of_spill_slots, size_t number_of_out_slots, const CodeGenerator& codegen, @@ -482,26 +480,27 @@ bool RegisterAllocator::ValidateIntervals(const GrowableArray<LiveInterval*>& in size_t number_of_registers = processing_core_registers ? codegen.GetNumberOfCoreRegisters() : codegen.GetNumberOfFloatingPointRegisters(); - GrowableArray<ArenaBitVector*> liveness_of_values( - allocator, number_of_registers + number_of_spill_slots); + ArenaVector<ArenaBitVector*> liveness_of_values( + allocator->Adapter(kArenaAllocRegisterAllocator)); + liveness_of_values.reserve(number_of_registers + number_of_spill_slots); // Allocate a bit vector per register. A live interval that has a register // allocated will populate the associated bit vector based on its live ranges. for (size_t i = 0; i < number_of_registers + number_of_spill_slots; ++i) { - liveness_of_values.Add(new (allocator) ArenaBitVector(allocator, 0, true)); + liveness_of_values.push_back(new (allocator) ArenaBitVector(allocator, 0, true)); } - for (size_t i = 0, e = intervals.Size(); i < e; ++i) { - for (AllRangesIterator it(intervals.Get(i)); !it.Done(); it.Advance()) { + for (LiveInterval* start_interval : intervals) { + for (AllRangesIterator it(start_interval); !it.Done(); it.Advance()) { LiveInterval* current = it.CurrentInterval(); HInstruction* defined_by = current->GetParent()->GetDefinedBy(); if (current->GetParent()->HasSpillSlot() // Parameters and current method have their own stack slot. 
&& !(defined_by != nullptr && (defined_by->IsParameterValue() || defined_by->IsCurrentMethod()))) { - BitVector* liveness_of_spill_slot = liveness_of_values.Get(number_of_registers + BitVector* liveness_of_spill_slot = liveness_of_values[number_of_registers + current->GetParent()->GetSpillSlot() / kVRegSize - - number_of_out_slots); + - number_of_out_slots]; for (size_t j = it.CurrentRange()->GetStart(); j < it.CurrentRange()->GetEnd(); ++j) { if (liveness_of_spill_slot->IsBitSet(j)) { if (log_fatal_on_failure) { @@ -523,7 +522,7 @@ bool RegisterAllocator::ValidateIntervals(const GrowableArray<LiveInterval*>& in // and test code may not properly fill the right information to the code generator. CHECK(codegen.HasAllocatedRegister(processing_core_registers, current->GetRegister())); } - BitVector* liveness_of_register = liveness_of_values.Get(current->GetRegister()); + BitVector* liveness_of_register = liveness_of_values[current->GetRegister()]; for (size_t j = it.CurrentRange()->GetStart(); j < it.CurrentRange()->GetEnd(); ++j) { if (liveness_of_register->IsBitSet(j)) { if (current->IsUsingInputRegister() && current->CanUseInputRegister()) { @@ -572,93 +571,101 @@ void RegisterAllocator::DumpInterval(std::ostream& stream, LiveInterval* interva void RegisterAllocator::DumpAllIntervals(std::ostream& stream) const { stream << "inactive: " << std::endl; - for (size_t i = 0; i < inactive_.Size(); i ++) { - DumpInterval(stream, inactive_.Get(i)); + for (LiveInterval* inactive_interval : inactive_) { + DumpInterval(stream, inactive_interval); } stream << "active: " << std::endl; - for (size_t i = 0; i < active_.Size(); i ++) { - DumpInterval(stream, active_.Get(i)); + for (LiveInterval* active_interval : active_) { + DumpInterval(stream, active_interval); } stream << "unhandled: " << std::endl; auto unhandled = (unhandled_ != nullptr) ? unhandled_ : &unhandled_core_intervals_; - for (size_t i = 0; i < unhandled->Size(); i ++) { - DumpInterval(stream, unhandled->Get(i)); + for (LiveInterval* unhandled_interval : *unhandled) { - DumpInterval(stream, unhandled_interval); } stream << "handled: " << std::endl; - for (size_t i = 0; i < handled_.Size(); i ++) { - DumpInterval(stream, handled_.Get(i)); + for (LiveInterval* handled_interval : handled_) { + DumpInterval(stream, handled_interval); } } // By the book implementation of a linear scan register allocator. void RegisterAllocator::LinearScan() { - while (!unhandled_->IsEmpty()) { + while (!unhandled_->empty()) { // (1) Remove interval with the lowest start position from unhandled. - LiveInterval* current = unhandled_->Pop(); + LiveInterval* current = unhandled_->back(); + unhandled_->pop_back(); // Make sure the interval is in an expected state. DCHECK(!current->IsFixed() && !current->HasSpillSlot()); // Make sure we are going in the right order. - DCHECK(unhandled_->IsEmpty() || unhandled_->Peek()->GetStart() >= current->GetStart()); + DCHECK(unhandled_->empty() || unhandled_->back()->GetStart() >= current->GetStart()); // Make sure a low interval is always with a high. - DCHECK(!current->IsLowInterval() || unhandled_->Peek()->IsHighInterval()); + DCHECK(!current->IsLowInterval() || unhandled_->back()->IsHighInterval()); // Make sure a high interval is always with a low.
DCHECK(current->IsLowInterval() || - unhandled_->IsEmpty() || - !unhandled_->Peek()->IsHighInterval()); + unhandled_->empty() || + !unhandled_->back()->IsHighInterval()); size_t position = current->GetStart(); // Remember the inactive_ size here since the ones moved to inactive_ from // active_ below shouldn't need to be re-checked. - size_t inactive_intervals_to_handle = inactive_.Size(); + size_t inactive_intervals_to_handle = inactive_.size(); // (2) Remove currently active intervals that are dead at this position. // Move active intervals that have a lifetime hole at this position // to inactive. - for (size_t i = 0; i < active_.Size(); ++i) { - LiveInterval* interval = active_.Get(i); + // Note: Copy elements we keep to the beginning, just like + // v.erase(std::remove(v.begin(), v.end(), value), v.end()); + auto active_kept_end = active_.begin(); + for (auto it = active_.begin(), end = active_.end(); it != end; ++it) { + LiveInterval* interval = *it; if (interval->IsDeadAt(position)) { - active_.Delete(interval); - --i; - handled_.Add(interval); + handled_.push_back(interval); } else if (!interval->Covers(position)) { - active_.Delete(interval); - --i; - inactive_.Add(interval); + inactive_.push_back(interval); + } else { + *active_kept_end++ = interval; // Keep this interval. } } + // We have copied what we want to keep to [active_.begin(), active_kept_end), + // the rest of the data in active_ is junk - drop it. + active_.erase(active_kept_end, active_.end()); // (3) Remove currently inactive intervals that are dead at this position. // Move inactive intervals that cover this position to active. - for (size_t i = 0; i < inactive_intervals_to_handle; ++i) { - LiveInterval* interval = inactive_.Get(i); + // Note: Copy elements we keep to the beginning, just like + // v.erase(std::remove(v.begin(), v.begin() + num, value), v.begin() + num); + auto inactive_kept_end = inactive_.begin(); + auto inactive_to_handle_end = inactive_.begin() + inactive_intervals_to_handle; + for (auto it = inactive_.begin(); it != inactive_to_handle_end; ++it) { + LiveInterval* interval = *it; DCHECK(interval->GetStart() < position || interval->IsFixed()); if (interval->IsDeadAt(position)) { - inactive_.Delete(interval); - --i; - --inactive_intervals_to_handle; - handled_.Add(interval); + handled_.push_back(interval); } else if (interval->Covers(position)) { - inactive_.Delete(interval); - --i; - --inactive_intervals_to_handle; - active_.Add(interval); + active_.push_back(interval); + } else { + *inactive_kept_end++ = interval; // Keep this interval. } } + // We have copied what we want to keep to [inactive_.begin(), inactive_kept_end), + // the rest of the data in the processed interval is junk - drop it. + inactive_.erase(inactive_kept_end, inactive_to_handle_end); if (current->IsSlowPathSafepoint()) { // Synthesized interval to record the maximum number of live registers // at safepoints. No need to allocate a register for it. 
if (processing_core_registers_) { maximum_number_of_live_core_registers_ = - std::max(maximum_number_of_live_core_registers_, active_.Size()); + std::max(maximum_number_of_live_core_registers_, active_.size()); } else { maximum_number_of_live_fp_registers_ = - std::max(maximum_number_of_live_fp_registers_, active_.Size()); + std::max(maximum_number_of_live_fp_registers_, active_.size()); } - DCHECK(unhandled_->IsEmpty() || unhandled_->Peek()->GetStart() > current->GetStart()); + DCHECK(unhandled_->empty() || unhandled_->back()->GetStart() > current->GetStart()); continue; } @@ -683,7 +690,7 @@ void RegisterAllocator::LinearScan() { codegen_->AddAllocatedRegister(processing_core_registers_ ? Location::RegisterLocation(current->GetRegister()) : Location::FpuRegisterLocation(current->GetRegister())); - active_.Add(current); + active_.push_back(current); if (current->HasHighInterval() && !current->GetHighInterval()->HasRegister()) { current->GetHighInterval()->SetRegister(GetHighForLowRegister(current->GetRegister())); } @@ -726,8 +733,7 @@ bool RegisterAllocator::TryAllocateFreeReg(LiveInterval* current) { } // For each active interval, set its register to not free. - for (size_t i = 0, e = active_.Size(); i < e; ++i) { - LiveInterval* interval = active_.Get(i); + for (LiveInterval* interval : active_) { DCHECK(interval->HasRegister()); free_until[interval->GetRegister()] = 0; } @@ -762,8 +768,7 @@ bool RegisterAllocator::TryAllocateFreeReg(LiveInterval* current) { // For each inactive interval, set its register to be free until // the next intersection with `current`. - for (size_t i = 0, e = inactive_.Size(); i < e; ++i) { - LiveInterval* inactive = inactive_.Get(i); + for (LiveInterval* inactive : inactive_) { // Temp/Slow-path-safepoint interval has no holes. DCHECK(!inactive->IsTemp() && !inactive->IsSlowPathSafepoint()); if (!current->IsSplit() && !inactive->IsFixed()) { @@ -923,11 +928,29 @@ int RegisterAllocator::FindAvailableRegister(size_t* next_use, LiveInterval* cur return reg; } +// Remove interval and its other half if any. Return iterator to the following element. 
+static ArenaVector<LiveInterval*>::iterator RemoveIntervalAndPotentialOtherHalf( + ArenaVector<LiveInterval*>* intervals, ArenaVector<LiveInterval*>::iterator pos) { + DCHECK(intervals->begin() <= pos && pos < intervals->end()); + LiveInterval* interval = *pos; + if (interval->IsLowInterval()) { + DCHECK(pos + 1 < intervals->end()); + DCHECK_EQ(*(pos + 1), interval->GetHighInterval()); + return intervals->erase(pos, pos + 2); + } else if (interval->IsHighInterval()) { + DCHECK(intervals->begin() < pos); + DCHECK_EQ(*(pos - 1), interval->GetLowInterval()); + return intervals->erase(pos - 1, pos + 1); + } else { + return intervals->erase(pos); + } +} + bool RegisterAllocator::TrySplitNonPairOrUnalignedPairIntervalAt(size_t position, size_t first_register_use, size_t* next_use) { - for (size_t i = 0, e = active_.Size(); i < e; ++i) { - LiveInterval* active = active_.Get(i); + for (auto it = active_.begin(), end = active_.end(); it != end; ++it) { + LiveInterval* active = *it; DCHECK(active->HasRegister()); if (active->IsFixed()) continue; if (active->IsHighInterval()) continue; @@ -941,11 +964,10 @@ bool RegisterAllocator::TrySplitNonPairOrUnalignedPairIntervalAt(size_t position IsLowOfUnalignedPairInterval(active) || !IsLowRegister(active->GetRegister())) { LiveInterval* split = Split(active, position); - active_.DeleteAt(i); if (split != active) { - handled_.Add(active); + handled_.push_back(active); } - PotentiallyRemoveOtherHalf(active, &active_, i); + RemoveIntervalAndPotentialOtherHalf(&active_, it); AddSorted(unhandled_, split); return true; } @@ -953,23 +975,6 @@ bool RegisterAllocator::TrySplitNonPairOrUnalignedPairIntervalAt(size_t position return false; } -bool RegisterAllocator::PotentiallyRemoveOtherHalf(LiveInterval* interval, - GrowableArray<LiveInterval*>* intervals, - size_t index) { - if (interval->IsLowInterval()) { - DCHECK_EQ(intervals->Get(index), interval->GetHighInterval()); - intervals->DeleteAt(index); - return true; - } else if (interval->IsHighInterval()) { - DCHECK_GT(index, 0u); - DCHECK_EQ(intervals->Get(index - 1), interval->GetLowInterval()); - intervals->DeleteAt(index - 1); - return true; - } else { - return false; - } -} - // Find the register that is used the last, and spill the interval // that holds it. If the first use of `current` is after that register // we spill `current` instead. @@ -1001,8 +1006,7 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { // For each active interval, find the next use of its register after the // start of current. - for (size_t i = 0, e = active_.Size(); i < e; ++i) { - LiveInterval* active = active_.Get(i); + for (LiveInterval* active : active_) { DCHECK(active->HasRegister()); if (active->IsFixed()) { next_use[active->GetRegister()] = current->GetStart(); @@ -1016,8 +1020,7 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { // For each inactive interval, find the next use of its register after the // start of current. - for (size_t i = 0, e = inactive_.Size(); i < e; ++i) { - LiveInterval* inactive = inactive_.Get(i); + for (LiveInterval* inactive : inactive_) { // Temp/Slow-path-safepoint interval has no holes. 
DCHECK(!inactive->IsTemp() && !inactive->IsSlowPathSafepoint()); if (!current->IsSplit() && !inactive->IsFixed()) { @@ -1087,10 +1090,10 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { first_register_use, next_use); DCHECK(success); - LiveInterval* existing = unhandled_->Peek(); + LiveInterval* existing = unhandled_->back(); DCHECK(existing->IsHighInterval()); DCHECK_EQ(existing->GetLowInterval(), current); - unhandled_->Add(current); + unhandled_->push_back(current); } else { // If the first use of that instruction is after the last use of the found // register, we split this interval just before its first register use. @@ -1105,23 +1108,24 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { // have that register. current->SetRegister(reg); - for (size_t i = 0, e = active_.Size(); i < e; ++i) { - LiveInterval* active = active_.Get(i); + for (auto it = active_.begin(), end = active_.end(); it != end; ++it) { + LiveInterval* active = *it; if (active->GetRegister() == reg) { DCHECK(!active->IsFixed()); LiveInterval* split = Split(active, current->GetStart()); if (split != active) { - handled_.Add(active); + handled_.push_back(active); } - active_.DeleteAt(i); - PotentiallyRemoveOtherHalf(active, &active_, i); + RemoveIntervalAndPotentialOtherHalf(&active_, it); AddSorted(unhandled_, split); break; } } - for (size_t i = 0; i < inactive_.Size(); ++i) { - LiveInterval* inactive = inactive_.Get(i); + // NOTE: Retrieve end() on each iteration because we're removing elements in the loop body. + for (auto it = inactive_.begin(); it != inactive_.end(); ) { + LiveInterval* inactive = *it; + bool erased = false; if (inactive->GetRegister() == reg) { if (!current->IsSplit() && !inactive->IsFixed()) { // Neither current nor inactive are fixed. @@ -1129,43 +1133,43 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { // inactive interval should never intersect with that inactive interval. // Only if it's not fixed though, because fixed intervals don't come from SSA. DCHECK_EQ(inactive->FirstIntersectionWith(current), kNoLifetime); - continue; - } - size_t next_intersection = inactive->FirstIntersectionWith(current); - if (next_intersection != kNoLifetime) { - if (inactive->IsFixed()) { - LiveInterval* split = Split(current, next_intersection); - DCHECK_NE(split, current); - AddSorted(unhandled_, split); - } else { - // Split at the start of `current`, which will lead to splitting - // at the end of the lifetime hole of `inactive`. - LiveInterval* split = Split(inactive, current->GetStart()); - // If it's inactive, it must start before the current interval. - DCHECK_NE(split, inactive); - inactive_.DeleteAt(i); - if (PotentiallyRemoveOtherHalf(inactive, &inactive_, i) && inactive->IsHighInterval()) { - // We have removed an entry prior to `inactive`. So we need to decrement. - --i; + } else { + size_t next_intersection = inactive->FirstIntersectionWith(current); + if (next_intersection != kNoLifetime) { + if (inactive->IsFixed()) { + LiveInterval* split = Split(current, next_intersection); + DCHECK_NE(split, current); + AddSorted(unhandled_, split); + } else { + // Split at the start of `current`, which will lead to splitting + // at the end of the lifetime hole of `inactive`. + LiveInterval* split = Split(inactive, current->GetStart()); + // If it's inactive, it must start before the current interval. 
+ DCHECK_NE(split, inactive); + it = RemoveIntervalAndPotentialOtherHalf(&inactive_, it); + erased = true; + handled_.push_back(inactive); + AddSorted(unhandled_, split); } - // Decrement because we have removed `inactive` from the list. - --i; - handled_.Add(inactive); - AddSorted(unhandled_, split); } } } + // If we have erased the element, `it` already points to the next element. + // Otherwise we need to move to the next element. + if (!erased) { + ++it; + } } return true; } } -void RegisterAllocator::AddSorted(GrowableArray<LiveInterval*>* array, LiveInterval* interval) { +void RegisterAllocator::AddSorted(ArenaVector<LiveInterval*>* array, LiveInterval* interval) { DCHECK(!interval->IsFixed() && !interval->HasSpillSlot()); size_t insert_at = 0; - for (size_t i = array->Size(); i > 0; --i) { - LiveInterval* current = array->Get(i - 1); + for (size_t i = array->size(); i > 0; --i) { + LiveInterval* current = (*array)[i - 1u]; // High intervals must be processed right after their low equivalent. if (current->StartsAfter(interval) && !current->IsHighInterval()) { insert_at = i; @@ -1173,18 +1177,20 @@ void RegisterAllocator::AddSorted(GrowableArray<LiveInterval*>* array, LiveInter } else if ((current->GetStart() == interval->GetStart()) && current->IsSlowPathSafepoint()) { // Ensure the slow path interval is the last to be processed at its location: we want the // interval to know all live registers at this location. - DCHECK(i == 1 || array->Get(i - 2)->StartsAfter(current)); + DCHECK(i == 1 || (*array)[i - 2u]->StartsAfter(current)); insert_at = i; break; } } - array->InsertAt(insert_at, interval); // Insert the high interval before the low, to ensure the low is processed before. + auto insert_pos = array->begin() + insert_at; if (interval->HasHighInterval()) { - array->InsertAt(insert_at, interval->GetHighInterval()); + array->insert(insert_pos, { interval->GetHighInterval(), interval }); } else if (interval->HasLowInterval()) { - array->InsertAt(insert_at + 1, interval->GetLowInterval()); + array->insert(insert_pos, { interval, interval->GetLowInterval() }); + } else { + array->insert(insert_pos, interval); } } @@ -1309,7 +1315,7 @@ void RegisterAllocator::AllocateSpillSlotFor(LiveInterval* interval) { return; } - GrowableArray<size_t>* spill_slots = nullptr; + ArenaVector<size_t>* spill_slots = nullptr; switch (interval->GetType()) { case Primitive::kPrimDouble: spill_slots = &double_spill_slots_; @@ -1334,32 +1340,27 @@ void RegisterAllocator::AllocateSpillSlotFor(LiveInterval* interval) { // Find an available spill slot. size_t slot = 0; - for (size_t e = spill_slots->Size(); slot < e; ++slot) { - if (spill_slots->Get(slot) <= parent->GetStart() - && (slot == (e - 1) || spill_slots->Get(slot + 1) <= parent->GetStart())) { + for (size_t e = spill_slots->size(); slot < e; ++slot) { + if ((*spill_slots)[slot] <= parent->GetStart() + && (slot == (e - 1) || (*spill_slots)[slot + 1] <= parent->GetStart())) { break; } } size_t end = interval->GetLastSibling()->GetEnd(); if (parent->NeedsTwoSpillSlots()) { - if (slot == spill_slots->Size()) { + if (slot + 2u > spill_slots->size()) { // We need a new spill slot. 
- spill_slots->Add(end); - spill_slots->Add(end); - } else if (slot == spill_slots->Size() - 1) { - spill_slots->Put(slot, end); - spill_slots->Add(end); - } else { - spill_slots->Put(slot, end); - spill_slots->Put(slot + 1, end); + spill_slots->resize(slot + 2u, end); } + (*spill_slots)[slot] = end; + (*spill_slots)[slot + 1] = end; } else { - if (slot == spill_slots->Size()) { + if (slot == spill_slots->size()) { // We need a new spill slot. - spill_slots->Add(end); + spill_slots->push_back(end); } else { - spill_slots->Put(slot, end); + (*spill_slots)[slot] = end; } } @@ -1817,13 +1818,13 @@ void RegisterAllocator::Resolve() { size_t slot = current->GetSpillSlot(); switch (current->GetType()) { case Primitive::kPrimDouble: - slot += long_spill_slots_.Size(); + slot += long_spill_slots_.size(); FALLTHROUGH_INTENDED; case Primitive::kPrimLong: - slot += float_spill_slots_.Size(); + slot += float_spill_slots_.size(); FALLTHROUGH_INTENDED; case Primitive::kPrimFloat: - slot += int_spill_slots_.Size(); + slot += int_spill_slots_.size(); FALLTHROUGH_INTENDED; case Primitive::kPrimNot: case Primitive::kPrimInt: @@ -1906,8 +1907,7 @@ void RegisterAllocator::Resolve() { } // Assign temp locations. - for (size_t i = 0; i < temp_intervals_.Size(); ++i) { - LiveInterval* temp = temp_intervals_.Get(i); + for (LiveInterval* temp : temp_intervals_) { if (temp->IsHighInterval()) { // High intervals can be skipped, they are already handled by the low interval. continue; diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h index e0304643e6..58600b789b 100644 --- a/compiler/optimizing/register_allocator.h +++ b/compiler/optimizing/register_allocator.h @@ -18,9 +18,9 @@ #define ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_H_ #include "arch/instruction_set.h" +#include "base/arena_containers.h" #include "base/macros.h" #include "primitive.h" -#include "utils/growable_array.h" namespace art { @@ -59,7 +59,7 @@ class RegisterAllocator { } // Helper method for validation. Used by unit testing. - static bool ValidateIntervals(const GrowableArray<LiveInterval*>& intervals, + static bool ValidateIntervals(const ArenaVector<LiveInterval*>& intervals, size_t number_of_spill_slots, size_t number_of_out_slots, const CodeGenerator& codegen, @@ -70,10 +70,10 @@ class RegisterAllocator { static bool CanAllocateRegistersFor(const HGraph& graph, InstructionSet instruction_set); size_t GetNumberOfSpillSlots() const { - return int_spill_slots_.Size() - + long_spill_slots_.Size() - + float_spill_slots_.Size() - + double_spill_slots_.Size() + return int_spill_slots_.size() + + long_spill_slots_.size() + + float_spill_slots_.size() + + double_spill_slots_.size() + catch_phi_spill_slots_; } @@ -87,7 +87,7 @@ class RegisterAllocator { void Resolve(); // Add `interval` in the given sorted list. - static void AddSorted(GrowableArray<LiveInterval*>* array, LiveInterval* interval); + static void AddSorted(ArenaVector<LiveInterval*>* array, LiveInterval* interval); // Split `interval` at the position `position`. The new interval starts at `position`. LiveInterval* Split(LiveInterval* interval, size_t position); @@ -159,13 +159,6 @@ class RegisterAllocator { size_t first_register_use, size_t* next_use); - // If `interval` has another half, remove it from the list of `intervals`. - // `index` holds the index at which `interval` is in `intervals`. - // Returns whether there is another half. 
- bool PotentiallyRemoveOtherHalf(LiveInterval* interval, - GrowableArray<LiveInterval*>* intervals, - size_t index); - ArenaAllocator* const allocator_; CodeGenerator* const codegen_; const SsaLivenessAnalysis& liveness_; @@ -173,43 +166,43 @@ class RegisterAllocator { // List of intervals for core registers that must be processed, ordered by start // position. Last entry is the interval that has the lowest start position. // This list is initially populated before doing the linear scan. - GrowableArray<LiveInterval*> unhandled_core_intervals_; + ArenaVector<LiveInterval*> unhandled_core_intervals_; // List of intervals for floating-point registers. Same comments as above. - GrowableArray<LiveInterval*> unhandled_fp_intervals_; + ArenaVector<LiveInterval*> unhandled_fp_intervals_; // Currently processed list of unhandled intervals. Either `unhandled_core_intervals_` // or `unhandled_fp_intervals_`. - GrowableArray<LiveInterval*>* unhandled_; + ArenaVector<LiveInterval*>* unhandled_; // List of intervals that have been processed. - GrowableArray<LiveInterval*> handled_; + ArenaVector<LiveInterval*> handled_; // List of intervals that are currently active when processing a new live interval. // That is, they have a live range that spans the start of the new interval. - GrowableArray<LiveInterval*> active_; + ArenaVector<LiveInterval*> active_; // List of intervals that are currently inactive when processing a new live interval. // That is, they have a lifetime hole that spans the start of the new interval. - GrowableArray<LiveInterval*> inactive_; + ArenaVector<LiveInterval*> inactive_; // Fixed intervals for physical registers. Such intervals cover the positions // where an instruction requires a specific register. - GrowableArray<LiveInterval*> physical_core_register_intervals_; - GrowableArray<LiveInterval*> physical_fp_register_intervals_; + ArenaVector<LiveInterval*> physical_core_register_intervals_; + ArenaVector<LiveInterval*> physical_fp_register_intervals_; // Intervals for temporaries. Such intervals cover the positions // where an instruction requires a temporary. - GrowableArray<LiveInterval*> temp_intervals_; + ArenaVector<LiveInterval*> temp_intervals_; // The spill slots allocated for live intervals. We ensure spill slots // are typed to avoid (1) doing moves and swaps between two different kinds // of registers, and (2) swapping between a single stack slot and a double // stack slot. This simplifies the parallel move resolver. - GrowableArray<size_t> int_spill_slots_; - GrowableArray<size_t> long_spill_slots_; - GrowableArray<size_t> float_spill_slots_; - GrowableArray<size_t> double_spill_slots_; + ArenaVector<size_t> int_spill_slots_; + ArenaVector<size_t> long_spill_slots_; + ArenaVector<size_t> float_spill_slots_; + ArenaVector<size_t> double_spill_slots_; // Spill slots allocated to catch phis. This category is special-cased because // (1) slots are allocated prior to linear scan and in reverse linear order, @@ -217,7 +210,7 @@ class RegisterAllocator { size_t catch_phi_spill_slots_; // Instructions that need a safepoint. - GrowableArray<HInstruction*> safepoints_; + ArenaVector<HInstruction*> safepoints_; // True if processing core registers. False if processing floating // point registers. 
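The register allocator changes above replace index-juggling Delete()/DeleteAt() calls with two vector-friendly patterns: erasing a low/high interval pair as a single two-element range, and the in-place "keep the survivors, then erase the tail" compaction described by the erase/remove notes in LinearScan(). The following is a minimal standalone sketch of that compaction pattern only; CompactAndErase, the predicate, and the sample data are invented for illustration and stand in for LiveInterval and the IsDeadAt()/Covers() tests, they are not part of the ART sources.

#include <cassert>
#include <vector>

// Copy the elements we keep to the beginning of the vector, then drop the
// leftover tail with a single erase - the same idea as
// v.erase(std::remove_if(v.begin(), v.end(), pred), v.end()).
template <typename T, typename Pred>
void CompactAndErase(std::vector<T>* v, Pred should_drop) {
  auto kept_end = v->begin();
  for (auto it = v->begin(), end = v->end(); it != end; ++it) {
    if (!should_drop(*it)) {
      *kept_end++ = *it;  // Keep this element.
    }
  }
  // [begin(), kept_end) holds the survivors; the rest is junk - drop it.
  v->erase(kept_end, v->end());
}

int main() {
  // Stand-ins for interval start positions; drop the ones "dead" before 30.
  std::vector<int> starts = {10, 25, 40, 55};
  CompactAndErase(&starts, [](int start) { return start < 30; });
  assert(starts == (std::vector<int>{40, 55}));
  return 0;
}

Compacting in one pass sidesteps the repeated `--i` index adjustments the old Delete()/DeleteAt() loops needed, touches each element once, and preserves the relative order of the surviving elements.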
diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc index b72df868d3..2bb5a8bb08 100644 --- a/compiler/optimizing/register_allocator_test.cc +++ b/compiler/optimizing/register_allocator_test.cc @@ -64,83 +64,83 @@ TEST(RegisterAllocatorTest, ValidateIntervals) { std::unique_ptr<const X86InstructionSetFeatures> features_x86( X86InstructionSetFeatures::FromCppDefines()); x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - GrowableArray<LiveInterval*> intervals(&allocator, 0); + ArenaVector<LiveInterval*> intervals(allocator.Adapter()); // Test with two intervals of the same range. { static constexpr size_t ranges[][2] = {{0, 42}}; - intervals.Add(BuildInterval(ranges, arraysize(ranges), &allocator, 0)); - intervals.Add(BuildInterval(ranges, arraysize(ranges), &allocator, 1)); + intervals.push_back(BuildInterval(ranges, arraysize(ranges), &allocator, 0)); + intervals.push_back(BuildInterval(ranges, arraysize(ranges), &allocator, 1)); ASSERT_TRUE(RegisterAllocator::ValidateIntervals( intervals, 0, 0, codegen, &allocator, true, false)); - intervals.Get(1)->SetRegister(0); + intervals[1]->SetRegister(0); ASSERT_FALSE(RegisterAllocator::ValidateIntervals( intervals, 0, 0, codegen, &allocator, true, false)); - intervals.Reset(); + intervals.clear(); } // Test with two non-intersecting intervals. { static constexpr size_t ranges1[][2] = {{0, 42}}; - intervals.Add(BuildInterval(ranges1, arraysize(ranges1), &allocator, 0)); + intervals.push_back(BuildInterval(ranges1, arraysize(ranges1), &allocator, 0)); static constexpr size_t ranges2[][2] = {{42, 43}}; - intervals.Add(BuildInterval(ranges2, arraysize(ranges2), &allocator, 1)); + intervals.push_back(BuildInterval(ranges2, arraysize(ranges2), &allocator, 1)); ASSERT_TRUE(RegisterAllocator::ValidateIntervals( intervals, 0, 0, codegen, &allocator, true, false)); - intervals.Get(1)->SetRegister(0); + intervals[1]->SetRegister(0); ASSERT_TRUE(RegisterAllocator::ValidateIntervals( intervals, 0, 0, codegen, &allocator, true, false)); - intervals.Reset(); + intervals.clear(); } // Test with two non-intersecting intervals, with one with a lifetime hole. { static constexpr size_t ranges1[][2] = {{0, 42}, {45, 48}}; - intervals.Add(BuildInterval(ranges1, arraysize(ranges1), &allocator, 0)); + intervals.push_back(BuildInterval(ranges1, arraysize(ranges1), &allocator, 0)); static constexpr size_t ranges2[][2] = {{42, 43}}; - intervals.Add(BuildInterval(ranges2, arraysize(ranges2), &allocator, 1)); + intervals.push_back(BuildInterval(ranges2, arraysize(ranges2), &allocator, 1)); ASSERT_TRUE(RegisterAllocator::ValidateIntervals( intervals, 0, 0, codegen, &allocator, true, false)); - intervals.Get(1)->SetRegister(0); + intervals[1]->SetRegister(0); ASSERT_TRUE(RegisterAllocator::ValidateIntervals( intervals, 0, 0, codegen, &allocator, true, false)); - intervals.Reset(); + intervals.clear(); } // Test with intersecting intervals. 
{ static constexpr size_t ranges1[][2] = {{0, 42}, {44, 48}}; - intervals.Add(BuildInterval(ranges1, arraysize(ranges1), &allocator, 0)); + intervals.push_back(BuildInterval(ranges1, arraysize(ranges1), &allocator, 0)); static constexpr size_t ranges2[][2] = {{42, 47}}; - intervals.Add(BuildInterval(ranges2, arraysize(ranges2), &allocator, 1)); + intervals.push_back(BuildInterval(ranges2, arraysize(ranges2), &allocator, 1)); ASSERT_TRUE(RegisterAllocator::ValidateIntervals( intervals, 0, 0, codegen, &allocator, true, false)); - intervals.Get(1)->SetRegister(0); + intervals[1]->SetRegister(0); ASSERT_FALSE(RegisterAllocator::ValidateIntervals( intervals, 0, 0, codegen, &allocator, true, false)); - intervals.Reset(); + intervals.clear(); } // Test with siblings. { static constexpr size_t ranges1[][2] = {{0, 42}, {44, 48}}; - intervals.Add(BuildInterval(ranges1, arraysize(ranges1), &allocator, 0)); - intervals.Get(0)->SplitAt(43); + intervals.push_back(BuildInterval(ranges1, arraysize(ranges1), &allocator, 0)); + intervals[0]->SplitAt(43); static constexpr size_t ranges2[][2] = {{42, 47}}; - intervals.Add(BuildInterval(ranges2, arraysize(ranges2), &allocator, 1)); + intervals.push_back(BuildInterval(ranges2, arraysize(ranges2), &allocator, 1)); ASSERT_TRUE(RegisterAllocator::ValidateIntervals( intervals, 0, 0, codegen, &allocator, true, false)); - intervals.Get(1)->SetRegister(0); + intervals[1]->SetRegister(0); // Sibling of the first interval has no register allocated to it. ASSERT_TRUE(RegisterAllocator::ValidateIntervals( intervals, 0, 0, codegen, &allocator, true, false)); - intervals.Get(0)->GetNextSibling()->SetRegister(0); + intervals[0]->GetNextSibling()->SetRegister(0); ASSERT_FALSE(RegisterAllocator::ValidateIntervals( intervals, 0, 0, codegen, &allocator, true, false)); } @@ -429,7 +429,7 @@ TEST(RegisterAllocatorTest, FreeUntil) { // Populate the instructions in the liveness object, to please the register allocator. for (size_t i = 0; i < 60; ++i) { - liveness.instructions_from_lifetime_position_.Add( + liveness.instructions_from_lifetime_position_.push_back( graph->GetEntryBlock()->GetFirstInstruction()); } @@ -442,15 +442,15 @@ TEST(RegisterAllocatorTest, FreeUntil) { // we do not depend on an order. LiveInterval* interval = LiveInterval::MakeFixedInterval(&allocator, 0, Primitive::kPrimInt); interval->AddRange(40, 50); - register_allocator.inactive_.Add(interval); + register_allocator.inactive_.push_back(interval); interval = LiveInterval::MakeFixedInterval(&allocator, 0, Primitive::kPrimInt); interval->AddRange(20, 30); - register_allocator.inactive_.Add(interval); + register_allocator.inactive_.push_back(interval); interval = LiveInterval::MakeFixedInterval(&allocator, 0, Primitive::kPrimInt); interval->AddRange(60, 70); - register_allocator.inactive_.Add(interval); + register_allocator.inactive_.push_back(interval); register_allocator.number_of_registers_ = 1; register_allocator.registers_array_ = allocator.AllocArray<size_t>(1); @@ -460,10 +460,10 @@ TEST(RegisterAllocatorTest, FreeUntil) { ASSERT_TRUE(register_allocator.TryAllocateFreeReg(unhandled)); // Check that we have split the interval. - ASSERT_EQ(1u, register_allocator.unhandled_->Size()); + ASSERT_EQ(1u, register_allocator.unhandled_->size()); // Check that we know need to find a new register where the next interval // that uses the register starts. 
- ASSERT_EQ(20u, register_allocator.unhandled_->Get(0)->GetStart()); + ASSERT_EQ(20u, register_allocator.unhandled_->front()->GetStart()); } static HGraph* BuildIfElseWithPhi(ArenaAllocator* allocator, @@ -678,7 +678,7 @@ TEST(RegisterAllocatorTest, ExpectedInRegisterHint) { // Check that the field gets put in the register expected by its use. // Don't use SetInAt because we are overriding an already allocated location. - ret->GetLocations()->inputs_.Put(0, Location::RegisterLocation(2)); + ret->GetLocations()->inputs_[0] = Location::RegisterLocation(2); RegisterAllocator register_allocator(&allocator, &codegen, liveness); register_allocator.AllocateRegisters(); @@ -885,14 +885,14 @@ TEST(RegisterAllocatorTest, SpillInactive) { SsaLivenessAnalysis liveness(graph, &codegen); // Populate the instructions in the liveness object, to please the register allocator. for (size_t i = 0; i < 32; ++i) { - liveness.instructions_from_lifetime_position_.Add(user); + liveness.instructions_from_lifetime_position_.push_back(user); } RegisterAllocator register_allocator(&allocator, &codegen, liveness); - register_allocator.unhandled_core_intervals_.Add(fourth); - register_allocator.unhandled_core_intervals_.Add(third); - register_allocator.unhandled_core_intervals_.Add(second); - register_allocator.unhandled_core_intervals_.Add(first); + register_allocator.unhandled_core_intervals_.push_back(fourth); + register_allocator.unhandled_core_intervals_.push_back(third); + register_allocator.unhandled_core_intervals_.push_back(second); + register_allocator.unhandled_core_intervals_.push_back(first); // Set just one register available to make all intervals compete for the same. register_allocator.number_of_registers_ = 1; @@ -902,11 +902,11 @@ TEST(RegisterAllocatorTest, SpillInactive) { register_allocator.LinearScan(); // Test that there is no conflicts between intervals. - GrowableArray<LiveInterval*> intervals(&allocator, 0); - intervals.Add(first); - intervals.Add(second); - intervals.Add(third); - intervals.Add(fourth); + ArenaVector<LiveInterval*> intervals(allocator.Adapter()); + intervals.push_back(first); + intervals.push_back(second); + intervals.push_back(third); + intervals.push_back(fourth); ASSERT_TRUE(RegisterAllocator::ValidateIntervals( intervals, 0, 0, codegen, &allocator, true, false)); } diff --git a/compiler/optimizing/side_effects_analysis.cc b/compiler/optimizing/side_effects_analysis.cc index 1956781b79..338a3aaad0 100644 --- a/compiler/optimizing/side_effects_analysis.cc +++ b/compiler/optimizing/side_effects_analysis.cc @@ -21,8 +21,8 @@ namespace art { void SideEffectsAnalysis::Run() { // Inlining might have created more blocks, so we need to increase the size // if needed. - block_effects_.SetSize(graph_->GetBlocks().size()); - loop_effects_.SetSize(graph_->GetBlocks().size()); + block_effects_.resize(graph_->GetBlocks().size()); + loop_effects_.resize(graph_->GetBlocks().size()); // In DEBUG mode, ensure side effects are properly initialized to empty. if (kIsDebugBuild) { @@ -54,7 +54,7 @@ void SideEffectsAnalysis::Run() { } } - block_effects_.Put(block->GetBlockId(), effects); + block_effects_[block->GetBlockId()] = effects; if (block->IsLoopHeader()) { // The side effects of the loop header are part of the loop. 
@@ -76,16 +76,19 @@ void SideEffectsAnalysis::Run() { SideEffects SideEffectsAnalysis::GetLoopEffects(HBasicBlock* block) const { DCHECK(block->IsLoopHeader()); - return loop_effects_.Get(block->GetBlockId()); + DCHECK_LT(block->GetBlockId(), loop_effects_.size()); + return loop_effects_[block->GetBlockId()]; } SideEffects SideEffectsAnalysis::GetBlockEffects(HBasicBlock* block) const { - return block_effects_.Get(block->GetBlockId()); + DCHECK_LT(block->GetBlockId(), block_effects_.size()); + return block_effects_[block->GetBlockId()]; } void SideEffectsAnalysis::UpdateLoopEffects(HLoopInformation* info, SideEffects effects) { - int id = info->GetHeader()->GetBlockId(); - loop_effects_.Put(id, loop_effects_.Get(id).Union(effects)); + uint32_t id = info->GetHeader()->GetBlockId(); + DCHECK_LT(id, loop_effects_.size()); + loop_effects_[id] = loop_effects_[id].Union(effects); } } // namespace art diff --git a/compiler/optimizing/side_effects_analysis.h b/compiler/optimizing/side_effects_analysis.h index 9888140fb6..bac6088bf7 100644 --- a/compiler/optimizing/side_effects_analysis.h +++ b/compiler/optimizing/side_effects_analysis.h @@ -17,6 +17,7 @@ #ifndef ART_COMPILER_OPTIMIZING_SIDE_EFFECTS_ANALYSIS_H_ #define ART_COMPILER_OPTIMIZING_SIDE_EFFECTS_ANALYSIS_H_ +#include "base/arena_containers.h" #include "nodes.h" #include "optimization.h" @@ -27,8 +28,10 @@ class SideEffectsAnalysis : public HOptimization { explicit SideEffectsAnalysis(HGraph* graph) : HOptimization(graph, kSideEffectsAnalysisPassName), graph_(graph), - block_effects_(graph->GetArena(), graph->GetBlocks().size(), SideEffects::None()), - loop_effects_(graph->GetArena(), graph->GetBlocks().size(), SideEffects::None()) {} + block_effects_(graph->GetBlocks().size(), + graph->GetArena()->Adapter(kArenaAllocSideEffectsAnalysis)), + loop_effects_(graph->GetBlocks().size(), + graph->GetArena()->Adapter(kArenaAllocSideEffectsAnalysis)) {} SideEffects GetLoopEffects(HBasicBlock* block) const; SideEffects GetBlockEffects(HBasicBlock* block) const; @@ -51,11 +54,11 @@ class SideEffectsAnalysis : public HOptimization { // Side effects of individual blocks, that is the union of the side effects // of the instructions in the block. - GrowableArray<SideEffects> block_effects_; + ArenaVector<SideEffects> block_effects_; // Side effects of loops, that is the union of the side effects of the // blocks contained in that loop. - GrowableArray<SideEffects> loop_effects_; + ArenaVector<SideEffects> loop_effects_; ART_FRIEND_TEST(GVNTest, LoopSideEffects); DISALLOW_COPY_AND_ASSIGN(SideEffectsAnalysis); diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc index 0ef86d80ed..40c75af6ef 100644 --- a/compiler/optimizing/ssa_builder.cc +++ b/compiler/optimizing/ssa_builder.cc @@ -56,6 +56,24 @@ class DeadPhiHandling : public ValueObject { DISALLOW_COPY_AND_ASSIGN(DeadPhiHandling); }; +static bool HasConflictingEquivalent(HPhi* phi) { + if (phi->GetNext() == nullptr) { + return false; + } + HPhi* next = phi->GetNext()->AsPhi(); + if (next->GetRegNumber() == phi->GetRegNumber()) { + if (next->GetType() == Primitive::kPrimVoid) { + // We only get a void type for an equivalent phi we processed and found out + // it was conflicting. + return true; + } else { + // Go to the next phi, in case it is also an equivalent. 
+ return HasConflictingEquivalent(next); + } + } + return false; +} + bool DeadPhiHandling::UpdateType(HPhi* phi) { if (phi->IsDead()) { // Phi was rendered dead while waiting in the worklist because it was replaced @@ -87,21 +105,26 @@ bool DeadPhiHandling::UpdateType(HPhi* phi) { if (new_type == Primitive::kPrimVoid) { new_type = input_type; } else if (new_type == Primitive::kPrimNot && input_type == Primitive::kPrimInt) { + if (input->IsPhi() && HasConflictingEquivalent(input->AsPhi())) { + // If we already asked for an equivalent of the input phi, but that equivalent + // ended up conflicting, make this phi conflicting too. + conflict = true; + break; + } HInstruction* equivalent = SsaBuilder::GetReferenceTypeEquivalent(input); if (equivalent == nullptr) { conflict = true; break; - } else { - phi->ReplaceInput(equivalent, i); - if (equivalent->IsPhi()) { - DCHECK_EQ(equivalent->GetType(), Primitive::kPrimNot); - // We created a new phi, but that phi has the same inputs as the old phi. We - // add it to the worklist to ensure its inputs can also be converted to reference. - // If not, it will remain dead, and the algorithm will make the current phi dead - // as well. - equivalent->AsPhi()->SetLive(); - AddToWorklist(equivalent->AsPhi()); - } + } + phi->ReplaceInput(equivalent, i); + if (equivalent->IsPhi()) { + DCHECK_EQ(equivalent->GetType(), Primitive::kPrimNot); + // We created a new phi, but that phi has the same inputs as the old phi. We + // add it to the worklist to ensure its inputs can also be converted to reference. + // If not, it will remain dead, and the algorithm will make the current phi dead + // as well. + equivalent->AsPhi()->SetLive(); + AddToWorklist(equivalent->AsPhi()); } } else if (new_type == Primitive::kPrimInt && input_type == Primitive::kPrimNot) { new_type = Primitive::kPrimNot; @@ -145,8 +168,14 @@ void DeadPhiHandling::VisitBasicBlock(HBasicBlock* block) { if (phi->IsDead() && phi->HasEnvironmentUses()) { phi->SetLive(); if (block->IsLoopHeader()) { - // Give a type to the loop phi, to guarantee convergence of the algorithm. - phi->SetType(phi->InputAt(0)->GetType()); + // Give a type to the loop phi to guarantee convergence of the algorithm. + // Note that the dead phi may already have a type if it is an equivalent + // generated for a typed LoadLocal. In that case we do not change the + // type because it could lead to an unsupported PrimNot/Float/Double -> + // PrimInt/Long transition and create same type equivalents. + if (phi->GetType() == Primitive::kPrimVoid) { + phi->SetType(phi->InputAt(0)->GetType()); + } AddToWorklist(phi); } else { // Because we are doing a reverse post order visit, all inputs of @@ -191,12 +220,6 @@ void DeadPhiHandling::Run() { ProcessWorklist(); } -static bool IsPhiEquivalentOf(HInstruction* instruction, HPhi* phi) { - return instruction != nullptr - && instruction->IsPhi() - && instruction->AsPhi()->GetRegNumber() == phi->GetRegNumber(); -} - void SsaBuilder::FixNullConstantType() { // The order doesn't matter here. for (HReversePostOrderIterator itb(*GetGraph()); !itb.Done(); itb.Advance()) { @@ -324,13 +347,13 @@ void SsaBuilder::BuildSsa() { // If the phi is not dead, or has no environment uses, there is nothing to do. if (!phi->IsDead() || !phi->HasEnvironmentUses()) continue; HInstruction* next = phi->GetNext(); - if (!IsPhiEquivalentOf(next, phi)) continue; + if (!phi->IsVRegEquivalentOf(next)) continue; if (next->AsPhi()->IsDead()) { // If the phi equivalent is dead, check if there is another one. 
next = next->GetNext(); - if (!IsPhiEquivalentOf(next, phi)) continue; + if (!phi->IsVRegEquivalentOf(next)) continue; // There can be at most two phi equivalents. - DCHECK(!IsPhiEquivalentOf(next->GetNext(), phi)); + DCHECK(!phi->IsVRegEquivalentOf(next->GetNext())); if (next->AsPhi()->IsDead()) continue; } // We found a live phi equivalent. Update the environment uses of `phi` with it. @@ -403,6 +426,24 @@ void SsaBuilder::VisitBasicBlock(HBasicBlock* block) { if (block->IsCatchBlock()) { // Catch phis were already created and inputs collected from throwing sites. + if (kIsDebugBuild) { + // Make sure there was at least one throwing instruction which initialized + // locals (guaranteed by HGraphBuilder) and that all try blocks have been + // visited already (from HTryBoundary scoping and reverse post order). + bool throwing_instruction_found = false; + bool catch_block_visited = false; + for (HReversePostOrderIterator it(*GetGraph()); !it.Done(); it.Advance()) { + HBasicBlock* current = it.Current(); + if (current == block) { + catch_block_visited = true; + } else if (current->IsTryBlock() && + current->GetTryCatchInformation()->GetTryEntry().HasExceptionHandler(*block)) { + DCHECK(!catch_block_visited) << "Catch block visited before its try block."; + throwing_instruction_found |= current->HasThrowingInstructions(); + } + } + DCHECK(throwing_instruction_found) << "No instructions throwing into a live catch block."; + } } else if (block->IsLoopHeader()) { // If the block is a loop header, we know we only have visited the pre header // because we are visiting in reverse post order. We create phis for all initialized diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc index 1e9a813be9..b869d57be8 100644 --- a/compiler/optimizing/ssa_liveness_analysis.cc +++ b/compiler/optimizing/ssa_liveness_analysis.cc @@ -43,11 +43,11 @@ static bool IsInnerLoop(HLoopInformation* outer, HLoopInformation* inner) { && inner->IsIn(*outer); } -static void AddToListForLinearization(GrowableArray<HBasicBlock*>* worklist, HBasicBlock* block) { - size_t insert_at = worklist->Size(); +static void AddToListForLinearization(ArenaVector<HBasicBlock*>* worklist, HBasicBlock* block) { HLoopInformation* block_loop = block->GetLoopInformation(); - for (; insert_at > 0; --insert_at) { - HBasicBlock* current = worklist->Get(insert_at - 1); + auto insert_pos = worklist->rbegin(); // insert_pos.base() will be the actual position. + for (auto end = worklist->rend(); insert_pos != end; ++insert_pos) { + HBasicBlock* current = *insert_pos; HLoopInformation* current_loop = current->GetLoopInformation(); if (InSameLoop(block_loop, current_loop) || !IsLoop(current_loop) @@ -56,7 +56,7 @@ static void AddToListForLinearization(GrowableArray<HBasicBlock*>* worklist, HBa break; } } - worklist->InsertAt(insert_at, block); + worklist->insert(insert_pos.base(), block); } void SsaLivenessAnalysis::LinearizeGraph() { @@ -69,15 +69,15 @@ void SsaLivenessAnalysis::LinearizeGraph() { // current reverse post order in the graph, but it would require making // order queries to a GrowableArray, which is not the best data structure // for it. 
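The AddToListForLinearization hunk above trades the backward index scan plus InsertAt for a reverse-iterator scan plus insert at insert_pos.base(). The equivalence rests on how reverse_iterator::base() behaves: it returns the forward iterator one position past the element the reverse iterator designates, so inserting at base() places the new element immediately after the last element the scan looked at, and rbegin().base() == end() reproduces the old "append when the loop breaks immediately" case. A standalone illustration with a plain std::vector (not part of the patch):

#include <cassert>
#include <vector>

int main() {
  std::vector<int> worklist = {1, 2, 4, 5};

  // Scan backwards for the first element smaller than 3, then insert 3
  // right after it, mirroring the shape of AddToListForLinearization.
  auto insert_pos = worklist.rbegin();
  for (auto end = worklist.rend(); insert_pos != end; ++insert_pos) {
    if (*insert_pos < 3) {
      break;
    }
  }
  // insert_pos designates the element 2; insert_pos.base() is the forward
  // iterator to 4, so the new element lands between 2 and 4.
  worklist.insert(insert_pos.base(), 3);

  assert((worklist == std::vector<int>{1, 2, 3, 4, 5}));
  return 0;
}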
- GrowableArray<uint32_t> forward_predecessors(graph_->GetArena(), graph_->GetBlocks().size()); - forward_predecessors.SetSize(graph_->GetBlocks().size()); + ArenaVector<uint32_t> forward_predecessors(graph_->GetBlocks().size(), + graph_->GetArena()->Adapter(kArenaAllocSsaLiveness)); for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { HBasicBlock* block = it.Current(); size_t number_of_forward_predecessors = block->GetPredecessors().size(); if (block->IsLoopHeader()) { number_of_forward_predecessors -= block->GetLoopInformation()->NumberOfBackEdges(); } - forward_predecessors.Put(block->GetBlockId(), number_of_forward_predecessors); + forward_predecessors[block->GetBlockId()] = number_of_forward_predecessors; } // (2): Following a worklist approach, first start with the entry block, and @@ -85,20 +85,21 @@ void SsaLivenessAnalysis::LinearizeGraph() { // successor block are visited, the successor block is added in the worklist // following an order that satisfies the requirements to build our linear graph. graph_->linear_order_.reserve(graph_->GetReversePostOrder().size()); - GrowableArray<HBasicBlock*> worklist(graph_->GetArena(), 1); - worklist.Add(graph_->GetEntryBlock()); + ArenaVector<HBasicBlock*> worklist(graph_->GetArena()->Adapter(kArenaAllocSsaLiveness)); + worklist.push_back(graph_->GetEntryBlock()); do { - HBasicBlock* current = worklist.Pop(); + HBasicBlock* current = worklist.back(); + worklist.pop_back(); graph_->linear_order_.push_back(current); for (HBasicBlock* successor : current->GetSuccessors()) { int block_id = successor->GetBlockId(); - size_t number_of_remaining_predecessors = forward_predecessors.Get(block_id); + size_t number_of_remaining_predecessors = forward_predecessors[block_id]; if (number_of_remaining_predecessors == 1) { AddToListForLinearization(&worklist, successor); } - forward_predecessors.Put(block_id, number_of_remaining_predecessors - 1); + forward_predecessors[block_id] = number_of_remaining_predecessors - 1; } - } while (!worklist.IsEmpty()); + } while (!worklist.empty()); } void SsaLivenessAnalysis::NumberInstructions() { @@ -122,7 +123,7 @@ void SsaLivenessAnalysis::NumberInstructions() { codegen_->AllocateLocations(current); LocationSummary* locations = current->GetLocations(); if (locations != nullptr && locations->Out().IsValid()) { - instructions_from_ssa_index_.Add(current); + instructions_from_ssa_index_.push_back(current); current->SetSsaIndex(ssa_index++); current->SetLiveInterval( LiveInterval::MakeInterval(graph_->GetArena(), current->GetType(), current)); @@ -132,7 +133,7 @@ void SsaLivenessAnalysis::NumberInstructions() { lifetime_position += 2; // Add a null marker to notify we are starting a block. 
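The forward_predecessors change above also shows the two sized ArenaVector constructors this patch uses in place of GrowableArray's SetSize: passing just a count value-initializes every slot (zero for integers), while passing a count plus a fill value, as block_infos_ does further down, fills every slot with that value up front. A small sketch under the same arena headers; the function and element types are placeholders, not part of the patch:

#include "base/arena_allocator.h"
#include "base/arena_containers.h"

namespace art {

void SizedArenaVectorExamples(ArenaAllocator* arena, size_t num_blocks) {
  // Old: GrowableArray<uint32_t> counts(arena, num_blocks); counts.SetSize(num_blocks);
  // New: the count goes straight into the constructor; slots start at zero.
  ArenaVector<uint32_t> counts(num_blocks, arena->Adapter(kArenaAllocSsaLiveness));

  // Count-plus-fill-value form, as used for block_infos_ further down: every
  // slot starts out as nullptr without a separate initialization loop.
  ArenaVector<int*> slots(num_blocks, nullptr, arena->Adapter(kArenaAllocSsaLiveness));

  counts.resize(num_blocks + 1u);  // Old: counts.SetSize(num_blocks + 1u)
  counts[num_blocks] = 1u;         // Old: counts.Put(num_blocks, 1u)
  slots.push_back(nullptr);        // Old: slots.Add(nullptr)
}

}  // namespace art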
- instructions_from_lifetime_position_.Add(nullptr); + instructions_from_lifetime_position_.push_back(nullptr); for (HInstructionIterator inst_it(block->GetInstructions()); !inst_it.Done(); inst_it.Advance()) { @@ -140,12 +141,12 @@ void SsaLivenessAnalysis::NumberInstructions() { codegen_->AllocateLocations(current); LocationSummary* locations = current->GetLocations(); if (locations != nullptr && locations->Out().IsValid()) { - instructions_from_ssa_index_.Add(current); + instructions_from_ssa_index_.push_back(current); current->SetSsaIndex(ssa_index++); current->SetLiveInterval( LiveInterval::MakeInterval(graph_->GetArena(), current->GetType(), current)); } - instructions_from_lifetime_position_.Add(current); + instructions_from_lifetime_position_.push_back(current); current->SetLifetimePosition(lifetime_position); lifetime_position += 2; } @@ -158,9 +159,9 @@ void SsaLivenessAnalysis::NumberInstructions() { void SsaLivenessAnalysis::ComputeLiveness() { for (HLinearOrderIterator it(*graph_); !it.Done(); it.Advance()) { HBasicBlock* block = it.Current(); - block_infos_.Put( - block->GetBlockId(), - new (graph_->GetArena()) BlockInfo(graph_->GetArena(), *block, number_of_ssa_values_)); + DCHECK_LT(block->GetBlockId(), block_infos_.size()); + block_infos_[block->GetBlockId()] = + new (graph_->GetArena()) BlockInfo(graph_->GetArena(), *block, number_of_ssa_values_); } // Compute the live ranges, as well as the initial live_in, live_out, and kill sets. @@ -212,7 +213,7 @@ void SsaLivenessAnalysis::ComputeLiveRanges() { // Add a range that covers this block to all instructions live_in because of successors. // Instructions defined in this block will have their start of the range adjusted. for (uint32_t idx : live_in->Indexes()) { - HInstruction* current = instructions_from_ssa_index_.Get(idx); + HInstruction* current = GetInstructionFromSsaIndex(idx); current->GetLiveInterval()->AddRange(block->GetLifetimeStart(), block->GetLifetimeEnd()); } @@ -277,7 +278,7 @@ void SsaLivenessAnalysis::ComputeLiveRanges() { // For all live_in instructions at the loop header, we need to create a range // that covers the full loop. for (uint32_t idx : live_in->Indexes()) { - HInstruction* current = instructions_from_ssa_index_.Get(idx); + HInstruction* current = GetInstructionFromSsaIndex(idx); current->GetLiveInterval()->AddLoopRange(block->GetLifetimeStart(), last_position); } } diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h index 3aedaa56a2..e4b0999d4f 100644 --- a/compiler/optimizing/ssa_liveness_analysis.h +++ b/compiler/optimizing/ssa_liveness_analysis.h @@ -27,7 +27,7 @@ class SsaLivenessAnalysis; static constexpr int kNoRegister = -1; -class BlockInfo : public ArenaObject<kArenaAllocMisc> { +class BlockInfo : public ArenaObject<kArenaAllocSsaLiveness> { public: BlockInfo(ArenaAllocator* allocator, const HBasicBlock& block, size_t number_of_ssa_values) : block_(block), @@ -55,7 +55,7 @@ class BlockInfo : public ArenaObject<kArenaAllocMisc> { * A live range contains the start and end of a range where an instruction or a temporary * is live. */ -class LiveRange FINAL : public ArenaObject<kArenaAllocMisc> { +class LiveRange FINAL : public ArenaObject<kArenaAllocSsaLiveness> { public: LiveRange(size_t start, size_t end, LiveRange* next) : start_(start), end_(end), next_(next) { DCHECK_LT(start, end); @@ -101,7 +101,7 @@ class LiveRange FINAL : public ArenaObject<kArenaAllocMisc> { /** * A use position represents a live interval use at a given position. 
*/ -class UsePosition : public ArenaObject<kArenaAllocMisc> { +class UsePosition : public ArenaObject<kArenaAllocSsaLiveness> { public: UsePosition(HInstruction* user, HEnvironment* environment, @@ -169,7 +169,7 @@ class UsePosition : public ArenaObject<kArenaAllocMisc> { DISALLOW_COPY_AND_ASSIGN(UsePosition); }; -class SafepointPosition : public ArenaObject<kArenaAllocMisc> { +class SafepointPosition : public ArenaObject<kArenaAllocSsaLiveness> { public: explicit SafepointPosition(HInstruction* instruction) : instruction_(instruction), @@ -206,7 +206,7 @@ class SafepointPosition : public ArenaObject<kArenaAllocMisc> { * An interval is a list of disjoint live ranges where an instruction is live. * Each instruction that has uses gets an interval. */ -class LiveInterval : public ArenaObject<kArenaAllocMisc> { +class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> { public: static LiveInterval* MakeInterval(ArenaAllocator* allocator, Primitive::Type type, @@ -1106,33 +1106,39 @@ class SsaLivenessAnalysis : public ValueObject { SsaLivenessAnalysis(HGraph* graph, CodeGenerator* codegen) : graph_(graph), codegen_(codegen), - block_infos_(graph->GetArena(), graph->GetBlocks().size()), - instructions_from_ssa_index_(graph->GetArena(), 0), - instructions_from_lifetime_position_(graph->GetArena(), 0), + block_infos_(graph->GetBlocks().size(), + nullptr, + graph->GetArena()->Adapter(kArenaAllocSsaLiveness)), + instructions_from_ssa_index_(graph->GetArena()->Adapter(kArenaAllocSsaLiveness)), + instructions_from_lifetime_position_(graph->GetArena()->Adapter(kArenaAllocSsaLiveness)), number_of_ssa_values_(0) { - block_infos_.SetSize(graph->GetBlocks().size()); } void Analyze(); BitVector* GetLiveInSet(const HBasicBlock& block) const { - return &block_infos_.Get(block.GetBlockId())->live_in_; + DCHECK_LT(block.GetBlockId(), block_infos_.size()); + return &block_infos_[block.GetBlockId()]->live_in_; } BitVector* GetLiveOutSet(const HBasicBlock& block) const { - return &block_infos_.Get(block.GetBlockId())->live_out_; + DCHECK_LT(block.GetBlockId(), block_infos_.size()); + return &block_infos_[block.GetBlockId()]->live_out_; } BitVector* GetKillSet(const HBasicBlock& block) const { - return &block_infos_.Get(block.GetBlockId())->kill_; + DCHECK_LT(block.GetBlockId(), block_infos_.size()); + return &block_infos_[block.GetBlockId()]->kill_; } HInstruction* GetInstructionFromSsaIndex(size_t index) const { - return instructions_from_ssa_index_.Get(index); + DCHECK_LT(index, instructions_from_ssa_index_.size()); + return instructions_from_ssa_index_[index]; } HInstruction* GetInstructionFromPosition(size_t index) const { - return instructions_from_lifetime_position_.Get(index); + DCHECK_LT(index, instructions_from_lifetime_position_.size()); + return instructions_from_lifetime_position_[index]; } HBasicBlock* GetBlockFromPosition(size_t index) const { @@ -1163,7 +1169,7 @@ class SsaLivenessAnalysis : public ValueObject { } size_t GetMaxLifetimePosition() const { - return instructions_from_lifetime_position_.Size() * 2 - 1; + return instructions_from_lifetime_position_.size() * 2 - 1; } size_t GetNumberOfSsaValues() const { @@ -1218,13 +1224,13 @@ class SsaLivenessAnalysis : public ValueObject { HGraph* const graph_; CodeGenerator* const codegen_; - GrowableArray<BlockInfo*> block_infos_; + ArenaVector<BlockInfo*> block_infos_; // Temporary array used when computing live_in, live_out, and kill sets. 
- GrowableArray<HInstruction*> instructions_from_ssa_index_; + ArenaVector<HInstruction*> instructions_from_ssa_index_; // Temporary array used when inserting moves in the graph. - GrowableArray<HInstruction*> instructions_from_lifetime_position_; + ArenaVector<HInstruction*> instructions_from_lifetime_position_; size_t number_of_ssa_values_; ART_FRIEND_TEST(RegisterAllocatorTest, SpillInactive); diff --git a/compiler/optimizing/ssa_phi_elimination.cc b/compiler/optimizing/ssa_phi_elimination.cc index a9f04cd201..72f9ddd506 100644 --- a/compiler/optimizing/ssa_phi_elimination.cc +++ b/compiler/optimizing/ssa_phi_elimination.cc @@ -35,7 +35,7 @@ void SsaDeadPhiElimination::MarkDeadPhis() { HUseListNode<HInstruction*>* current = use_it.Current(); HInstruction* user = current->GetUser(); if (!user->IsPhi()) { - worklist_.Add(phi); + worklist_.push_back(phi); phi->SetLive(); break; } @@ -44,12 +44,13 @@ void SsaDeadPhiElimination::MarkDeadPhis() { } // Process the worklist by propagating liveness to phi inputs. - while (!worklist_.IsEmpty()) { - HPhi* phi = worklist_.Pop(); + while (!worklist_.empty()) { + HPhi* phi = worklist_.back(); + worklist_.pop_back(); for (HInputIterator it(phi); !it.Done(); it.Advance()) { HInstruction* input = it.Current(); if (input->IsPhi() && input->AsPhi()->IsDead()) { - worklist_.Add(input->AsPhi()); + worklist_.push_back(input->AsPhi()); input->AsPhi()->SetLive(); } } @@ -103,12 +104,13 @@ void SsaRedundantPhiElimination::Run() { for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { HBasicBlock* block = it.Current(); for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) { - worklist_.Add(inst_it.Current()->AsPhi()); + worklist_.push_back(inst_it.Current()->AsPhi()); } } - while (!worklist_.IsEmpty()) { - HPhi* phi = worklist_.Pop(); + while (!worklist_.empty()) { + HPhi* phi = worklist_.back(); + worklist_.pop_back(); // If the phi has already been processed, continue. 
if (!phi->IsInBlock()) { @@ -155,7 +157,7 @@ void SsaRedundantPhiElimination::Run() { HUseListNode<HInstruction*>* current = it.Current(); HInstruction* user = current->GetUser(); if (user->IsPhi()) { - worklist_.Add(user->AsPhi()); + worklist_.push_back(user->AsPhi()); } } diff --git a/compiler/optimizing/ssa_phi_elimination.h b/compiler/optimizing/ssa_phi_elimination.h index 67351f277b..b48e8200d5 100644 --- a/compiler/optimizing/ssa_phi_elimination.h +++ b/compiler/optimizing/ssa_phi_elimination.h @@ -17,6 +17,7 @@ #ifndef ART_COMPILER_OPTIMIZING_SSA_PHI_ELIMINATION_H_ #define ART_COMPILER_OPTIMIZING_SSA_PHI_ELIMINATION_H_ +#include "base/arena_containers.h" #include "nodes.h" #include "optimization.h" @@ -30,7 +31,9 @@ class SsaDeadPhiElimination : public HOptimization { public: explicit SsaDeadPhiElimination(HGraph* graph) : HOptimization(graph, kSsaDeadPhiEliminationPassName), - worklist_(graph->GetArena(), kDefaultWorklistSize) {} + worklist_(graph->GetArena()->Adapter(kArenaAllocSsaPhiElimination)) { + worklist_.reserve(kDefaultWorklistSize); + } void Run() OVERRIDE; @@ -40,7 +43,7 @@ class SsaDeadPhiElimination : public HOptimization { static constexpr const char* kSsaDeadPhiEliminationPassName = "dead_phi_elimination"; private: - GrowableArray<HPhi*> worklist_; + ArenaVector<HPhi*> worklist_; static constexpr size_t kDefaultWorklistSize = 8; @@ -57,14 +60,16 @@ class SsaRedundantPhiElimination : public HOptimization { public: explicit SsaRedundantPhiElimination(HGraph* graph) : HOptimization(graph, kSsaRedundantPhiEliminationPassName), - worklist_(graph->GetArena(), kDefaultWorklistSize) {} + worklist_(graph->GetArena()->Adapter(kArenaAllocSsaPhiElimination)) { + worklist_.reserve(kDefaultWorklistSize); + } void Run() OVERRIDE; static constexpr const char* kSsaRedundantPhiEliminationPassName = "redundant_phi_elimination"; private: - GrowableArray<HPhi*> worklist_; + ArenaVector<HPhi*> worklist_; static constexpr size_t kDefaultWorklistSize = 8; diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc index 1f0bac59e0..f27cecc8fa 100644 --- a/compiler/optimizing/stack_map_stream.cc +++ b/compiler/optimizing/stack_map_stream.cc @@ -30,8 +30,8 @@ void StackMapStream::BeginStackMapEntry(uint32_t dex_pc, current_entry_.sp_mask = sp_mask; current_entry_.num_dex_registers = num_dex_registers; current_entry_.inlining_depth = inlining_depth; - current_entry_.dex_register_locations_start_index = dex_register_locations_.Size(); - current_entry_.inline_infos_start_index = inline_infos_.Size(); + current_entry_.dex_register_locations_start_index = dex_register_locations_.size(); + current_entry_.inline_infos_start_index = inline_infos_.size(); current_entry_.dex_register_map_hash = 0; current_entry_.same_dex_register_map_as_ = kNoSameDexMapFound; if (num_dex_registers != 0) { @@ -55,7 +55,7 @@ void StackMapStream::BeginStackMapEntry(uint32_t dex_pc, void StackMapStream::EndStackMapEntry() { current_entry_.same_dex_register_map_as_ = FindEntryWithTheSameDexMap(); - stack_maps_.Add(current_entry_); + stack_maps_.push_back(current_entry_); current_entry_ = StackMapEntry(); } @@ -73,12 +73,12 @@ void StackMapStream::AddDexRegisterEntry(DexRegisterLocation::Kind kind, int32_t auto it = location_catalog_entries_indices_.Find(location); if (it != location_catalog_entries_indices_.end()) { // Retrieve the index from the hash map. 
- dex_register_locations_.Add(it->second); + dex_register_locations_.push_back(it->second); } else { // Create a new entry in the location catalog and the hash map. - size_t index = location_catalog_entries_.Size(); - location_catalog_entries_.Add(location); - dex_register_locations_.Add(index); + size_t index = location_catalog_entries_.size(); + location_catalog_entries_.push_back(location); + dex_register_locations_.push_back(index); location_catalog_entries_indices_.Insert(std::make_pair(location, index)); } @@ -108,7 +108,7 @@ void StackMapStream::BeginInlineInfoEntry(uint32_t method_index, current_inline_info_.dex_pc = dex_pc; current_inline_info_.invoke_type = invoke_type; current_inline_info_.num_dex_registers = num_dex_registers; - current_inline_info_.dex_register_locations_start_index = dex_register_locations_.Size(); + current_inline_info_.dex_register_locations_start_index = dex_register_locations_.size(); if (num_dex_registers != 0) { current_inline_info_.live_dex_registers_mask = new (allocator_) ArenaBitVector(allocator_, num_dex_registers, true); @@ -123,14 +123,14 @@ void StackMapStream::EndInlineInfoEntry() { DCHECK_EQ(current_dex_register_, current_inline_info_.num_dex_registers) << "Inline information contains less registers than expected"; in_inline_frame_ = false; - inline_infos_.Add(current_inline_info_); + inline_infos_.push_back(current_inline_info_); current_inline_info_ = InlineInfoEntry(); } uint32_t StackMapStream::ComputeMaxNativePcOffset() const { uint32_t max_native_pc_offset = 0u; - for (size_t i = 0, size = stack_maps_.Size(); i != size; ++i) { - max_native_pc_offset = std::max(max_native_pc_offset, stack_maps_.Get(i).native_pc_offset); + for (const StackMapEntry& entry : stack_maps_) { + max_native_pc_offset = std::max(max_native_pc_offset, entry.native_pc_offset); } return max_native_pc_offset; } @@ -147,7 +147,7 @@ size_t StackMapStream::PrepareForFillIn() { dex_pc_max_, max_native_pc_offset, register_mask_max_); - stack_maps_size_ = stack_maps_.Size() * stack_map_encoding_.ComputeStackMapSize(); + stack_maps_size_ = stack_maps_.size() * stack_map_encoding_.ComputeStackMapSize(); dex_register_location_catalog_size_ = ComputeDexRegisterLocationCatalogSize(); // Note: use RoundUp to word-size here if you want CodeInfo objects to be word aligned. @@ -170,33 +170,28 @@ size_t StackMapStream::PrepareForFillIn() { size_t StackMapStream::ComputeDexRegisterLocationCatalogSize() const { size_t size = DexRegisterLocationCatalog::kFixedSize; - for (size_t location_catalog_entry_index = 0; - location_catalog_entry_index < location_catalog_entries_.Size(); - ++location_catalog_entry_index) { - DexRegisterLocation dex_register_location = - location_catalog_entries_.Get(location_catalog_entry_index); + for (const DexRegisterLocation& dex_register_location : location_catalog_entries_) { size += DexRegisterLocationCatalog::EntrySize(dex_register_location); } return size; } size_t StackMapStream::ComputeDexRegisterMapSize(uint32_t num_dex_registers, - const BitVector& live_dex_registers_mask) const { + const BitVector* live_dex_registers_mask) const { + // For num_dex_registers == 0u live_dex_registers_mask may be null. + if (num_dex_registers == 0u) { + return 0u; // No register map will be emitted. + } + DCHECK(live_dex_registers_mask != nullptr); + // Size of the map in bytes. size_t size = DexRegisterMap::kFixedSize; // Add the live bit mask for the Dex register liveness. 
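ComputeMaxNativePcOffset above is representative of another recurring simplification in this file: index loops written against Size()/Get(i) become range-based for loops once the container is an ArenaVector. A trivial standalone equivalent, using a plain std::vector as a stand-in (illustrative only):

#include <algorithm>
#include <cstdint>
#include <vector>

struct Entry {
  uint32_t native_pc_offset;
};

uint32_t ComputeMaxOffset(const std::vector<Entry>& entries) {
  uint32_t max_offset = 0u;
  // Old shape: for (size_t i = 0, size = entries.Size(); i != size; ++i) { ... entries.Get(i) ... }
  for (const Entry& entry : entries) {
    max_offset = std::max(max_offset, entry.native_pc_offset);
  }
  return max_offset;
}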
size += DexRegisterMap::GetLiveBitMaskSize(num_dex_registers); // Compute the size of the set of live Dex register entries. - size_t number_of_live_dex_registers = 0; - for (size_t dex_register_number = 0; - dex_register_number < num_dex_registers; - ++dex_register_number) { - if (live_dex_registers_mask.IsBitSet(dex_register_number)) { - ++number_of_live_dex_registers; - } - } + size_t number_of_live_dex_registers = live_dex_registers_mask->NumSetBits(); size_t map_entries_size_in_bits = - DexRegisterMap::SingleEntrySizeInBits(location_catalog_entries_.Size()) + DexRegisterMap::SingleEntrySizeInBits(location_catalog_entries_.size()) * number_of_live_dex_registers; size_t map_entries_size_in_bytes = RoundUp(map_entries_size_in_bits, kBitsPerByte) / kBitsPerByte; @@ -207,24 +202,24 @@ size_t StackMapStream::ComputeDexRegisterMapSize(uint32_t num_dex_registers, size_t StackMapStream::ComputeDexRegisterMapsSize() const { size_t size = 0; size_t inline_info_index = 0; - for (size_t i = 0; i < stack_maps_.Size(); ++i) { - StackMapEntry entry = stack_maps_.Get(i); + for (const StackMapEntry& entry : stack_maps_) { if (entry.same_dex_register_map_as_ == kNoSameDexMapFound) { - size += ComputeDexRegisterMapSize(entry.num_dex_registers, *entry.live_dex_registers_mask); + size += ComputeDexRegisterMapSize(entry.num_dex_registers, entry.live_dex_registers_mask); } else { // Entries with the same dex map will have the same offset. } for (size_t j = 0; j < entry.inlining_depth; ++j) { - InlineInfoEntry inline_entry = inline_infos_.Get(inline_info_index++); + DCHECK_LT(inline_info_index, inline_infos_.size()); + InlineInfoEntry inline_entry = inline_infos_[inline_info_index++]; size += ComputeDexRegisterMapSize(inline_entry.num_dex_registers, - *inline_entry.live_dex_registers_mask); + inline_entry.live_dex_registers_mask); } } return size; } size_t StackMapStream::ComputeInlineInfoSize() const { - return inline_infos_.Size() * InlineInfo::SingleEntrySize() + return inline_infos_.size() * InlineInfo::SingleEntrySize() // For encoding the depth. + (number_of_stack_maps_with_inline_info_ * InlineInfo::kFixedSize); } @@ -244,19 +239,18 @@ void StackMapStream::FillIn(MemoryRegion region) { inline_infos_start_, inline_info_size_); code_info.SetEncoding(stack_map_encoding_); - code_info.SetNumberOfStackMaps(stack_maps_.Size()); + code_info.SetNumberOfStackMaps(stack_maps_.size()); DCHECK_EQ(code_info.GetStackMapsSize(code_info.ExtractEncoding()), stack_maps_size_); // Set the Dex register location catalog. - code_info.SetNumberOfLocationCatalogEntries(location_catalog_entries_.Size()); + code_info.SetNumberOfLocationCatalogEntries(location_catalog_entries_.size()); MemoryRegion dex_register_location_catalog_region = region.Subregion( dex_register_location_catalog_start_, dex_register_location_catalog_size_); DexRegisterLocationCatalog dex_register_location_catalog(dex_register_location_catalog_region); // Offset in `dex_register_location_catalog` where to store the next // register location. 
size_t location_catalog_offset = DexRegisterLocationCatalog::kFixedSize; - for (size_t i = 0, e = location_catalog_entries_.Size(); i < e; ++i) { - DexRegisterLocation dex_register_location = location_catalog_entries_.Get(i); + for (DexRegisterLocation dex_register_location : location_catalog_entries_) { dex_register_location_catalog.SetRegisterInfo(location_catalog_offset, dex_register_location); location_catalog_offset += DexRegisterLocationCatalog::EntrySize(dex_register_location); } @@ -265,9 +259,9 @@ void StackMapStream::FillIn(MemoryRegion region) { uintptr_t next_dex_register_map_offset = 0; uintptr_t next_inline_info_offset = 0; - for (size_t i = 0, e = stack_maps_.Size(); i < e; ++i) { + for (size_t i = 0, e = stack_maps_.size(); i < e; ++i) { StackMap stack_map = code_info.GetStackMapAt(i, stack_map_encoding_); - StackMapEntry entry = stack_maps_.Get(i); + StackMapEntry entry = stack_maps_[i]; stack_map.SetDexPc(stack_map_encoding_, entry.dex_pc); stack_map.SetNativePcOffset(stack_map_encoding_, entry.native_pc_offset); @@ -291,7 +285,7 @@ void StackMapStream::FillIn(MemoryRegion region) { // New dex registers maps should be added to the stack map. MemoryRegion register_region = dex_register_locations_region.Subregion( next_dex_register_map_offset, - ComputeDexRegisterMapSize(entry.num_dex_registers, *entry.live_dex_registers_mask)); + ComputeDexRegisterMapSize(entry.num_dex_registers, entry.live_dex_registers_mask)); next_dex_register_map_offset += register_region.size(); DexRegisterMap dex_register_map(register_region); stack_map.SetDexRegisterMapOffset( @@ -318,8 +312,9 @@ void StackMapStream::FillIn(MemoryRegion region) { stack_map_encoding_, inline_region.start() - dex_register_locations_region.start()); inline_info.SetDepth(entry.inlining_depth); + DCHECK_LE(entry.inline_infos_start_index + entry.inlining_depth, inline_infos_.size()); for (size_t depth = 0; depth < entry.inlining_depth; ++depth) { - InlineInfoEntry inline_entry = inline_infos_.Get(depth + entry.inline_infos_start_index); + InlineInfoEntry inline_entry = inline_infos_[depth + entry.inline_infos_start_index]; inline_info.SetMethodIndexAtDepth(depth, inline_entry.method_index); inline_info.SetDexPcAtDepth(depth, inline_entry.dex_pc); inline_info.SetInvokeTypeAtDepth(depth, inline_entry.invoke_type); @@ -331,7 +326,7 @@ void StackMapStream::FillIn(MemoryRegion region) { MemoryRegion register_region = dex_register_locations_region.Subregion( next_dex_register_map_offset, ComputeDexRegisterMapSize(inline_entry.num_dex_registers, - *inline_entry.live_dex_registers_mask)); + inline_entry.live_dex_registers_mask)); next_dex_register_map_offset += register_region.size(); DexRegisterMap dex_register_map(register_region); inline_info.SetDexRegisterMapOffsetAtDepth( @@ -357,42 +352,43 @@ void StackMapStream::FillInDexRegisterMap(DexRegisterMap dex_register_map, uint32_t start_index_in_dex_register_locations) const { dex_register_map.SetLiveBitMask(num_dex_registers, live_dex_registers_mask); // Set the dex register location mapping data. 
- for (size_t dex_register_number = 0, index_in_dex_register_locations = 0; - dex_register_number < num_dex_registers; - ++dex_register_number) { - if (live_dex_registers_mask.IsBitSet(dex_register_number)) { - size_t location_catalog_entry_index = dex_register_locations_.Get( - start_index_in_dex_register_locations + index_in_dex_register_locations); - dex_register_map.SetLocationCatalogEntryIndex( - index_in_dex_register_locations, - location_catalog_entry_index, - num_dex_registers, - location_catalog_entries_.Size()); - ++index_in_dex_register_locations; - } + size_t number_of_live_dex_registers = live_dex_registers_mask.NumSetBits(); + DCHECK_LE(number_of_live_dex_registers, dex_register_locations_.size()); + DCHECK_LE(start_index_in_dex_register_locations, + dex_register_locations_.size() - number_of_live_dex_registers); + for (size_t index_in_dex_register_locations = 0; + index_in_dex_register_locations != number_of_live_dex_registers; + ++index_in_dex_register_locations) { + size_t location_catalog_entry_index = dex_register_locations_[ + start_index_in_dex_register_locations + index_in_dex_register_locations]; + dex_register_map.SetLocationCatalogEntryIndex( + index_in_dex_register_locations, + location_catalog_entry_index, + num_dex_registers, + location_catalog_entries_.size()); } } size_t StackMapStream::FindEntryWithTheSameDexMap() { - size_t current_entry_index = stack_maps_.Size(); + size_t current_entry_index = stack_maps_.size(); auto entries_it = dex_map_hash_to_stack_map_indices_.find(current_entry_.dex_register_map_hash); if (entries_it == dex_map_hash_to_stack_map_indices_.end()) { // We don't have a perfect hash functions so we need a list to collect all stack maps // which might have the same dex register map. - GrowableArray<uint32_t> stack_map_indices(allocator_, 1); - stack_map_indices.Add(current_entry_index); - dex_map_hash_to_stack_map_indices_.Put(current_entry_.dex_register_map_hash, stack_map_indices); + ArenaVector<uint32_t> stack_map_indices(allocator_->Adapter(kArenaAllocStackMapStream)); + stack_map_indices.push_back(current_entry_index); + dex_map_hash_to_stack_map_indices_.Put(current_entry_.dex_register_map_hash, + std::move(stack_map_indices)); return kNoSameDexMapFound; } // We might have collisions, so we need to check whether or not we really have a match. 
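The FillInDexRegisterMap rewrite above leans on an invariant the surrounding code already maintains: a map with N live Dex registers owns exactly N consecutive entries of dex_register_locations_, so the per-register IsBitSet walk can be replaced by a single NumSetBits() count, and (in the hunk that follows) two maps can be compared with std::equal over their catalog-index sub-ranges. A stand-in sketch of that invariant using std::vector in place of the ART containers; all names here are illustrative, not part of the patch:

#include <algorithm>
#include <cstddef>
#include <vector>

// Stand-ins: a live-register bit mask and the flat, concatenated list of
// catalog indices shared by all register maps.
using LiveMask = std::vector<bool>;

size_t NumSetBits(const LiveMask& mask) {
  return static_cast<size_t>(std::count(mask.begin(), mask.end(), true));
}

// Two maps are identical iff their live masks match and the catalog-index
// sub-ranges they own in `locations` are element-wise equal.
bool HaveSameMap(const std::vector<size_t>& locations,
                 const LiveMask& mask_a, size_t start_a,
                 const LiveMask& mask_b, size_t start_b) {
  if (mask_a != mask_b) {
    return false;
  }
  const size_t live = NumSetBits(mask_a);
  auto a_begin = locations.begin() + static_cast<std::ptrdiff_t>(start_a);
  auto b_begin = locations.begin() + static_cast<std::ptrdiff_t>(start_b);
  return std::equal(a_begin, a_begin + static_cast<std::ptrdiff_t>(live), b_begin);
}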
- for (size_t i = 0; i < entries_it->second.Size(); i++) { - size_t test_entry_index = entries_it->second.Get(i); - if (HaveTheSameDexMaps(stack_maps_.Get(test_entry_index), current_entry_)) { + for (uint32_t test_entry_index : entries_it->second) { + if (HaveTheSameDexMaps(GetStackMap(test_entry_index), current_entry_)) { return test_entry_index; } } - entries_it->second.Add(current_entry_index); + entries_it->second.push_back(current_entry_index); return kNoSameDexMapFound; } @@ -406,21 +402,22 @@ bool StackMapStream::HaveTheSameDexMaps(const StackMapEntry& a, const StackMapEn if (a.num_dex_registers != b.num_dex_registers) { return false; } - - int index_in_dex_register_locations = 0; - for (uint32_t i = 0; i < a.num_dex_registers; i++) { - if (a.live_dex_registers_mask->IsBitSet(i) != b.live_dex_registers_mask->IsBitSet(i)) { + if (a.num_dex_registers != 0u) { + DCHECK(a.live_dex_registers_mask != nullptr); + DCHECK(b.live_dex_registers_mask != nullptr); + if (!a.live_dex_registers_mask->Equal(b.live_dex_registers_mask)) { return false; } - if (a.live_dex_registers_mask->IsBitSet(i)) { - size_t a_loc = dex_register_locations_.Get( - a.dex_register_locations_start_index + index_in_dex_register_locations); - size_t b_loc = dex_register_locations_.Get( - b.dex_register_locations_start_index + index_in_dex_register_locations); - if (a_loc != b_loc) { - return false; - } - ++index_in_dex_register_locations; + size_t number_of_live_dex_registers = a.live_dex_registers_mask->NumSetBits(); + DCHECK_LE(number_of_live_dex_registers, dex_register_locations_.size()); + DCHECK_LE(a.dex_register_locations_start_index, + dex_register_locations_.size() - number_of_live_dex_registers); + DCHECK_LE(b.dex_register_locations_start_index, + dex_register_locations_.size() - number_of_live_dex_registers); + auto a_begin = dex_register_locations_.begin() + a.dex_register_locations_start_index; + auto b_begin = dex_register_locations_.begin() + b.dex_register_locations_start_index; + if (!std::equal(a_begin, a_begin + number_of_live_dex_registers, b_begin)) { + return false; } } return true; diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h index 703b6f7e13..4783e283b3 100644 --- a/compiler/optimizing/stack_map_stream.h +++ b/compiler/optimizing/stack_map_stream.h @@ -24,7 +24,6 @@ #include "memory_region.h" #include "nodes.h" #include "stack_map.h" -#include "utils/growable_array.h" namespace art { @@ -62,15 +61,16 @@ class StackMapStream : public ValueObject { public: explicit StackMapStream(ArenaAllocator* allocator) : allocator_(allocator), - stack_maps_(allocator, 10), - location_catalog_entries_(allocator, 4), - dex_register_locations_(allocator, 10 * 4), - inline_infos_(allocator, 2), + stack_maps_(allocator->Adapter(kArenaAllocStackMapStream)), + location_catalog_entries_(allocator->Adapter(kArenaAllocStackMapStream)), + dex_register_locations_(allocator->Adapter(kArenaAllocStackMapStream)), + inline_infos_(allocator->Adapter(kArenaAllocStackMapStream)), stack_mask_max_(-1), dex_pc_max_(0), register_mask_max_(0), number_of_stack_maps_with_inline_info_(0), - dex_map_hash_to_stack_map_indices_(std::less<uint32_t>(), allocator->Adapter()), + dex_map_hash_to_stack_map_indices_(std::less<uint32_t>(), + allocator->Adapter(kArenaAllocStackMapStream)), current_entry_(), current_inline_info_(), stack_mask_size_(0), @@ -84,7 +84,12 @@ class StackMapStream : public ValueObject { inline_infos_start_(0), needed_size_(0), current_dex_register_(0), - in_inline_frame_(false) 
{} + in_inline_frame_(false) { + stack_maps_.reserve(10); + location_catalog_entries_.reserve(4); + dex_register_locations_.reserve(10 * 4); + inline_infos_.reserve(2); + } // See runtime/stack_map.h to know what these fields contain. struct StackMapEntry { @@ -127,17 +132,17 @@ class StackMapStream : public ValueObject { void EndInlineInfoEntry(); size_t GetNumberOfStackMaps() const { - return stack_maps_.Size(); + return stack_maps_.size(); } const StackMapEntry& GetStackMap(size_t i) const { - DCHECK_LT(i, stack_maps_.Size()); - return stack_maps_.GetRawStorage()[i]; + DCHECK_LT(i, stack_maps_.size()); + return stack_maps_[i]; } void SetStackMapNativePcOffset(size_t i, uint32_t native_pc_offset) { - DCHECK_LT(i, stack_maps_.Size()); - stack_maps_.GetRawStorage()[i].native_pc_offset = native_pc_offset; + DCHECK_LT(i, stack_maps_.size()); + stack_maps_[i].native_pc_offset = native_pc_offset; } uint32_t ComputeMaxNativePcOffset() const; @@ -150,7 +155,7 @@ class StackMapStream : public ValueObject { private: size_t ComputeDexRegisterLocationCatalogSize() const; size_t ComputeDexRegisterMapSize(uint32_t num_dex_registers, - const BitVector& live_dex_registers_mask) const; + const BitVector* live_dex_registers_mask) const; size_t ComputeDexRegisterMapsSize() const; size_t ComputeInlineInfoSize() const; @@ -164,10 +169,10 @@ class StackMapStream : public ValueObject { uint32_t start_index_in_dex_register_locations) const; ArenaAllocator* allocator_; - GrowableArray<StackMapEntry> stack_maps_; + ArenaVector<StackMapEntry> stack_maps_; // A catalog of unique [location_kind, register_value] pairs (per method). - GrowableArray<DexRegisterLocation> location_catalog_entries_; + ArenaVector<DexRegisterLocation> location_catalog_entries_; // Map from Dex register location catalog entries to their indices in the // location catalog. typedef HashMap<DexRegisterLocation, size_t, LocationCatalogEntriesIndicesEmptyFn, @@ -175,14 +180,14 @@ class StackMapStream : public ValueObject { LocationCatalogEntriesIndices location_catalog_entries_indices_; // A set of concatenated maps of Dex register locations indices to `location_catalog_entries_`. 
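One more detail of the stack map stream conversion, from FindEntryWithTheSameDexMap above: the freshly built ArenaVector of stack map indices is handed to dex_map_hash_to_stack_map_indices_ with std::move, so the map takes over the vector's buffer instead of copying it. The same idea with standard containers as stand-ins (illustrative only, not part of the patch):

#include <cstdint>
#include <unordered_map>
#include <utility>
#include <vector>

int main() {
  std::unordered_map<uint32_t, std::vector<uint32_t>> hash_to_indices;

  std::vector<uint32_t> indices;
  indices.push_back(42u);  // The index of the current stack map entry.

  // Moving transfers the vector's storage into the map; `indices` is left
  // empty-but-valid, mirroring the std::move(...) passed to Put() above.
  hash_to_indices.emplace(0x1234u, std::move(indices));
  return 0;
}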
- GrowableArray<size_t> dex_register_locations_; - GrowableArray<InlineInfoEntry> inline_infos_; + ArenaVector<size_t> dex_register_locations_; + ArenaVector<InlineInfoEntry> inline_infos_; int stack_mask_max_; uint32_t dex_pc_max_; uint32_t register_mask_max_; size_t number_of_stack_maps_with_inline_info_; - ArenaSafeMap<uint32_t, GrowableArray<uint32_t>> dex_map_hash_to_stack_map_indices_; + ArenaSafeMap<uint32_t, ArenaVector<uint32_t>> dex_map_hash_to_stack_map_indices_; StackMapEntry current_entry_; InlineInfoEntry current_inline_info_; diff --git a/compiler/utils/arm/assembler_arm32_test.cc b/compiler/utils/arm/assembler_arm32_test.cc index 2a0912e02d..43805966a9 100644 --- a/compiler/utils/arm/assembler_arm32_test.cc +++ b/compiler/utils/arm/assembler_arm32_test.cc @@ -287,7 +287,7 @@ class AssemblerArm32Test : public AssemblerArmTest<arm::Arm32Assembler, case 1: return Base::REG2_TOKEN; case 2: - return REG3_TOKEN; + return Base::REG3_TOKEN; case 3: return REG4_TOKEN; default: diff --git a/compiler/utils/array_ref.h b/compiler/utils/array_ref.h index 303e0d5ad4..48f0328dce 100644 --- a/compiler/utils/array_ref.h +++ b/compiler/utils/array_ref.h @@ -161,6 +161,15 @@ class ArrayRef { value_type* data() { return array_; } const value_type* data() const { return array_; } + ArrayRef SubArray(size_type pos) const { + return SubArray(pos, size_ - pos); + } + ArrayRef SubArray(size_type pos, size_type length) const { + DCHECK_LE(pos, size()); + DCHECK_LE(length, size() - pos); + return ArrayRef(array_ + pos, length); + } + private: T* array_; size_t size_; diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h index 017402dbd3..bd994f46fc 100644 --- a/compiler/utils/assembler_test.h +++ b/compiler/utils/assembler_test.h @@ -92,6 +92,17 @@ class AssemblerTest : public testing::Test { fmt); } + std::string RepeatRRR(void (Ass::*f)(Reg, Reg, Reg), std::string fmt) { + return RepeatTemplatedRegisters<Reg, Reg, Reg>(f, + GetRegisters(), + GetRegisters(), + GetRegisters(), + &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>, + &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>, + &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>, + fmt); + } + std::string Repeatrb(void (Ass::*f)(Reg, Reg), std::string fmt) { return RepeatTemplatedRegisters<Reg, Reg>(f, GetRegisters(), @@ -118,6 +129,66 @@ class AssemblerTest : public testing::Test { return RepeatRegisterImm<RegisterView::kUseSecondaryName>(f, imm_bytes, fmt); } + template <typename Reg1Type, typename Reg2Type, typename ImmType, + RegisterView Reg1View, RegisterView Reg2View> + std::string RepeatRegRegImmBits(void (Ass::*f)(Reg1Type, Reg2Type, ImmType), + int imm_bits, + std::string fmt) { + const std::vector<Reg1Type*> reg1_registers = GetRegisters(); + const std::vector<Reg2Type*> reg2_registers = GetRegisters(); + std::string str; + std::vector<int64_t> imms = CreateImmediateValuesBits(abs(imm_bits), imm_bits > 0); + + for (auto reg1 : reg1_registers) { + for (auto reg2 : reg2_registers) { + for (int64_t imm : imms) { + ImmType new_imm = CreateImmediate(imm); + (assembler_.get()->*f)(*reg1, *reg2, new_imm); + std::string base = fmt; + + std::string reg1_string = GetRegName<Reg1View>(*reg1); + size_t reg1_index; + while ((reg1_index = base.find(REG1_TOKEN)) != std::string::npos) { + base.replace(reg1_index, ConstexprStrLen(REG1_TOKEN), reg1_string); + } + + std::string reg2_string = GetRegName<Reg2View>(*reg2); + size_t reg2_index; + while ((reg2_index = base.find(REG2_TOKEN)) != std::string::npos) 
{ + base.replace(reg2_index, ConstexprStrLen(REG2_TOKEN), reg2_string); + } + + size_t imm_index = base.find(IMM_TOKEN); + if (imm_index != std::string::npos) { + std::ostringstream sreg; + sreg << imm; + std::string imm_string = sreg.str(); + base.replace(imm_index, ConstexprStrLen(IMM_TOKEN), imm_string); + } + + if (str.size() > 0) { + str += "\n"; + } + str += base; + } + } + } + // Add a newline at the end. + str += "\n"; + return str; + } + + template <typename Reg1Type, typename Reg2Type, typename ImmType> + std::string RepeatRRIb(void (Ass::*f)(Reg1Type, Reg2Type, ImmType), + int imm_bits, + std::string fmt) { + return RepeatRegRegImmBits<Reg1Type, + Reg2Type, + ImmType, + RegisterView::kUsePrimaryName, + RegisterView::kUsePrimaryName>(f, imm_bits, fmt); + } + std::string RepeatFF(void (Ass::*f)(FPReg, FPReg), std::string fmt) { return RepeatTemplatedRegisters<FPReg, FPReg>(f, GetFPRegisters(), @@ -127,14 +198,27 @@ class AssemblerTest : public testing::Test { fmt); } - std::string RepeatFFI(void (Ass::*f)(FPReg, FPReg, const Imm&), size_t imm_bytes, std::string fmt) { + std::string RepeatFFF(void (Ass::*f)(FPReg, FPReg, FPReg), std::string fmt) { + return RepeatTemplatedRegisters<FPReg, FPReg, FPReg>(f, + GetFPRegisters(), + GetFPRegisters(), + GetFPRegisters(), + &AssemblerTest::GetFPRegName, + &AssemblerTest::GetFPRegName, + &AssemblerTest::GetFPRegName, + fmt); + } + + std::string RepeatFFI(void (Ass::*f)(FPReg, FPReg, const Imm&), + size_t imm_bytes, + std::string fmt) { return RepeatTemplatedRegistersImm<FPReg, FPReg>(f, - GetFPRegisters(), - GetFPRegisters(), - &AssemblerTest::GetFPRegName, - &AssemblerTest::GetFPRegName, - imm_bytes, - fmt); + GetFPRegisters(), + GetFPRegisters(), + &AssemblerTest::GetFPRegName, + &AssemblerTest::GetFPRegName, + imm_bytes, + fmt); } std::string RepeatFR(void (Ass::*f)(FPReg, Reg), std::string fmt) { @@ -339,6 +423,63 @@ class AssemblerTest : public testing::Test { return res; } + const int kMaxBitsExhaustiveTest = 8; + + // Create a couple of immediate values up to the number of bits given. 
+ virtual std::vector<int64_t> CreateImmediateValuesBits(const int imm_bits, bool as_uint = false) { + CHECK_GT(imm_bits, 0); + CHECK_LE(imm_bits, 64); + std::vector<int64_t> res; + + if (imm_bits <= kMaxBitsExhaustiveTest) { + if (as_uint) { + for (uint64_t i = MinInt<uint64_t>(imm_bits); i <= MaxInt<uint64_t>(imm_bits); i++) { + res.push_back(static_cast<int64_t>(i)); + } + } else { + for (int64_t i = MinInt<int64_t>(imm_bits); i <= MaxInt<int64_t>(imm_bits); i++) { + res.push_back(i); + } + } + } else { + if (as_uint) { + for (uint64_t i = MinInt<uint64_t>(kMaxBitsExhaustiveTest); + i <= MaxInt<uint64_t>(kMaxBitsExhaustiveTest); + i++) { + res.push_back(static_cast<int64_t>(i)); + } + for (int i = 0; i <= imm_bits; i++) { + uint64_t j = (MaxInt<uint64_t>(kMaxBitsExhaustiveTest) + 1) + + ((MaxInt<uint64_t>(imm_bits) - + (MaxInt<uint64_t>(kMaxBitsExhaustiveTest) + 1)) + * i / imm_bits); + res.push_back(static_cast<int64_t>(j)); + } + } else { + for (int i = 0; i <= imm_bits; i++) { + int64_t j = MinInt<int64_t>(imm_bits) + + ((((MinInt<int64_t>(kMaxBitsExhaustiveTest) - 1) - + MinInt<int64_t>(imm_bits)) + * i) / imm_bits); + res.push_back(static_cast<int64_t>(j)); + } + for (int64_t i = MinInt<int64_t>(kMaxBitsExhaustiveTest); + i <= MaxInt<int64_t>(kMaxBitsExhaustiveTest); + i++) { + res.push_back(static_cast<int64_t>(i)); + } + for (int i = 0; i <= imm_bits; i++) { + int64_t j = (MaxInt<int64_t>(kMaxBitsExhaustiveTest) + 1) + + ((MaxInt<int64_t>(imm_bits) - (MaxInt<int64_t>(kMaxBitsExhaustiveTest) + 1)) + * i / imm_bits); + res.push_back(static_cast<int64_t>(j)); + } + } + } + + return res; + } + // Create an immediate from the specific value. virtual Imm CreateImmediate(int64_t imm_value) = 0; @@ -406,6 +547,52 @@ class AssemblerTest : public testing::Test { return str; } + template <typename Reg1, typename Reg2, typename Reg3> + std::string RepeatTemplatedRegisters(void (Ass::*f)(Reg1, Reg2, Reg3), + const std::vector<Reg1*> reg1_registers, + const std::vector<Reg2*> reg2_registers, + const std::vector<Reg3*> reg3_registers, + std::string (AssemblerTest::*GetName1)(const Reg1&), + std::string (AssemblerTest::*GetName2)(const Reg2&), + std::string (AssemblerTest::*GetName3)(const Reg3&), + std::string fmt) { + std::string str; + for (auto reg1 : reg1_registers) { + for (auto reg2 : reg2_registers) { + for (auto reg3 : reg3_registers) { + (assembler_.get()->*f)(*reg1, *reg2, *reg3); + std::string base = fmt; + + std::string reg1_string = (this->*GetName1)(*reg1); + size_t reg1_index; + while ((reg1_index = base.find(REG1_TOKEN)) != std::string::npos) { + base.replace(reg1_index, ConstexprStrLen(REG1_TOKEN), reg1_string); + } + + std::string reg2_string = (this->*GetName2)(*reg2); + size_t reg2_index; + while ((reg2_index = base.find(REG2_TOKEN)) != std::string::npos) { + base.replace(reg2_index, ConstexprStrLen(REG2_TOKEN), reg2_string); + } + + std::string reg3_string = (this->*GetName3)(*reg3); + size_t reg3_index; + while ((reg3_index = base.find(REG3_TOKEN)) != std::string::npos) { + base.replace(reg3_index, ConstexprStrLen(REG3_TOKEN), reg3_string); + } + + if (str.size() > 0) { + str += "\n"; + } + str += base; + } + } + } + // Add a newline at the end. 
+ str += "\n"; + return str; + } + template <typename Reg1, typename Reg2> std::string RepeatTemplatedRegistersImm(void (Ass::*f)(Reg1, Reg2, const Imm&), const std::vector<Reg1*> reg1_registers, @@ -500,6 +687,7 @@ class AssemblerTest : public testing::Test { static constexpr const char* REG_TOKEN = "{reg}"; static constexpr const char* REG1_TOKEN = "{reg1}"; static constexpr const char* REG2_TOKEN = "{reg2}"; + static constexpr const char* REG3_TOKEN = "{reg3}"; static constexpr const char* IMM_TOKEN = "{imm}"; private: diff --git a/compiler/utils/growable_array.h b/compiler/utils/growable_array.h deleted file mode 100644 index f85e026f16..0000000000 --- a/compiler/utils/growable_array.h +++ /dev/null @@ -1,174 +0,0 @@ -/* - * Copyright (C) 2013 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_UTILS_GROWABLE_ARRAY_H_ -#define ART_COMPILER_UTILS_GROWABLE_ARRAY_H_ - -#include <stdint.h> -#include <stddef.h> - -#include "base/arena_object.h" - -namespace art { - -// Deprecated -// TODO: Replace all uses with ArenaVector<T>. -template<typename T> -class GrowableArray : public ArenaObject<kArenaAllocGrowableArray> { - public: - GrowableArray(ArenaAllocator* arena, size_t init_length) - : arena_(arena), - num_allocated_(init_length), - num_used_(0) { - elem_list_ = arena_->AllocArray<T>(init_length, kArenaAllocGrowableArray); - } - - GrowableArray(ArenaAllocator* arena, size_t init_length, T initial_data) - : arena_(arena), - num_allocated_(init_length), - num_used_(init_length) { - elem_list_ = arena_->AllocArray<T>(init_length, kArenaAllocGrowableArray); - for (size_t i = 0; i < init_length; ++i) { - elem_list_[i] = initial_data; - } - } - - bool Contains(T value, size_t start_from = 0) const { - for (size_t i = start_from; i < num_used_; ++i) { - if (elem_list_[i] == value) { - return true; - } - } - return false; - } - - // Expand the list size to at least new length. - void Resize(size_t new_length) { - if (new_length <= num_allocated_) return; - // If it's a small list double the size, else grow 1.5x. - size_t target_length = - (num_allocated_ < 128) ? num_allocated_ << 1 : num_allocated_ + (num_allocated_ >> 1); - if (new_length > target_length) { - target_length = new_length; - } - T* new_array = arena_->AllocArray<T>(target_length, kArenaAllocGrowableArray); - memcpy(new_array, elem_list_, sizeof(T) * num_allocated_); - num_allocated_ = target_length; - elem_list_ = new_array; - } - - // NOTE: does not return storage, just resets use count. - void Reset() { - num_used_ = 0; - } - - // Insert an element to the end of a list, resizing if necessary. 
- void Insert(T elem) { - if (num_used_ == num_allocated_) { - Resize(num_used_ + 1); - } - elem_list_[num_used_++] = elem; - } - - void InsertAt(size_t index, T elem) { - DCHECK(index <= Size()); - Insert(elem); - for (size_t i = Size() - 1; i > index; --i) { - elem_list_[i] = elem_list_[i - 1]; - } - elem_list_[index] = elem; - } - - void Add(T elem) { - Insert(elem); - } - - T Get(size_t index) const { - DCHECK_LT(index, num_used_); - return elem_list_[index]; - } - - // Overwrite existing element at position index. List must be large enough. - void Put(size_t index, T elem) { - DCHECK_LT(index, num_used_); - elem_list_[index] = elem; - } - - void Increment(size_t index) { - DCHECK_LT(index, num_used_); - elem_list_[index]++; - } - - /* - * Remove an existing element from list. If there are more than one copy - * of the element, only the first one encountered will be deleted. - */ - // TODO: consider renaming this. - void Delete(T element) { - bool found = false; - for (size_t i = 0; i < num_used_ - 1; i++) { - if (!found && elem_list_[i] == element) { - found = true; - } - if (found) { - elem_list_[i] = elem_list_[i+1]; - } - } - // We should either have found the element, or it was the last (unscanned) element. - DCHECK(found || (element == elem_list_[num_used_ - 1])); - num_used_--; - } - - void DeleteAt(size_t index) { - for (size_t i = index; i < num_used_ - 1; i++) { - elem_list_[i] = elem_list_[i + 1]; - } - num_used_--; - } - - size_t GetNumAllocated() const { return num_allocated_; } - - size_t Size() const { return num_used_; } - - bool IsEmpty() const { return num_used_ == 0; } - - T Pop() { - DCHECK_GE(num_used_, (size_t)0); - return elem_list_[--num_used_]; - } - - T Peek() const { - DCHECK_GE(num_used_, (size_t)0); - return elem_list_[num_used_ - 1]; - } - - void SetSize(size_t new_size) { - Resize(new_size); - num_used_ = new_size; - } - - T* GetRawStorage() const { return elem_list_; } - - private: - ArenaAllocator* const arena_; - size_t num_allocated_; - size_t num_used_; - T* elem_list_; -}; - -} // namespace art - -#endif // ART_COMPILER_UTILS_GROWABLE_ARRAY_H_ diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h index c170313728..d083eb4306 100644 --- a/compiler/utils/mips64/assembler_mips64.h +++ b/compiler/utils/mips64/assembler_mips64.h @@ -46,6 +46,20 @@ enum StoreOperandType { kStoreDoubleword }; +// Used to test the values returned by ClassS/ClassD. +enum FPClassMaskType { + kSignalingNaN = 0x001, + kQuietNaN = 0x002, + kNegativeInfinity = 0x004, + kNegativeNormal = 0x008, + kNegativeSubnormal = 0x010, + kNegativeZero = 0x020, + kPositiveInfinity = 0x040, + kPositiveNormal = 0x080, + kPositiveSubnormal = 0x100, + kPositiveZero = 0x200, +}; + class Mips64Assembler FINAL : public Assembler { public: Mips64Assembler() {} diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc new file mode 100644 index 0000000000..2071aca546 --- /dev/null +++ b/compiler/utils/mips64/assembler_mips64_test.cc @@ -0,0 +1,380 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "assembler_mips64.h" + +#include <inttypes.h> +#include <map> +#include <random> + +#include "base/bit_utils.h" +#include "base/stl_util.h" +#include "utils/assembler_test.h" + +namespace art { + +struct MIPS64CpuRegisterCompare { + bool operator()(const mips64::GpuRegister& a, const mips64::GpuRegister& b) const { + return a < b; + } +}; + +class AssemblerMIPS64Test : public AssemblerTest<mips64::Mips64Assembler, + mips64::GpuRegister, + mips64::FpuRegister, + uint32_t> { + public: + typedef AssemblerTest<mips64::Mips64Assembler, + mips64::GpuRegister, + mips64::FpuRegister, + uint32_t> Base; + + protected: + // Get the typically used name for this architecture, e.g., aarch64, x86-64, ... + std::string GetArchitectureString() OVERRIDE { + return "mips64"; + } + + std::string GetAssemblerParameters() OVERRIDE { + return " --no-warn -march=mips64r6"; + } + + std::string GetDisassembleParameters() OVERRIDE { + return " -D -bbinary -mmips:isa64r6"; + } + + void SetUpHelpers() OVERRIDE { + if (registers_.size() == 0) { + registers_.push_back(new mips64::GpuRegister(mips64::ZERO)); + registers_.push_back(new mips64::GpuRegister(mips64::AT)); + registers_.push_back(new mips64::GpuRegister(mips64::V0)); + registers_.push_back(new mips64::GpuRegister(mips64::V1)); + registers_.push_back(new mips64::GpuRegister(mips64::A0)); + registers_.push_back(new mips64::GpuRegister(mips64::A1)); + registers_.push_back(new mips64::GpuRegister(mips64::A2)); + registers_.push_back(new mips64::GpuRegister(mips64::A3)); + registers_.push_back(new mips64::GpuRegister(mips64::A4)); + registers_.push_back(new mips64::GpuRegister(mips64::A5)); + registers_.push_back(new mips64::GpuRegister(mips64::A6)); + registers_.push_back(new mips64::GpuRegister(mips64::A7)); + registers_.push_back(new mips64::GpuRegister(mips64::T0)); + registers_.push_back(new mips64::GpuRegister(mips64::T1)); + registers_.push_back(new mips64::GpuRegister(mips64::T2)); + registers_.push_back(new mips64::GpuRegister(mips64::T3)); + registers_.push_back(new mips64::GpuRegister(mips64::S0)); + registers_.push_back(new mips64::GpuRegister(mips64::S1)); + registers_.push_back(new mips64::GpuRegister(mips64::S2)); + registers_.push_back(new mips64::GpuRegister(mips64::S3)); + registers_.push_back(new mips64::GpuRegister(mips64::S4)); + registers_.push_back(new mips64::GpuRegister(mips64::S5)); + registers_.push_back(new mips64::GpuRegister(mips64::S6)); + registers_.push_back(new mips64::GpuRegister(mips64::S7)); + registers_.push_back(new mips64::GpuRegister(mips64::T8)); + registers_.push_back(new mips64::GpuRegister(mips64::T9)); + registers_.push_back(new mips64::GpuRegister(mips64::K0)); + registers_.push_back(new mips64::GpuRegister(mips64::K1)); + registers_.push_back(new mips64::GpuRegister(mips64::GP)); + registers_.push_back(new mips64::GpuRegister(mips64::SP)); + registers_.push_back(new mips64::GpuRegister(mips64::S8)); + registers_.push_back(new mips64::GpuRegister(mips64::RA)); + + secondary_register_names_.emplace(mips64::GpuRegister(mips64::ZERO), "zero"); + 
secondary_register_names_.emplace(mips64::GpuRegister(mips64::AT), "at"); + secondary_register_names_.emplace(mips64::GpuRegister(mips64::V0), "v0"); + secondary_register_names_.emplace(mips64::GpuRegister(mips64::V1), "v1"); + secondary_register_names_.emplace(mips64::GpuRegister(mips64::A0), "a0"); + secondary_register_names_.emplace(mips64::GpuRegister(mips64::A1), "a1"); + secondary_register_names_.emplace(mips64::GpuRegister(mips64::A2), "a2"); + secondary_register_names_.emplace(mips64::GpuRegister(mips64::A3), "a3"); + secondary_register_names_.emplace(mips64::GpuRegister(mips64::A4), "a4"); + secondary_register_names_.emplace(mips64::GpuRegister(mips64::A5), "a5"); + secondary_register_names_.emplace(mips64::GpuRegister(mips64::A6), "a6"); + secondary_register_names_.emplace(mips64::GpuRegister(mips64::A7), "a7"); + secondary_register_names_.emplace(mips64::GpuRegister(mips64::T0), "t0"); + secondary_register_names_.emplace(mips64::GpuRegister(mips64::T1), "t1"); + secondary_register_names_.emplace(mips64::GpuRegister(mips64::T2), "t2"); + secondary_register_names_.emplace(mips64::GpuRegister(mips64::T3), "t3"); + secondary_register_names_.emplace(mips64::GpuRegister(mips64::S0), "s0"); + secondary_register_names_.emplace(mips64::GpuRegister(mips64::S1), "s1"); + secondary_register_names_.emplace(mips64::GpuRegister(mips64::S2), "s2"); + secondary_register_names_.emplace(mips64::GpuRegister(mips64::S3), "s3"); + secondary_register_names_.emplace(mips64::GpuRegister(mips64::S4), "s4"); + secondary_register_names_.emplace(mips64::GpuRegister(mips64::S5), "s5"); + secondary_register_names_.emplace(mips64::GpuRegister(mips64::S6), "s6"); + secondary_register_names_.emplace(mips64::GpuRegister(mips64::S7), "s7"); + secondary_register_names_.emplace(mips64::GpuRegister(mips64::T8), "t8"); + secondary_register_names_.emplace(mips64::GpuRegister(mips64::T9), "t9"); + secondary_register_names_.emplace(mips64::GpuRegister(mips64::K0), "k0"); + secondary_register_names_.emplace(mips64::GpuRegister(mips64::K1), "k1"); + secondary_register_names_.emplace(mips64::GpuRegister(mips64::GP), "gp"); + secondary_register_names_.emplace(mips64::GpuRegister(mips64::SP), "sp"); + secondary_register_names_.emplace(mips64::GpuRegister(mips64::S8), "s8"); + secondary_register_names_.emplace(mips64::GpuRegister(mips64::RA), "ra"); + + fp_registers_.push_back(new mips64::FpuRegister(mips64::F0)); + fp_registers_.push_back(new mips64::FpuRegister(mips64::F1)); + fp_registers_.push_back(new mips64::FpuRegister(mips64::F2)); + fp_registers_.push_back(new mips64::FpuRegister(mips64::F3)); + fp_registers_.push_back(new mips64::FpuRegister(mips64::F4)); + fp_registers_.push_back(new mips64::FpuRegister(mips64::F5)); + fp_registers_.push_back(new mips64::FpuRegister(mips64::F6)); + fp_registers_.push_back(new mips64::FpuRegister(mips64::F7)); + fp_registers_.push_back(new mips64::FpuRegister(mips64::F8)); + fp_registers_.push_back(new mips64::FpuRegister(mips64::F9)); + fp_registers_.push_back(new mips64::FpuRegister(mips64::F10)); + fp_registers_.push_back(new mips64::FpuRegister(mips64::F11)); + fp_registers_.push_back(new mips64::FpuRegister(mips64::F12)); + fp_registers_.push_back(new mips64::FpuRegister(mips64::F13)); + fp_registers_.push_back(new mips64::FpuRegister(mips64::F14)); + fp_registers_.push_back(new mips64::FpuRegister(mips64::F15)); + fp_registers_.push_back(new mips64::FpuRegister(mips64::F16)); + fp_registers_.push_back(new mips64::FpuRegister(mips64::F17)); + fp_registers_.push_back(new 
mips64::FpuRegister(mips64::F18)); + fp_registers_.push_back(new mips64::FpuRegister(mips64::F19)); + fp_registers_.push_back(new mips64::FpuRegister(mips64::F20)); + fp_registers_.push_back(new mips64::FpuRegister(mips64::F21)); + fp_registers_.push_back(new mips64::FpuRegister(mips64::F22)); + fp_registers_.push_back(new mips64::FpuRegister(mips64::F23)); + fp_registers_.push_back(new mips64::FpuRegister(mips64::F24)); + fp_registers_.push_back(new mips64::FpuRegister(mips64::F25)); + fp_registers_.push_back(new mips64::FpuRegister(mips64::F26)); + fp_registers_.push_back(new mips64::FpuRegister(mips64::F27)); + fp_registers_.push_back(new mips64::FpuRegister(mips64::F28)); + fp_registers_.push_back(new mips64::FpuRegister(mips64::F29)); + fp_registers_.push_back(new mips64::FpuRegister(mips64::F30)); + fp_registers_.push_back(new mips64::FpuRegister(mips64::F31)); + } + } + + void TearDown() OVERRIDE { + AssemblerTest::TearDown(); + STLDeleteElements(®isters_); + STLDeleteElements(&fp_registers_); + } + + std::vector<mips64::GpuRegister*> GetRegisters() OVERRIDE { + return registers_; + } + + std::vector<mips64::FpuRegister*> GetFPRegisters() OVERRIDE { + return fp_registers_; + } + + uint32_t CreateImmediate(int64_t imm_value) OVERRIDE { + return imm_value; + } + + std::string GetSecondaryRegisterName(const mips64::GpuRegister& reg) OVERRIDE { + CHECK(secondary_register_names_.find(reg) != secondary_register_names_.end()); + return secondary_register_names_[reg]; + } + + private: + std::vector<mips64::GpuRegister*> registers_; + std::map<mips64::GpuRegister, std::string, MIPS64CpuRegisterCompare> secondary_register_names_; + + std::vector<mips64::FpuRegister*> fp_registers_; +}; + + +TEST_F(AssemblerMIPS64Test, Toolchain) { + EXPECT_TRUE(CheckTools()); +} + + +/////////////////// +// FP Operations // +/////////////////// + +TEST_F(AssemblerMIPS64Test, SqrtS) { + DriverStr(RepeatFF(&mips64::Mips64Assembler::SqrtS, "sqrt.s ${reg1}, ${reg2}"), "sqrt.s"); +} + +TEST_F(AssemblerMIPS64Test, SqrtD) { + DriverStr(RepeatFF(&mips64::Mips64Assembler::SqrtD, "sqrt.d ${reg1}, ${reg2}"), "sqrt.d"); +} + +TEST_F(AssemblerMIPS64Test, AbsS) { + DriverStr(RepeatFF(&mips64::Mips64Assembler::AbsS, "abs.s ${reg1}, ${reg2}"), "abs.s"); +} + +TEST_F(AssemblerMIPS64Test, AbsD) { + DriverStr(RepeatFF(&mips64::Mips64Assembler::AbsD, "abs.d ${reg1}, ${reg2}"), "abs.d"); +} + +TEST_F(AssemblerMIPS64Test, RoundLS) { + DriverStr(RepeatFF(&mips64::Mips64Assembler::RoundLS, "round.l.s ${reg1}, ${reg2}"), "round.l.s"); +} + +TEST_F(AssemblerMIPS64Test, RoundLD) { + DriverStr(RepeatFF(&mips64::Mips64Assembler::RoundLD, "round.l.d ${reg1}, ${reg2}"), "round.l.d"); +} + +TEST_F(AssemblerMIPS64Test, RoundWS) { + DriverStr(RepeatFF(&mips64::Mips64Assembler::RoundWS, "round.w.s ${reg1}, ${reg2}"), "round.w.s"); +} + +TEST_F(AssemblerMIPS64Test, RoundWD) { + DriverStr(RepeatFF(&mips64::Mips64Assembler::RoundWD, "round.w.d ${reg1}, ${reg2}"), "round.w.d"); +} + +TEST_F(AssemblerMIPS64Test, CeilLS) { + DriverStr(RepeatFF(&mips64::Mips64Assembler::CeilLS, "ceil.l.s ${reg1}, ${reg2}"), "ceil.l.s"); +} + +TEST_F(AssemblerMIPS64Test, CeilLD) { + DriverStr(RepeatFF(&mips64::Mips64Assembler::CeilLD, "ceil.l.d ${reg1}, ${reg2}"), "ceil.l.d"); +} + +TEST_F(AssemblerMIPS64Test, CeilWS) { + DriverStr(RepeatFF(&mips64::Mips64Assembler::CeilWS, "ceil.w.s ${reg1}, ${reg2}"), "ceil.w.s"); +} + +TEST_F(AssemblerMIPS64Test, CeilWD) { + DriverStr(RepeatFF(&mips64::Mips64Assembler::CeilWD, "ceil.w.d ${reg1}, ${reg2}"), "ceil.w.d"); +} + 
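For context, and as an aside rather than part of the change itself: each of these tests follows one pattern. RepeatFF invokes the bound emitter for every combination of two FP registers and substitutes the ${reg1}/${reg2} tokens in the format string with the matching register names; DriverStr then hands the accumulated text to the external assembler configured by GetAssemblerParameters() and compares the result against the bytes the emitter produced. A minimal sketch of one more test in this style, hypothetical because it assumes Mips64Assembler exposes a NegS emitter for MIPS64R6's neg.s, would look like:

// Sketch only: assumes a NegS(FpuRegister, FpuRegister) emitter exists; any other
// two-operand FP instruction would follow the identical pattern.
TEST_F(AssemblerMIPS64Test, NegS) {
  DriverStr(RepeatFF(&mips64::Mips64Assembler::NegS, "neg.s ${reg1}, ${reg2}"), "neg.s");
}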
+TEST_F(AssemblerMIPS64Test, FloorLS) { + DriverStr(RepeatFF(&mips64::Mips64Assembler::FloorLS, "floor.l.s ${reg1}, ${reg2}"), "floor.l.s"); +} + +TEST_F(AssemblerMIPS64Test, FloorLD) { + DriverStr(RepeatFF(&mips64::Mips64Assembler::FloorLD, "floor.l.d ${reg1}, ${reg2}"), "floor.l.d"); +} + +TEST_F(AssemblerMIPS64Test, FloorWS) { + DriverStr(RepeatFF(&mips64::Mips64Assembler::FloorWS, "floor.w.s ${reg1}, ${reg2}"), "floor.w.s"); +} + +TEST_F(AssemblerMIPS64Test, FloorWD) { + DriverStr(RepeatFF(&mips64::Mips64Assembler::FloorWD, "floor.w.d ${reg1}, ${reg2}"), "floor.w.d"); +} + +TEST_F(AssemblerMIPS64Test, SelS) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::SelS, "sel.s ${reg1}, ${reg2}, ${reg3}"), "sel.s"); +} + +TEST_F(AssemblerMIPS64Test, SelD) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::SelD, "sel.d ${reg1}, ${reg2}, ${reg3}"), "sel.d"); +} + +TEST_F(AssemblerMIPS64Test, RintS) { + DriverStr(RepeatFF(&mips64::Mips64Assembler::RintS, "rint.s ${reg1}, ${reg2}"), "rint.s"); +} + +TEST_F(AssemblerMIPS64Test, RintD) { + DriverStr(RepeatFF(&mips64::Mips64Assembler::RintD, "rint.d ${reg1}, ${reg2}"), "rint.d"); +} + +TEST_F(AssemblerMIPS64Test, ClassS) { + DriverStr(RepeatFF(&mips64::Mips64Assembler::ClassS, "class.s ${reg1}, ${reg2}"), "class.s"); +} + +TEST_F(AssemblerMIPS64Test, ClassD) { + DriverStr(RepeatFF(&mips64::Mips64Assembler::ClassD, "class.d ${reg1}, ${reg2}"), "class.d"); +} + +TEST_F(AssemblerMIPS64Test, MinS) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::MinS, "min.s ${reg1}, ${reg2}, ${reg3}"), "min.s"); +} + +TEST_F(AssemblerMIPS64Test, MinD) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::MinD, "min.d ${reg1}, ${reg2}, ${reg3}"), "min.d"); +} + +TEST_F(AssemblerMIPS64Test, MaxS) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::MaxS, "max.s ${reg1}, ${reg2}, ${reg3}"), "max.s"); +} + +TEST_F(AssemblerMIPS64Test, MaxD) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::MaxD, "max.d ${reg1}, ${reg2}, ${reg3}"), "max.d"); +} + +TEST_F(AssemblerMIPS64Test, CvtDL) { + DriverStr(RepeatFF(&mips64::Mips64Assembler::Cvtdl, "cvt.d.l ${reg1}, ${reg2}"), "cvt.d.l"); +} + +////////// +// MISC // +////////// + +TEST_F(AssemblerMIPS64Test, Bitswap) { + DriverStr(RepeatRR(&mips64::Mips64Assembler::Bitswap, "bitswap ${reg1}, ${reg2}"), "bitswap"); +} + +TEST_F(AssemblerMIPS64Test, Dbitswap) { + DriverStr(RepeatRR(&mips64::Mips64Assembler::Dbitswap, "dbitswap ${reg1}, ${reg2}"), "dbitswap"); +} + +TEST_F(AssemblerMIPS64Test, Dsbh) { + DriverStr(RepeatRR(&mips64::Mips64Assembler::Dsbh, "dsbh ${reg1}, ${reg2}"), "dsbh"); +} + +TEST_F(AssemblerMIPS64Test, Dshd) { + DriverStr(RepeatRR(&mips64::Mips64Assembler::Dshd, "dshd ${reg1}, ${reg2}"), "dshd"); +} + +TEST_F(AssemblerMIPS64Test, Wsbh) { + DriverStr(RepeatRR(&mips64::Mips64Assembler::Wsbh, "wsbh ${reg1}, ${reg2}"), "wsbh"); +} + +TEST_F(AssemblerMIPS64Test, Sc) { + DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Sc, -9, "sc ${reg1}, {imm}(${reg2})"), "sc"); +} + +TEST_F(AssemblerMIPS64Test, Scd) { + DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Scd, -9, "scd ${reg1}, {imm}(${reg2})"), "scd"); +} + +TEST_F(AssemblerMIPS64Test, Ll) { + DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Ll, -9, "ll ${reg1}, {imm}(${reg2})"), "ll"); +} + +TEST_F(AssemblerMIPS64Test, Lld) { + DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Lld, -9, "lld ${reg1}, {imm}(${reg2})"), "lld"); +} + +TEST_F(AssemblerMIPS64Test, Rotr) { + DriverStr(RepeatRRIb(&mips64::Mips64Assembler::Rotr, 5, "rotr ${reg1}, ${reg2}, {imm}"), "rotr"); +} + 
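As a brief illustrative aside (not part of this change): the ClassS and ClassD tests above exercise class.s/class.d, whose result is described by the FPClassMaskType bits added to assembler_mips64.h earlier in this diff. A sketch of how consuming code could interpret such a classification mask, assuming the enum sits in the art::mips64 namespace next to the register types and that the result has already been moved out of the FP destination register:

// Sketch only: helpers over the FPClassMaskType bits from assembler_mips64.h.
// The include path and namespace placement are assumptions taken from this diff.
#include "assembler_mips64.h"

namespace art {

inline bool IsAnyNaN(uint32_t class_mask) {
  return (class_mask & (mips64::kSignalingNaN | mips64::kQuietNaN)) != 0;
}

inline bool IsAnyZero(uint32_t class_mask) {
  return (class_mask & (mips64::kNegativeZero | mips64::kPositiveZero)) != 0;
}

}  // namespace art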
+TEST_F(AssemblerMIPS64Test, Seleqz) { + DriverStr(RepeatRRR(&mips64::Mips64Assembler::Seleqz, "seleqz ${reg1}, ${reg2}, ${reg3}"), + "seleqz"); +} + +TEST_F(AssemblerMIPS64Test, Selnez) { + DriverStr(RepeatRRR(&mips64::Mips64Assembler::Selnez, "selnez ${reg1}, ${reg2}, ${reg3}"), + "selnez"); +} + +TEST_F(AssemblerMIPS64Test, Clz) { + DriverStr(RepeatRR(&mips64::Mips64Assembler::Clz, "clz ${reg1}, ${reg2}"), "clz"); +} + +TEST_F(AssemblerMIPS64Test, Clo) { + DriverStr(RepeatRR(&mips64::Mips64Assembler::Clo, "clo ${reg1}, ${reg2}"), "clo"); +} + +TEST_F(AssemblerMIPS64Test, Dclz) { + DriverStr(RepeatRR(&mips64::Mips64Assembler::Dclz, "dclz ${reg1}, ${reg2}"), "dclz"); +} + +TEST_F(AssemblerMIPS64Test, Dclo) { + DriverStr(RepeatRR(&mips64::Mips64Assembler::Dclo, "dclo ${reg1}, ${reg2}"), "dclo"); +} + +} // namespace art diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc index cc32da1e59..680e2d7b45 100644 --- a/dex2oat/dex2oat.cc +++ b/dex2oat/dex2oat.cc @@ -235,11 +235,7 @@ NO_RETURN static void Usage(const char* fmt, ...) { UsageError(" --compiler-backend=(Quick|Optimizing): select compiler backend"); UsageError(" set."); UsageError(" Example: --compiler-backend=Optimizing"); - if (kUseOptimizingCompiler) { - UsageError(" Default: Optimizing"); - } else { - UsageError(" Default: Quick"); - } + UsageError(" Default: Optimizing"); UsageError(""); UsageError(" --compiler-filter=" "(verify-none" @@ -503,7 +499,7 @@ static bool UseSwap(bool is_image, std::vector<const DexFile*>& dex_files) { class Dex2Oat FINAL { public: explicit Dex2Oat(TimingLogger* timings) : - compiler_kind_(kUseOptimizingCompiler ? Compiler::kOptimizing : Compiler::kQuick), + compiler_kind_(Compiler::kOptimizing), instruction_set_(kRuntimeISA), // Take the default set of instruction features from the build. verification_results_(nullptr), @@ -752,10 +748,9 @@ class Dex2Oat FINAL { void ProcessOptions(ParserOptions* parser_options) { image_ = (!image_filename_.empty()); - if (!parser_options->requested_specific_compiler && !kUseOptimizingCompiler) { - // If no specific compiler is requested, the current behavior is - // to compile the boot image with Quick, and the rest with Optimizing. - compiler_kind_ = image_ ? Compiler::kQuick : Compiler::kOptimizing; + if (image_) { + // We need the boot image to always be debuggable. + parser_options->debuggable = true; } if (oat_filename_.empty() && oat_fd_ == -1) { diff --git a/dexdump/Android.mk b/dexdump/Android.mk index a208ccf89b..ec2529e18f 100755 --- a/dexdump/Android.mk +++ b/dexdump/Android.mk @@ -34,8 +34,6 @@ LOCAL_C_INCLUDES := $(dexdump_c_includes) LOCAL_CFLAGS += -Wall LOCAL_SHARED_LIBRARIES += $(dexdump_libraries) LOCAL_MODULE := dexdump2 -LOCAL_MODULE_TAGS := optional -LOCAL_MODULE_PATH := $(TARGET_OUT_OPTIONAL_EXECUTABLES) include $(BUILD_EXECUTABLE) endif # !SDK_ONLY diff --git a/dexdump/dexdump_test.cc b/dexdump/dexdump_test.cc index d9b210d767..4230cb26b7 100644 --- a/dexdump/dexdump_test.cc +++ b/dexdump/dexdump_test.cc @@ -43,12 +43,7 @@ class DexDumpTest : public CommonRuntimeTest { // Runs test with given arguments. 
bool Exec(const std::vector<std::string>& args, std::string* error_msg) { // TODO(ajcbik): dexdump2 -> dexdump - std::string file_path = GetTestAndroidRoot(); - if (IsHost()) { - file_path += "/bin/dexdump2"; - } else { - file_path += "/xbin/dexdump2"; - } + std::string file_path = GetTestAndroidRoot() + "/bin/dexdump2"; EXPECT_TRUE(OS::FileExists(file_path.c_str())) << file_path << " should be a valid file path"; std::vector<std::string> exec_argv = { file_path }; exec_argv.insert(exec_argv.end(), args.begin(), args.end()); diff --git a/imgdiag/imgdiag_test.cc b/imgdiag/imgdiag_test.cc index 1ac79304df..82bc8b9521 100644 --- a/imgdiag/imgdiag_test.cc +++ b/imgdiag/imgdiag_test.cc @@ -109,11 +109,12 @@ class ImgDiagTest : public CommonRuntimeTest { std::string boot_image_location_; }; -#if defined (ART_TARGET) +#if defined (ART_TARGET) && !defined(__mips__) TEST_F(ImgDiagTest, ImageDiffPidSelf) { #else // Can't run this test on the host, it will fail when trying to open /proc/kpagestats // because it's root read-only. +// Also test fails on mips. b/24596015. TEST_F(ImgDiagTest, DISABLED_ImageDiffPidSelf) { #endif // Invoke 'img_diag' against the current process. diff --git a/runtime/Android.mk b/runtime/Android.mk index 995a1d5c0d..8d81f2a7f6 100644 --- a/runtime/Android.mk +++ b/runtime/Android.mk @@ -67,6 +67,7 @@ LIBART_COMMON_SRC_FILES := \ gc/heap.cc \ gc/reference_processor.cc \ gc/reference_queue.cc \ + gc/scoped_gc_critical_section.cc \ gc/space/bump_pointer_space.cc \ gc/space/dlmalloc_space.cc \ gc/space/image_space.cc \ @@ -104,6 +105,7 @@ LIBART_COMMON_SRC_FILES := \ lambda/box_table.cc \ lambda/closure.cc \ lambda/closure_builder.cc \ + lambda/leaking_allocator.cc \ jni_internal.cc \ jobject_comparator.cc \ linear_alloc.cc \ diff --git a/runtime/arch/arm/context_arm.cc b/runtime/arch/arm/context_arm.cc index 403d348752..8f6b1ff0a5 100644 --- a/runtime/arch/arm/context_arm.cc +++ b/runtime/arch/arm/context_arm.cc @@ -30,9 +30,11 @@ void ArmContext::Reset() { std::fill_n(fprs_, arraysize(fprs_), nullptr); gprs_[SP] = &sp_; gprs_[PC] = &pc_; + gprs_[R0] = &arg0_; // Initialize registers with easy to spot debug values. sp_ = ArmContext::kBadGprBase + SP; pc_ = ArmContext::kBadGprBase + PC; + arg0_ = 0; } void ArmContext::FillCalleeSaves(const StackVisitor& fr) { diff --git a/runtime/arch/arm/context_arm.h b/runtime/arch/arm/context_arm.h index 77bb5c8399..ea31055e9d 100644 --- a/runtime/arch/arm/context_arm.h +++ b/runtime/arch/arm/context_arm.h @@ -45,6 +45,10 @@ class ArmContext : public Context { SetGPR(PC, new_pc); } + void SetArg0(uintptr_t new_arg0_value) OVERRIDE { + SetGPR(R0, new_arg0_value); + } + bool IsAccessibleGPR(uint32_t reg) OVERRIDE { DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCoreRegisters)); return gprs_[reg] != nullptr; @@ -84,7 +88,7 @@ class ArmContext : public Context { uintptr_t* gprs_[kNumberOfCoreRegisters]; uint32_t* fprs_[kNumberOfSRegisters]; // Hold values for sp and pc if they are not located within a stack frame. 
- uintptr_t sp_, pc_; + uintptr_t sp_, pc_, arg0_; }; } // namespace arm diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S index e45d828584..d09631bc71 100644 --- a/runtime/arch/arm/quick_entrypoints_arm.S +++ b/runtime/arch/arm/quick_entrypoints_arm.S @@ -437,8 +437,8 @@ ARM_ENTRY art_quick_do_long_jump ldr r14, [r0, #56] @ (LR from gprs_ 56=4*14) add r0, r0, #12 @ increment r0 to skip gprs_[0..2] 12=4*3 ldm r0, {r3-r13} @ load remaining gprs from argument gprs_ - mov r0, #0 @ clear result registers r0 and r1 - mov r1, #0 + ldr r0, [r0, #-12] @ load r0 value + mov r1, #0 @ clear result register r1 bx r2 @ do long jump END art_quick_do_long_jump @@ -839,13 +839,12 @@ TWO_ARG_REF_DOWNCALL art_quick_set32_static, artSet32StaticFromCode, RETURN_IF_R TWO_ARG_REF_DOWNCALL art_quick_set_obj_static, artSetObjStaticFromCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER /* * Called by managed code to resolve a static field and store a 64-bit primitive value. - * On entry r0 holds field index, r1:r2 hold new_val + * On entry r0 holds field index, r2:r3 hold new_val */ .extern artSet64StaticFromCode ENTRY art_quick_set64_static - SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r3, r12 @ save callee saves in case of GC - mov r3, r2 @ pass one half of wide argument - mov r2, r1 @ pass other half of wide argument + SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r1, r12 @ save callee saves in case of GC + @ r2:r3 contain the wide argument ldr r1, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE] @ pass referrer str r9, [sp, #-16]! @ expand the frame and pass Thread::Current .cfi_adjust_cfa_offset 16 @@ -870,6 +869,7 @@ THREE_ARG_REF_DOWNCALL art_quick_set_obj_instance, artSetObjInstanceFromCode, RE .extern artSet64InstanceFromCode ENTRY art_quick_set64_instance SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r12, lr @ save callee saves in case of GC + @ r2:r3 contain the wide argument ldr r12, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE] @ pass referrer str r9, [sp, #-12]! @ expand the frame and pass Thread::Current .cfi_adjust_cfa_offset 12 @@ -1142,7 +1142,7 @@ END art_quick_deoptimize /* * Compiled code has requested that we deoptimize into the interpreter. The deoptimization - * will long jump to the upcall with a special exception of -1. + * will long jump to the interpreter bridge. */ .extern artDeoptimizeFromCompiledCode ENTRY art_quick_deoptimize_from_compiled_code diff --git a/runtime/arch/arm64/context_arm64.cc b/runtime/arch/arm64/context_arm64.cc index 60becc6aea..4477631c67 100644 --- a/runtime/arch/arm64/context_arm64.cc +++ b/runtime/arch/arm64/context_arm64.cc @@ -31,10 +31,12 @@ void Arm64Context::Reset() { std::fill_n(gprs_, arraysize(gprs_), nullptr); std::fill_n(fprs_, arraysize(fprs_), nullptr); gprs_[SP] = &sp_; - gprs_[LR] = &pc_; + gprs_[kPC] = &pc_; + gprs_[X0] = &arg0_; // Initialize registers with easy to spot debug values. sp_ = Arm64Context::kBadGprBase + SP; - pc_ = Arm64Context::kBadGprBase + LR; + pc_ = Arm64Context::kBadGprBase + kPC; + arg0_ = 0; } void Arm64Context::FillCalleeSaves(const StackVisitor& fr) { @@ -58,8 +60,8 @@ void Arm64Context::FillCalleeSaves(const StackVisitor& fr) { } void Arm64Context::SetGPR(uint32_t reg, uintptr_t value) { - DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfXRegisters)); - DCHECK_NE(reg, static_cast<uint32_t>(XZR)); + DCHECK_LT(reg, arraysize(gprs_)); + // Note: we use kPC == XZR, so do not ensure that reg != XZR. DCHECK(IsAccessibleGPR(reg)); DCHECK_NE(gprs_[reg], &gZero); // Can't overwrite this static value since they are never reset. 
*gprs_[reg] = value; @@ -124,13 +126,13 @@ void Arm64Context::SmashCallerSaves() { extern "C" NO_RETURN void art_quick_do_long_jump(uint64_t*, uint64_t*); void Arm64Context::DoLongJump() { - uint64_t gprs[kNumberOfXRegisters]; + uint64_t gprs[arraysize(gprs_)]; uint64_t fprs[kNumberOfDRegisters]; // The long jump routine called below expects to find the value for SP at index 31. DCHECK_EQ(SP, 31); - for (size_t i = 0; i < kNumberOfXRegisters; ++i) { + for (size_t i = 0; i < arraysize(gprs_); ++i) { gprs[i] = gprs_[i] != nullptr ? *gprs_[i] : Arm64Context::kBadGprBase + i; } for (size_t i = 0; i < kNumberOfDRegisters; ++i) { diff --git a/runtime/arch/arm64/context_arm64.h b/runtime/arch/arm64/context_arm64.h index 1c99f3c42d..11314e08ed 100644 --- a/runtime/arch/arm64/context_arm64.h +++ b/runtime/arch/arm64/context_arm64.h @@ -42,20 +42,25 @@ class Arm64Context : public Context { } void SetPC(uintptr_t new_lr) OVERRIDE { - SetGPR(LR, new_lr); + SetGPR(kPC, new_lr); + } + + void SetArg0(uintptr_t new_arg0_value) OVERRIDE { + SetGPR(X0, new_arg0_value); } bool IsAccessibleGPR(uint32_t reg) OVERRIDE { - DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfXRegisters)); + DCHECK_LT(reg, arraysize(gprs_)); return gprs_[reg] != nullptr; } uintptr_t* GetGPRAddress(uint32_t reg) OVERRIDE { - DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfXRegisters)); + DCHECK_LT(reg, arraysize(gprs_)); return gprs_[reg]; } uintptr_t GetGPR(uint32_t reg) OVERRIDE { + // Note: PC isn't an available GPR (outside of internals), so don't allow retrieving the value. DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfXRegisters)); DCHECK(IsAccessibleGPR(reg)); return *gprs_[reg]; @@ -79,12 +84,15 @@ class Arm64Context : public Context { void SmashCallerSaves() OVERRIDE; NO_RETURN void DoLongJump() OVERRIDE; + static constexpr size_t kPC = kNumberOfXRegisters; + private: - // Pointers to register locations, initialized to null or the specific registers below. - uintptr_t* gprs_[kNumberOfXRegisters]; + // Pointers to register locations, initialized to null or the specific registers below. We need + // an additional one for the PC. + uintptr_t* gprs_[kNumberOfXRegisters + 1]; uint64_t * fprs_[kNumberOfDRegisters]; - // Hold values for sp and pc if they are not located within a stack frame. - uintptr_t sp_, pc_; + // Hold values for sp, pc and arg0 if they are not located within a stack frame. + uintptr_t sp_, pc_, arg0_; }; } // namespace arm64 diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S index 169bc384a8..be5a15ec39 100644 --- a/runtime/arch/arm64/quick_entrypoints_arm64.S +++ b/runtime/arch/arm64/quick_entrypoints_arm64.S @@ -941,7 +941,7 @@ ENTRY art_quick_do_long_jump // Load GPRs // TODO: lots of those are smashed, could optimize. add x0, x0, #30*8 - ldp x30, x1, [x0], #-16 + ldp x30, x1, [x0], #-16 // LR & SP ldp x28, x29, [x0], #-16 ldp x26, x27, [x0], #-16 ldp x24, x25, [x0], #-16 @@ -958,10 +958,12 @@ ENTRY art_quick_do_long_jump ldp x2, x3, [x0], #-16 mov sp, x1 - // TODO: Is it really OK to use LR for the target PC? - mov x0, #0 - mov x1, #0 - br xLR + // Need to load PC, it's at the end (after the space for the unused XZR). Use x1. + ldr x1, [x0, #33*8] + // And the value of x0. 
+ ldr x0, [x0] + + br x1 END art_quick_do_long_jump /* @@ -1419,9 +1421,8 @@ THREE_ARG_REF_DOWNCALL art_quick_set_obj_instance, artSetObjInstanceFromCode, RE .extern artSet64StaticFromCode ENTRY art_quick_set64_static SETUP_REFS_ONLY_CALLEE_SAVE_FRAME // save callee saves in case of GC - mov x3, x1 // Store value ldr x1, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE] // Load referrer - mov x2, x3 // Put value param + // x2 contains the parameter mov x3, xSELF // pass Thread::Current bl artSet64StaticFromCode RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME diff --git a/runtime/arch/context.h b/runtime/arch/context.h index 9ef761e981..9af7c04f5c 100644 --- a/runtime/arch/context.h +++ b/runtime/arch/context.h @@ -50,6 +50,9 @@ class Context { // Sets the program counter value. virtual void SetPC(uintptr_t new_pc) = 0; + // Sets the first argument register. + virtual void SetArg0(uintptr_t new_arg0_value) = 0; + // Returns whether the given GPR is accessible (read or write). virtual bool IsAccessibleGPR(uint32_t reg) = 0; diff --git a/runtime/arch/mips/context_mips.cc b/runtime/arch/mips/context_mips.cc index bc2bf68993..08ab356855 100644 --- a/runtime/arch/mips/context_mips.cc +++ b/runtime/arch/mips/context_mips.cc @@ -30,9 +30,11 @@ void MipsContext::Reset() { std::fill_n(fprs_, arraysize(fprs_), nullptr); gprs_[SP] = &sp_; gprs_[RA] = &ra_; + gprs_[A0] = &arg0_; // Initialize registers with easy to spot debug values. sp_ = MipsContext::kBadGprBase + SP; ra_ = MipsContext::kBadGprBase + RA; + arg0_ = 0; } void MipsContext::FillCalleeSaves(const StackVisitor& fr) { diff --git a/runtime/arch/mips/context_mips.h b/runtime/arch/mips/context_mips.h index 38cf29a6aa..0affe5397a 100644 --- a/runtime/arch/mips/context_mips.h +++ b/runtime/arch/mips/context_mips.h @@ -78,12 +78,17 @@ class MipsContext : public Context { void SmashCallerSaves() OVERRIDE; NO_RETURN void DoLongJump() OVERRIDE; + void SetArg0(uintptr_t new_arg0_value) OVERRIDE { + SetGPR(A0, new_arg0_value); + } + private: // Pointers to registers in the stack, initialized to null except for the special cases below. uintptr_t* gprs_[kNumberOfCoreRegisters]; uint32_t* fprs_[kNumberOfFRegisters]; - // Hold values for sp and ra (return address) if they are not located within a stack frame. - uintptr_t sp_, ra_; + // Hold values for sp and ra (return address) if they are not located within a stack frame, as + // well as the first argument. + uintptr_t sp_, ra_, arg0_; }; } // namespace mips } // namespace art diff --git a/runtime/arch/mips/memcmp16_mips.S b/runtime/arch/mips/memcmp16_mips.S index aef81afeca..c8eac9bd1c 100644 --- a/runtime/arch/mips/memcmp16_mips.S +++ b/runtime/arch/mips/memcmp16_mips.S @@ -26,7 +26,7 @@ ENTRY_NO_GP __memcmp16 beqz $a2,done /* 0 length string */ beq $a0,$a1,done /* strings are identical */ - /* Unoptimised... */ + /* Unoptimized... */ 1: lhu $t0,0($a0) lhu $t1,0($a1) addu $a1,2 diff --git a/runtime/arch/mips64/context_mips64.cc b/runtime/arch/mips64/context_mips64.cc index 6637c371d2..2c17f1c118 100644 --- a/runtime/arch/mips64/context_mips64.cc +++ b/runtime/arch/mips64/context_mips64.cc @@ -29,10 +29,12 @@ void Mips64Context::Reset() { std::fill_n(gprs_, arraysize(gprs_), nullptr); std::fill_n(fprs_, arraysize(fprs_), nullptr); gprs_[SP] = &sp_; - gprs_[RA] = &ra_; + gprs_[T9] = &t9_; + gprs_[A0] = &arg0_; // Initialize registers with easy to spot debug values. 
sp_ = Mips64Context::kBadGprBase + SP; - ra_ = Mips64Context::kBadGprBase + RA; + t9_ = Mips64Context::kBadGprBase + T9; + arg0_ = 0; } void Mips64Context::FillCalleeSaves(const StackVisitor& fr) { diff --git a/runtime/arch/mips64/context_mips64.h b/runtime/arch/mips64/context_mips64.h index e4a144f420..84b1c9bad4 100644 --- a/runtime/arch/mips64/context_mips64.h +++ b/runtime/arch/mips64/context_mips64.h @@ -41,7 +41,7 @@ class Mips64Context : public Context { } void SetPC(uintptr_t new_pc) OVERRIDE { - SetGPR(RA, new_pc); + SetGPR(T9, new_pc); } bool IsAccessibleGPR(uint32_t reg) OVERRIDE { @@ -78,13 +78,20 @@ class Mips64Context : public Context { void SmashCallerSaves() OVERRIDE; NO_RETURN void DoLongJump() OVERRIDE; + void SetArg0(uintptr_t new_arg0_value) OVERRIDE { + SetGPR(A0, new_arg0_value); + } + private: // Pointers to registers in the stack, initialized to null except for the special cases below. uintptr_t* gprs_[kNumberOfGpuRegisters]; uint64_t* fprs_[kNumberOfFpuRegisters]; - // Hold values for sp and ra (return address) if they are not located within a stack frame. - uintptr_t sp_, ra_; + // Hold values for sp and t9 if they are not located within a stack frame. We use t9 for the + // PC (as ra is required to be valid for single-frame deopt and must not be clobbered). We + // also need the first argument for single-frame deopt. + uintptr_t sp_, t9_, arg0_; }; + } // namespace mips64 } // namespace art diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S index 1b50b2e246..68156ae7e3 100644 --- a/runtime/arch/mips64/quick_entrypoints_mips64.S +++ b/runtime/arch/mips64/quick_entrypoints_mips64.S @@ -431,7 +431,7 @@ ENTRY_NO_GP art_quick_do_long_jump ld $ra, 248($a0) ld $a0, 32($a0) move $v0, $zero # clear result registers v0 and v1 - jalr $zero, $ra # do long jump + jalr $zero, $t9 # do long jump (do not use ra, it must not be clobbered) move $v1, $zero END art_quick_do_long_jump @@ -1244,7 +1244,7 @@ END art_quick_set32_static .extern artSet64StaticFromCode ENTRY art_quick_set64_static SETUP_REFS_ONLY_CALLEE_SAVE_FRAME # save callee saves in case of GC - move $a2, $a1 # pass new_val + # a2 contains the new val ld $a1, FRAME_SIZE_REFS_ONLY_CALLEE_SAVE($sp) # pass referrer's Method* jal artSet64StaticFromCode # (field_idx, referrer, new_val, Thread*) move $a3, rSELF # pass Thread::Current diff --git a/runtime/arch/mips64/registers_mips64.h b/runtime/arch/mips64/registers_mips64.h index 38bc8f2687..1d07d47a7c 100644 --- a/runtime/arch/mips64/registers_mips64.h +++ b/runtime/arch/mips64/registers_mips64.h @@ -59,6 +59,8 @@ enum GpuRegister { SP = 29, // Stack pointer. S8 = 30, // Saved value/frame pointer. RA = 31, // Return address. + TR = S1, // ART Thread Register + TMP = T8, // scratch register (in addition to AT) kNumberOfGpuRegisters = 32, kNoGpuRegister = -1 // Signals an illegal register. }; diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc index f10799cc28..0d2457e3dd 100644 --- a/runtime/arch/stub_test.cc +++ b/runtime/arch/stub_test.cc @@ -774,22 +774,6 @@ class StubTest : public CommonRuntimeTest { return result; } - // Method with 32b arg0, 64b arg1 - size_t Invoke3UWithReferrer(size_t arg0, uint64_t arg1, uintptr_t code, Thread* self, - ArtMethod* referrer) { -#if (defined(__x86_64__) && !defined(__APPLE__)) || (defined(__mips__) && defined(__LP64__)) || \ - defined(__aarch64__) - // Just pass through. 
- return Invoke3WithReferrer(arg0, arg1, 0U, code, self, referrer); -#else - // Need to split up arguments. - uint32_t lower = static_cast<uint32_t>(arg1 & 0xFFFFFFFF); - uint32_t upper = static_cast<uint32_t>((arg1 >> 32) & 0xFFFFFFFF); - - return Invoke3WithReferrer(arg0, lower, upper, code, self, referrer); -#endif - } - static uintptr_t GetEntrypoint(Thread* self, QuickEntrypointEnum entrypoint) { int32_t offset; #ifdef __LP64__ @@ -1974,21 +1958,23 @@ static void GetSetObjInstance(Handle<mirror::Object>* obj, ArtField* f, } -// TODO: Complete these tests for 32b architectures. +// TODO: Complete these tests for 32b architectures static void GetSet64Static(ArtField* f, Thread* self, ArtMethod* referrer, StubTest* test) SHARED_REQUIRES(Locks::mutator_lock_) { -#if (defined(__x86_64__) && !defined(__APPLE__)) || (defined(__mips__) && defined(__LP64__)) || \ - defined(__aarch64__) +#if (defined(__x86_64__) && !defined(__APPLE__)) || (defined(__mips__) && defined(__LP64__)) \ + || defined(__aarch64__) uint64_t values[] = { 0, 1, 2, 255, 32768, 1000000, 0xFFFFFFFF, 0xFFFFFFFFFFFF }; for (size_t i = 0; i < arraysize(values); ++i) { - test->Invoke3UWithReferrer(static_cast<size_t>(f->GetDexFieldIndex()), - values[i], - StubTest::GetEntrypoint(self, kQuickSet64Static), - self, - referrer); + // 64 bit FieldSet stores the set value in the second register. + test->Invoke3WithReferrer(static_cast<size_t>(f->GetDexFieldIndex()), + 0U, + values[i], + StubTest::GetEntrypoint(self, kQuickSet64Static), + self, + referrer); size_t res = test->Invoke3WithReferrer(static_cast<size_t>(f->GetDexFieldIndex()), 0U, 0U, diff --git a/runtime/arch/x86/context_x86.cc b/runtime/arch/x86/context_x86.cc index 7096c82aad..987ad60fd8 100644 --- a/runtime/arch/x86/context_x86.cc +++ b/runtime/arch/x86/context_x86.cc @@ -29,9 +29,11 @@ void X86Context::Reset() { std::fill_n(gprs_, arraysize(gprs_), nullptr); std::fill_n(fprs_, arraysize(fprs_), nullptr); gprs_[ESP] = &esp_; + gprs_[EAX] = &arg0_; // Initialize registers with easy to spot debug values. esp_ = X86Context::kBadGprBase + ESP; eip_ = X86Context::kBadGprBase + kNumberOfCpuRegisters; + arg0_ = 0; } void X86Context::FillCalleeSaves(const StackVisitor& fr) { diff --git a/runtime/arch/x86/context_x86.h b/runtime/arch/x86/context_x86.h index c4a11d8a88..59beb12ffa 100644 --- a/runtime/arch/x86/context_x86.h +++ b/runtime/arch/x86/context_x86.h @@ -44,6 +44,10 @@ class X86Context : public Context { eip_ = new_pc; } + void SetArg0(uintptr_t new_arg0_value) OVERRIDE { + SetGPR(EAX, new_arg0_value); + } + bool IsAccessibleGPR(uint32_t reg) OVERRIDE { DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCpuRegisters)); return gprs_[reg] != nullptr; @@ -95,10 +99,10 @@ class X86Context : public Context { // Pointers to register locations. Values are initialized to null or the special registers below. uintptr_t* gprs_[kNumberOfCpuRegisters]; uint32_t* fprs_[kNumberOfFloatRegisters]; - // Hold values for esp and eip if they are not located within a stack frame. EIP is somewhat + // Hold values for esp, eip and arg0 if they are not located within a stack frame. EIP is somewhat // special in that it cannot be encoded normally as a register operand to an instruction (except // in 64bit addressing modes). 
- uintptr_t esp_, eip_; + uintptr_t esp_, eip_, arg0_; }; } // namespace x86 } // namespace art diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S index 029a296e5a..3afc4d545f 100644 --- a/runtime/arch/x86/quick_entrypoints_x86.S +++ b/runtime/arch/x86/quick_entrypoints_x86.S @@ -1434,15 +1434,18 @@ END_FUNCTION art_quick_set64_instance // Call artSet64StaticFromCode with 3 word size arguments plus with the referrer in the 2nd position // so that new_val is aligned on even registers were we passing arguments in registers. DEFINE_FUNCTION art_quick_set64_static + // TODO: Implement SETUP_GOT_NOSAVE for got_reg = ecx to avoid moving around the registers. + movd %ebx, %xmm0 SETUP_REFS_ONLY_CALLEE_SAVE_FRAME ebx, ebx // save ref containing registers for GC - mov FRAME_SIZE_REFS_ONLY_CALLEE_SAVE(%esp), %ebx // get referrer + movd %xmm0, %ebx + mov FRAME_SIZE_REFS_ONLY_CALLEE_SAVE(%esp), %ecx // get referrer subl LITERAL(12), %esp // alignment padding CFI_ADJUST_CFA_OFFSET(12) pushl %fs:THREAD_SELF_OFFSET // pass Thread::Current() CFI_ADJUST_CFA_OFFSET(4) - PUSH edx // pass high half of new_val - PUSH ecx // pass low half of new_val - PUSH ebx // pass referrer + PUSH ebx // pass high half of new_val + PUSH edx // pass low half of new_val + PUSH ecx // pass referrer PUSH eax // pass field_idx call SYMBOL(artSet64StaticFromCode) // (field_idx, referrer, new_val, Thread*) addl LITERAL(32), %esp // pop arguments @@ -1695,7 +1698,7 @@ END_FUNCTION art_quick_deoptimize /* * Compiled code has requested that we deoptimize into the interpreter. The deoptimization - * will long jump to the upcall with a special exception of -1. + * will long jump to the interpreter bridge. */ DEFINE_FUNCTION art_quick_deoptimize_from_compiled_code SETUP_SAVE_ALL_CALLEE_SAVE_FRAME ebx, ebx diff --git a/runtime/arch/x86_64/context_x86_64.cc b/runtime/arch/x86_64/context_x86_64.cc index 1fe2ef8fd8..3dc7d71df4 100644 --- a/runtime/arch/x86_64/context_x86_64.cc +++ b/runtime/arch/x86_64/context_x86_64.cc @@ -29,9 +29,11 @@ void X86_64Context::Reset() { std::fill_n(gprs_, arraysize(gprs_), nullptr); std::fill_n(fprs_, arraysize(fprs_), nullptr); gprs_[RSP] = &rsp_; + gprs_[RDI] = &arg0_; // Initialize registers with easy to spot debug values. rsp_ = X86_64Context::kBadGprBase + RSP; rip_ = X86_64Context::kBadGprBase + kNumberOfCpuRegisters; + arg0_ = 0; } void X86_64Context::FillCalleeSaves(const StackVisitor& fr) { diff --git a/runtime/arch/x86_64/context_x86_64.h b/runtime/arch/x86_64/context_x86_64.h index 30bb9ec362..f05b7f093f 100644 --- a/runtime/arch/x86_64/context_x86_64.h +++ b/runtime/arch/x86_64/context_x86_64.h @@ -44,6 +44,10 @@ class X86_64Context : public Context { rip_ = new_pc; } + void SetArg0(uintptr_t new_arg0_value) OVERRIDE { + SetGPR(RDI, new_arg0_value); + } + bool IsAccessibleGPR(uint32_t reg) OVERRIDE { DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCpuRegisters)); return gprs_[reg] != nullptr; @@ -82,10 +86,10 @@ class X86_64Context : public Context { // Pointers to register locations. Values are initialized to null or the special registers below. uintptr_t* gprs_[kNumberOfCpuRegisters]; uint64_t* fprs_[kNumberOfFloatRegisters]; - // Hold values for rsp and rip if they are not located within a stack frame. RIP is somewhat + // Hold values for rsp, rip and arg0 if they are not located within a stack frame. RIP is somewhat // special in that it cannot be encoded normally as a register operand to an instruction (except // in 64bit addressing modes). 
- uintptr_t rsp_, rip_; + uintptr_t rsp_, rip_, arg0_; }; } // namespace x86_64 } // namespace art diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S index 861f8025a5..1133203e31 100644 --- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S +++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S @@ -1383,7 +1383,7 @@ ONE_ARG_REF_DOWNCALL art_quick_get_obj_static, artGetObjStaticFromCode, RETURN_O // This is singled out as the argument order is different. DEFINE_FUNCTION art_quick_set64_static - movq %rsi, %rdx // pass new_val + // new_val is already in %rdx movq 8(%rsp), %rsi // pass referrer SETUP_REFS_ONLY_CALLEE_SAVE_FRAME // field_idx is in rdi @@ -1724,18 +1724,18 @@ END_FUNCTION art_quick_instrumentation_exit * will long jump to the upcall with a special exception of -1. */ DEFINE_FUNCTION art_quick_deoptimize - pushq %rsi // Entry point for a jump. Fake that we were called. - // Use hidden arg. + pushq %rsi // Entry point for a jump. Fake that we were called. + // Use hidden arg. SETUP_SAVE_ALL_CALLEE_SAVE_FRAME - // Stack should be aligned now. - movq %gs:THREAD_SELF_OFFSET, %rdi // Pass Thread. - call SYMBOL(artDeoptimize) // artDeoptimize(Thread*) + // Stack should be aligned now. + movq %gs:THREAD_SELF_OFFSET, %rdi // Pass Thread. + call SYMBOL(artDeoptimize) // artDeoptimize(Thread*) UNREACHABLE END_FUNCTION art_quick_deoptimize /* * Compiled code has requested that we deoptimize into the interpreter. The deoptimization - * will long jump to the upcall with a special exception of -1. + * will long jump to the interpreter bridge. */ DEFINE_FUNCTION art_quick_deoptimize_from_compiled_code SETUP_SAVE_ALL_CALLEE_SAVE_FRAME diff --git a/runtime/art_method-inl.h b/runtime/art_method-inl.h index d6b2b7e04d..632a50f15c 100644 --- a/runtime/art_method-inl.h +++ b/runtime/art_method-inl.h @@ -35,6 +35,8 @@ #include "quick/quick_method_frame_info.h" #include "read_barrier-inl.h" #include "runtime-inl.h" +#include "scoped_thread_state_change.h" +#include "thread-inl.h" #include "utils.h" namespace art { @@ -75,9 +77,28 @@ inline bool ArtMethod::CASDeclaringClass(mirror::Class* expected_class, expected_root, desired_root); } +// AssertSharedHeld doesn't work in GetAccessFlags, so use a NO_THREAD_SAFETY_ANALYSIS helper. +// TODO: Figure out why ASSERT_SHARED_CAPABILITY doesn't work. +ALWAYS_INLINE +static inline void DoGetAccessFlagsHelper(ArtMethod* method) NO_THREAD_SAFETY_ANALYSIS { + CHECK(method->IsRuntimeMethod() || method->GetDeclaringClass()->IsIdxLoaded() || + method->GetDeclaringClass()->IsErroneous()); +} + inline uint32_t ArtMethod::GetAccessFlags() { - DCHECK(IsRuntimeMethod() || GetDeclaringClass()->IsIdxLoaded() || - GetDeclaringClass()->IsErroneous()); + if (kIsDebugBuild) { + Thread* self = Thread::Current(); + if (!Locks::mutator_lock_->IsSharedHeld(self)) { + ScopedObjectAccess soa(self); + CHECK(IsRuntimeMethod() || GetDeclaringClass()->IsIdxLoaded() || + GetDeclaringClass()->IsErroneous()); + } else { + // We cannot use SOA in this case. We might be holding the lock, but may not be in the + // runnable state (e.g., during GC). 
+ Locks::mutator_lock_->AssertSharedHeld(self); + DoGetAccessFlagsHelper(this); + } + } return access_flags_; } diff --git a/runtime/art_method.h b/runtime/art_method.h index f78c8274b0..0315c3a953 100644 --- a/runtime/art_method.h +++ b/runtime/art_method.h @@ -75,7 +75,9 @@ class ArtMethod FINAL { return MemberOffset(OFFSETOF_MEMBER(ArtMethod, declaring_class_)); } - ALWAYS_INLINE uint32_t GetAccessFlags() SHARED_REQUIRES(Locks::mutator_lock_); + // Note: GetAccessFlags acquires the mutator lock in debug mode to check that it is not called for + // a proxy method. + ALWAYS_INLINE uint32_t GetAccessFlags(); void SetAccessFlags(uint32_t new_access_flags) { // Not called within a transaction. @@ -86,77 +88,78 @@ class ArtMethod FINAL { InvokeType GetInvokeType() SHARED_REQUIRES(Locks::mutator_lock_); // Returns true if the method is declared public. - bool IsPublic() SHARED_REQUIRES(Locks::mutator_lock_) { + bool IsPublic() { return (GetAccessFlags() & kAccPublic) != 0; } // Returns true if the method is declared private. - bool IsPrivate() SHARED_REQUIRES(Locks::mutator_lock_) { + bool IsPrivate() { return (GetAccessFlags() & kAccPrivate) != 0; } // Returns true if the method is declared static. - bool IsStatic() SHARED_REQUIRES(Locks::mutator_lock_) { + bool IsStatic() { return (GetAccessFlags() & kAccStatic) != 0; } // Returns true if the method is a constructor. - bool IsConstructor() SHARED_REQUIRES(Locks::mutator_lock_) { + bool IsConstructor() { return (GetAccessFlags() & kAccConstructor) != 0; } // Returns true if the method is a class initializer. - bool IsClassInitializer() SHARED_REQUIRES(Locks::mutator_lock_) { + bool IsClassInitializer() { return IsConstructor() && IsStatic(); } // Returns true if the method is static, private, or a constructor. - bool IsDirect() SHARED_REQUIRES(Locks::mutator_lock_) { + bool IsDirect() { return IsDirect(GetAccessFlags()); } static bool IsDirect(uint32_t access_flags) { - return (access_flags & (kAccStatic | kAccPrivate | kAccConstructor)) != 0; + constexpr uint32_t direct = kAccStatic | kAccPrivate | kAccConstructor; + return (access_flags & direct) != 0; } // Returns true if the method is declared synchronized. 
- bool IsSynchronized() SHARED_REQUIRES(Locks::mutator_lock_) { - uint32_t synchonized = kAccSynchronized | kAccDeclaredSynchronized; + bool IsSynchronized() { + constexpr uint32_t synchonized = kAccSynchronized | kAccDeclaredSynchronized; return (GetAccessFlags() & synchonized) != 0; } - bool IsFinal() SHARED_REQUIRES(Locks::mutator_lock_) { + bool IsFinal() { return (GetAccessFlags() & kAccFinal) != 0; } - bool IsMiranda() SHARED_REQUIRES(Locks::mutator_lock_) { + bool IsMiranda() { return (GetAccessFlags() & kAccMiranda) != 0; } - bool IsNative() SHARED_REQUIRES(Locks::mutator_lock_) { + bool IsNative() { return (GetAccessFlags() & kAccNative) != 0; } - bool IsFastNative() SHARED_REQUIRES(Locks::mutator_lock_) { - uint32_t mask = kAccFastNative | kAccNative; + bool IsFastNative() { + constexpr uint32_t mask = kAccFastNative | kAccNative; return (GetAccessFlags() & mask) == mask; } - bool IsAbstract() SHARED_REQUIRES(Locks::mutator_lock_) { + bool IsAbstract() { return (GetAccessFlags() & kAccAbstract) != 0; } - bool IsSynthetic() SHARED_REQUIRES(Locks::mutator_lock_) { + bool IsSynthetic() { return (GetAccessFlags() & kAccSynthetic) != 0; } bool IsProxyMethod() SHARED_REQUIRES(Locks::mutator_lock_); - bool IsPreverified() SHARED_REQUIRES(Locks::mutator_lock_) { + bool IsPreverified() { return (GetAccessFlags() & kAccPreverified) != 0; } - void SetPreverified() SHARED_REQUIRES(Locks::mutator_lock_) { + void SetPreverified() { DCHECK(!IsPreverified()); SetAccessFlags(GetAccessFlags() | kAccPreverified); } @@ -404,7 +407,7 @@ class ArtMethod FINAL { return GetNativePointer<void*>(EntryPointFromJniOffset(pointer_size), pointer_size); } - void SetEntryPointFromJni(const void* entrypoint) SHARED_REQUIRES(Locks::mutator_lock_) { + void SetEntryPointFromJni(const void* entrypoint) { DCHECK(IsNative()); SetEntryPointFromJniPtrSize(entrypoint, sizeof(void*)); } diff --git a/runtime/base/arena_allocator.cc b/runtime/base/arena_allocator.cc index e07520cfa7..6f2aa46816 100644 --- a/runtime/base/arena_allocator.cc +++ b/runtime/base/arena_allocator.cc @@ -54,6 +54,7 @@ const char* const ArenaAllocatorStatsImpl<kCount>::kAllocNames[] = { "RegAlloc ", "Data ", "STL ", + "GraphBuilder ", "Graph ", "BasicBlock ", "BlockList ", @@ -73,12 +74,25 @@ const char* const ArenaAllocatorStatsImpl<kCount>::kAllocNames[] = { "Environment ", "EnvVRegs ", "EnvLocations ", + "LocSummary ", "SsaBuilder ", "MoveOperands ", "CodeBuffer ", "StackMaps ", "BaselineMaps ", "Optimization ", + "GVN ", + "InductionVar ", + "BCE ", + "SsaLiveness ", + "SsaPhiElim ", + "RefTypeProp ", + "PrimTypeProp ", + "SideEffects ", + "RegAllocator ", + "StackMapStm ", + "CodeGen ", + "ParallelMove ", }; template <bool kCount> diff --git a/runtime/base/arena_allocator.h b/runtime/base/arena_allocator.h index f1cc5b1bf1..565b416b38 100644 --- a/runtime/base/arena_allocator.h +++ b/runtime/base/arena_allocator.h @@ -66,6 +66,7 @@ enum ArenaAllocKind { kArenaAllocRegAlloc, kArenaAllocData, kArenaAllocSTL, + kArenaAllocGraphBuilder, kArenaAllocGraph, kArenaAllocBasicBlock, kArenaAllocBlockList, @@ -85,12 +86,25 @@ enum ArenaAllocKind { kArenaAllocEnvironment, kArenaAllocEnvironmentVRegs, kArenaAllocEnvironmentLocations, + kArenaAllocLocationSummary, kArenaAllocSsaBuilder, kArenaAllocMoveOperands, kArenaAllocCodeBuffer, kArenaAllocStackMaps, kArenaAllocBaselineMaps, kArenaAllocOptimization, + kArenaAllocGvn, + kArenaAllocInductionVarAnalysis, + kArenaAllocBoundsCheckElimination, + kArenaAllocSsaLiveness, + kArenaAllocSsaPhiElimination, + 
kArenaAllocReferenceTypePropagation, + kArenaAllocPrimitiveTypePropagation, + kArenaAllocSideEffectsAnalysis, + kArenaAllocRegisterAllocator, + kArenaAllocStackMapStream, + kArenaAllocCodeGenerator, + kArenaAllocParallelMoveResolver, kNumArenaAllocKinds }; diff --git a/runtime/base/arena_object.h b/runtime/base/arena_object.h index ab97d0cb66..56e35d8751 100644 --- a/runtime/base/arena_object.h +++ b/runtime/base/arena_object.h @@ -40,6 +40,10 @@ class ArenaObject { LOG(FATAL) << "UNREACHABLE"; UNREACHABLE(); } + + // NOTE: Providing placement new (and matching delete) for constructing container elements. + ALWAYS_INLINE void* operator new(size_t, void* ptr) noexcept { return ptr; } + ALWAYS_INLINE void operator delete(void*, void*) noexcept { } }; diff --git a/runtime/base/bit_utils.h b/runtime/base/bit_utils.h index 1b0d774419..9c78ee59dd 100644 --- a/runtime/base/bit_utils.h +++ b/runtime/base/bit_utils.h @@ -263,6 +263,33 @@ static constexpr bool IsAbsoluteUint(T value) { : static_cast<unsigned_type>(value)); } +// Generate maximum/minimum values for signed/unsigned n-bit integers +template <typename T> +static constexpr T MaxInt(size_t bits) { + return + DCHECK_CONSTEXPR(bits > 0, "bits cannot be zero", 0) + DCHECK_CONSTEXPR(bits <= BitSizeOf<T>(), "kBits must be < max.", 0) + bits == BitSizeOf<T>() + ? std::numeric_limits<T>::max() + : std::is_signed<T>::value + ? (bits == 1 + ? 0 + : static_cast<T>(MaxInt<typename std::make_unsigned<T>::type>(bits - 1))) + : static_cast<T>(UINT64_C(1) << bits) - static_cast<T>(1); +} + +template <typename T> +static constexpr T MinInt(size_t bits) { + return + DCHECK_CONSTEXPR(bits > 0, "bits cannot be zero", 0) + DCHECK_CONSTEXPR(bits <= BitSizeOf<T>(), "kBits must be < max.", 0) + bits == BitSizeOf<T>() + ? std::numeric_limits<T>::min() + : std::is_signed<T>::value + ? (bits == 1 ? -1 : static_cast<T>(-1) - MaxInt<T>(bits)) + : static_cast<T>(0); +} + // Using the Curiously Recurring Template Pattern to implement everything shared // by LowToHighBitIterator and HighToLowBitIterator, i.e. everything but operator*(). template <typename T, typename Iter> diff --git a/runtime/check_jni.cc b/runtime/check_jni.cc index b6ad5473ff..beabce36fb 100644 --- a/runtime/check_jni.cc +++ b/runtime/check_jni.cc @@ -2463,6 +2463,9 @@ class CheckJNI { ScopedCheck sc(kFlag_Default, __FUNCTION__); JniValueType args[2] = {{.E = env}, {.L = obj}}; if (sc.Check(soa, true, "EL", args)) { + if (obj != nullptr) { + down_cast<JNIEnvExt*>(env)->RecordMonitorEnter(obj); + } JniValueType result; result.i = baseEnv(env)->MonitorEnter(env, obj); if (sc.Check(soa, false, "i", &result)) { @@ -2477,6 +2480,9 @@ class CheckJNI { ScopedCheck sc(kFlag_ExcepOkay, __FUNCTION__); JniValueType args[2] = {{.E = env}, {.L = obj}}; if (sc.Check(soa, true, "EL", args)) { + if (obj != nullptr) { + down_cast<JNIEnvExt*>(env)->CheckMonitorRelease(obj); + } JniValueType result; result.i = baseEnv(env)->MonitorExit(env, obj); if (sc.Check(soa, false, "i", &result)) { diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc index 6b9c8aa353..acb39c5402 100644 --- a/runtime/class_linker.cc +++ b/runtime/class_linker.cc @@ -79,6 +79,7 @@ #include "scoped_thread_state_change.h" #include "handle_scope-inl.h" #include "thread-inl.h" +#include "trace.h" #include "utils.h" #include "utils/dex_cache_arrays_layout-inl.h" #include "verifier/method_verifier.h" @@ -128,7 +129,11 @@ void ClassLinker::ThrowEarlierClassFailure(mirror::Class* c) { // the previous error. 
Runtime* const runtime = Runtime::Current(); if (!runtime->IsAotCompiler()) { // Give info if this occurs at runtime. - LOG(INFO) << "Rejecting re-init on previously-failed class " << PrettyClass(c); + std::string extra; + if (c->GetVerifyErrorClass() != nullptr) { + extra = PrettyDescriptor(c->GetVerifyErrorClass()); + } + LOG(INFO) << "Rejecting re-init on previously-failed class " << PrettyClass(c) << ": " << extra; } CHECK(c->IsErroneous()) << PrettyClass(c) << " " << c->GetStatus(); @@ -1295,6 +1300,9 @@ bool ClassLinker::ClassInClassTable(mirror::Class* klass) { } void ClassLinker::VisitClassRoots(RootVisitor* visitor, VisitRootFlags flags) { + // Acquire tracing_enabled before locking class linker lock to prevent lock order violation. Since + // enabling tracing requires the mutator lock, there are no race conditions here. + const bool tracing_enabled = Trace::IsTracingEnabled(); Thread* const self = Thread::Current(); WriterMutexLock mu(self, *Locks::classlinker_classes_lock_); BufferedRootVisitor<kDefaultBufferedRootCount> buffered_visitor( @@ -1316,12 +1324,13 @@ void ClassLinker::VisitClassRoots(RootVisitor* visitor, VisitRootFlags flags) { // Need to make sure to not copy ArtMethods without doing read barriers since the roots are // marked concurrently and we don't hold the classlinker_classes_lock_ when we do the copy. boot_class_table_.VisitRoots(buffered_visitor); - // TODO: Avoid marking these to enable class unloading. - JavaVMExt* const vm = Runtime::Current()->GetJavaVM(); - for (const ClassLoaderData& data : class_loaders_) { - mirror::Object* class_loader = vm->DecodeWeakGlobal(self, data.weak_root); - // Don't need to update anything since the class loaders will be updated by SweepSystemWeaks. - visitor->VisitRootIfNonNull(&class_loader, RootInfo(kRootVMInternal)); + + // If tracing is enabled, then mark all the class loaders to prevent unloading. + if (tracing_enabled) { + for (const ClassLoaderData& data : class_loaders_) { + GcRoot<mirror::Object> root(GcRoot<mirror::Object>(self->DecodeJObject(data.weak_root))); + root.VisitRoot(visitor, RootInfo(kRootVMInternal)); + } } } else if ((flags & kVisitRootFlagNewRoots) != 0) { for (auto& root : new_class_roots_) { @@ -1503,7 +1512,7 @@ ClassLinker::~ClassLinker() { Thread* const self = Thread::Current(); JavaVMExt* const vm = Runtime::Current()->GetJavaVM(); for (const ClassLoaderData& data : class_loaders_) { - vm->DecodeWeakGlobalDuringShutdown(self, data.weak_root); + vm->DeleteWeakGlobalRef(self, data.weak_root); delete data.allocator; delete data.class_table; } @@ -2653,10 +2662,8 @@ mirror::DexCache* ClassLinker::FindDexCacheLocked(Thread* self, const DexFile& dex_file, bool allow_failure) { // Search assuming unique-ness of dex file. - JavaVMExt* const vm = self->GetJniEnv()->vm; for (jweak weak_root : dex_caches_) { - mirror::DexCache* dex_cache = down_cast<mirror::DexCache*>( - vm->DecodeWeakGlobal(self, weak_root)); + mirror::DexCache* dex_cache = down_cast<mirror::DexCache*>(self->DecodeJObject(weak_root)); if (dex_cache != nullptr && dex_cache->GetDexFile() == &dex_file) { return dex_cache; } @@ -4189,6 +4196,8 @@ ClassTable* ClassLinker::InsertClassTableForClassLoader(mirror::ClassLoader* cla data.allocator = Runtime::Current()->CreateLinearAlloc(); class_loaders_.push_back(data); // Don't already have a class table, add it to the class loader. 
+ CHECK(class_loader->GetClassTable() == nullptr); + CHECK(class_loader->GetAllocator() == nullptr); class_loader->SetClassTable(data.class_table); class_loader->SetAllocator(data.allocator); } @@ -4266,6 +4275,11 @@ bool ClassLinker::LinkClass(Thread* self, ClassTable* const table = InsertClassTableForClassLoader(class_loader); mirror::Class* existing = table->UpdateClass(descriptor, h_new_class.Get(), ComputeModifiedUtf8Hash(descriptor)); + if (class_loader != nullptr) { + // We updated the class in the class table, perform the write barrier so that the GC knows + // about the change. + Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(class_loader); + } CHECK_EQ(existing, klass.Get()); if (kIsDebugBuild && class_loader == nullptr && dex_cache_image_class_lookup_required_) { // Check a class loaded with the system class loader matches one in the image if the class @@ -6198,10 +6212,9 @@ void ClassLinker::DropFindArrayClassCache() { void ClassLinker::VisitClassLoaders(ClassLoaderVisitor* visitor) const { Thread* const self = Thread::Current(); - JavaVMExt* const vm = self->GetJniEnv()->vm; for (const ClassLoaderData& data : class_loaders_) { - auto* const class_loader = down_cast<mirror::ClassLoader*>( - vm->DecodeWeakGlobal(self, data.weak_root)); + // Need to use DecodeJObject so that we get null for cleared JNI weak globals. + auto* const class_loader = down_cast<mirror::ClassLoader*>(self->DecodeJObject(data.weak_root)); if (class_loader != nullptr) { visitor->Visit(class_loader); } @@ -6214,8 +6227,8 @@ void ClassLinker::CleanupClassLoaders() { JavaVMExt* const vm = Runtime::Current()->GetJavaVM(); for (auto it = class_loaders_.begin(); it != class_loaders_.end(); ) { const ClassLoaderData& data = *it; - auto* const class_loader = down_cast<mirror::ClassLoader*>( - vm->DecodeWeakGlobal(self, data.weak_root)); + // Need to use DecodeJObject so that we get null for cleared JNI weak globals. + auto* const class_loader = down_cast<mirror::ClassLoader*>(self->DecodeJObject(data.weak_root)); if (class_loader != nullptr) { ++it; } else { diff --git a/runtime/class_linker.h b/runtime/class_linker.h index f705330b14..7f3e93806e 100644 --- a/runtime/class_linker.h +++ b/runtime/class_linker.h @@ -560,7 +560,7 @@ class ClassLinker { private: struct ClassLoaderData { - jobject weak_root; // Weak root to enable class unloading. + jweak weak_root; // Weak root to enable class unloading. ClassTable* class_table; LinearAlloc* allocator; }; @@ -883,6 +883,7 @@ class ClassLinker { friend class ImageWriter; // for GetClassRoots friend class ImageDumper; // for FindOpenedOatFileFromOatLocation friend class JniCompilerTest; // for GetRuntimeQuickGenericJniStub + friend class JniInternalTest; // for GetRuntimeQuickGenericJniStub ART_FRIEND_TEST(mirror::DexCacheTest, Open); // for AllocDexCache DISALLOW_COPY_AND_ASSIGN(ClassLinker); diff --git a/runtime/debugger.cc b/runtime/debugger.cc index 450031abd5..b19381d879 100644 --- a/runtime/debugger.cc +++ b/runtime/debugger.cc @@ -30,6 +30,7 @@ #include "dex_instruction.h" #include "gc/accounting/card_table-inl.h" #include "gc/allocation_record.h" +#include "gc/scoped_gc_critical_section.h" #include "gc/space/large_object_space.h" #include "gc/space/space-inl.h" #include "handle_scope.h" @@ -559,14 +560,15 @@ void Dbg::GoActive() { return; } + Thread* const self = Thread::Current(); { // TODO: dalvik only warned if there were breakpoints left over. clear in Dbg::Disconnected? 
- ReaderMutexLock mu(Thread::Current(), *Locks::breakpoint_lock_); + ReaderMutexLock mu(self, *Locks::breakpoint_lock_); CHECK_EQ(gBreakpoints.size(), 0U); } { - MutexLock mu(Thread::Current(), *Locks::deoptimization_lock_); + MutexLock mu(self, *Locks::deoptimization_lock_); CHECK_EQ(deoptimization_requests_.size(), 0U); CHECK_EQ(full_deoptimization_event_count_, 0U); CHECK_EQ(dex_pc_change_event_ref_count_, 0U); @@ -579,15 +581,11 @@ void Dbg::GoActive() { Runtime* runtime = Runtime::Current(); ScopedSuspendAll ssa(__FUNCTION__); - Thread* self = Thread::Current(); - ThreadState old_state = self->SetStateUnsafe(kRunnable); - CHECK_NE(old_state, kRunnable); if (RequiresDeoptimization()) { runtime->GetInstrumentation()->EnableDeoptimization(); } instrumentation_events_ = 0; gDebuggerActive = true; - CHECK_EQ(self->SetStateUnsafe(old_state), kRunnable); LOG(INFO) << "Debugger is active"; } @@ -602,6 +600,10 @@ void Dbg::Disconnected() { Runtime* runtime = Runtime::Current(); Thread* self = Thread::Current(); { + // Required for DisableDeoptimization. + gc::ScopedGCCriticalSection gcs(self, + gc::kGcCauseInstrumentation, + gc::kCollectorTypeInstrumentation); ScopedSuspendAll ssa(__FUNCTION__); ThreadState old_state = self->SetStateUnsafe(kRunnable); // Debugger may not be active at this point. @@ -3166,6 +3168,10 @@ void Dbg::ManageDeoptimization() { } CHECK_EQ(self->GetState(), kRunnable); ScopedThreadSuspension sts(self, kWaitingForDeoptimization); + // Required for ProcessDeoptimizationRequest. + gc::ScopedGCCriticalSection gcs(self, + gc::kGcCauseInstrumentation, + gc::kCollectorTypeInstrumentation); // We need to suspend mutator threads first. ScopedSuspendAll ssa(__FUNCTION__); const ThreadState old_state = self->SetStateUnsafe(kRunnable); diff --git a/runtime/debugger.h b/runtime/debugger.h index b4d42de2bd..b3617e4bbb 100644 --- a/runtime/debugger.h +++ b/runtime/debugger.h @@ -731,7 +731,7 @@ class Dbg { SHARED_REQUIRES(Locks::mutator_lock_); static void ProcessDeoptimizationRequest(const DeoptimizationRequest& request) - REQUIRES(Locks::mutator_lock_); + REQUIRES(Locks::mutator_lock_, Roles::uninterruptible_); static void RequestDeoptimizationLocked(const DeoptimizationRequest& req) REQUIRES(Locks::deoptimization_lock_) SHARED_REQUIRES(Locks::mutator_lock_); diff --git a/runtime/dex_instruction-inl.h b/runtime/dex_instruction-inl.h index 7344d13805..e160a103d9 100644 --- a/runtime/dex_instruction-inl.h +++ b/runtime/dex_instruction-inl.h @@ -454,8 +454,8 @@ inline bool Instruction::HasVarArgs25x() const { return FormatOf(Opcode()) == k25x; } -// Copies all of the parameter registers into the arg array. Check the length with VRegB_25x()+1. -inline void Instruction::GetAllArgs25x(uint32_t arg[kMaxVarArgRegs]) const { +// Copies all of the parameter registers into the arg array. Check the length with VRegB_25x()+2. 
+inline void Instruction::GetAllArgs25x(uint32_t (&arg)[kMaxVarArgRegs25x]) const { DCHECK_EQ(FormatOf(Opcode()), k25x); /* @@ -500,19 +500,21 @@ inline void Instruction::GetAllArgs25x(uint32_t arg[kMaxVarArgRegs]) const { */ switch (count) { case 4: - arg[4] = (Fetch16(0) >> 8) & 0x0f; // vG + arg[5] = (Fetch16(0) >> 8) & 0x0f; // vG FALLTHROUGH_INTENDED; case 3: - arg[3] = (reg_list >> 12) & 0x0f; // vF + arg[4] = (reg_list >> 12) & 0x0f; // vF FALLTHROUGH_INTENDED; case 2: - arg[2] = (reg_list >> 8) & 0x0f; // vE + arg[3] = (reg_list >> 8) & 0x0f; // vE FALLTHROUGH_INTENDED; case 1: - arg[1] = (reg_list >> 4) & 0x0f; // vD + arg[2] = (reg_list >> 4) & 0x0f; // vD FALLTHROUGH_INTENDED; default: // case 0 + // The required lambda 'this' is actually a pair, but the pair is implicit. arg[0] = VRegC_25x(); // vC + arg[1] = arg[0] + 1; // vC + 1 break; } } diff --git a/runtime/dex_instruction.cc b/runtime/dex_instruction.cc index fc4df1475a..5250b0d79b 100644 --- a/runtime/dex_instruction.cc +++ b/runtime/dex_instruction.cc @@ -322,10 +322,10 @@ std::string Instruction::DumpString(const DexFile* file) const { } case k25x: { if (Opcode() == INVOKE_LAMBDA) { - uint32_t arg[kMaxVarArgRegs]; + uint32_t arg[kMaxVarArgRegs25x]; GetAllArgs25x(arg); const size_t num_extra_var_args = VRegB_25x(); - DCHECK_LE(num_extra_var_args + 1, kMaxVarArgRegs); + DCHECK_LE(num_extra_var_args + 2, arraysize(arg)); // invoke-lambda vC, {vD, vE, vF, vG} os << opcode << " v" << arg[0] << ", {"; @@ -333,7 +333,7 @@ std::string Instruction::DumpString(const DexFile* file) const { if (i != 0) { os << ", "; } - os << "v" << arg[i+1]; + os << "v" << arg[i+2]; // Don't print the pair of vC registers. Pair is implicit. } os << "}"; break; diff --git a/runtime/dex_instruction.h b/runtime/dex_instruction.h index df2d3799ab..48a12e53af 100644 --- a/runtime/dex_instruction.h +++ b/runtime/dex_instruction.h @@ -180,9 +180,11 @@ class Instruction { kVerifyVarArgRangeNonZero = 0x100000, kVerifyRuntimeOnly = 0x200000, kVerifyError = 0x400000, + kVerifyRegCString = 0x800000, }; static constexpr uint32_t kMaxVarArgRegs = 5; + static constexpr uint32_t kMaxVarArgRegs25x = 6; // lambdas are 2 registers. // Returns the size (in 2 byte code units) of this instruction. size_t SizeInCodeUnits() const { @@ -408,7 +410,7 @@ class Instruction { void GetVarArgs(uint32_t args[kMaxVarArgRegs]) const { return GetVarArgs(args, Fetch16(0)); } - void GetAllArgs25x(uint32_t args[kMaxVarArgRegs]) const; + void GetAllArgs25x(uint32_t (&args)[kMaxVarArgRegs25x]) const; // Returns the opcode field of the instruction. The given "inst_data" parameter must be the first // 16 bits of instruction. 
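[Editor's note] The GetAllArgs25x() change above widens the argument array to six slots so that arg[0]/arg[1] carry the implicit vC register pair of the lambda closure and arg[2]..arg[5] carry the explicit vD..vG arguments. The sketch below mirrors that unpacking in isolation; the parameter names (first_code_unit, reg_list, count, vC) are assumptions made so it is self-contained, whereas in ART they come from Fetch16()/VRegB_25x()/VRegC_25x() on the Instruction.

#include <cstdint>

constexpr uint32_t kSketchMaxVarArgRegs25x = 6;  // The closure pair takes two of the six slots.

void UnpackArgs25x(uint16_t first_code_unit,  // carries vG in its high byte
                   uint16_t reg_list,         // nibbles holding vD, vE, vF
                   uint32_t count,            // number of explicit arguments (0..4)
                   uint32_t vC,               // first register of the implicit closure pair
                   uint32_t (&arg)[kSketchMaxVarArgRegs25x]) {
  switch (count) {
    case 4: arg[5] = (first_code_unit >> 8) & 0x0f;  // vG
            [[fallthrough]];
    case 3: arg[4] = (reg_list >> 12) & 0x0f;        // vF
            [[fallthrough]];
    case 2: arg[3] = (reg_list >> 8) & 0x0f;         // vE
            [[fallthrough]];
    case 1: arg[2] = (reg_list >> 4) & 0x0f;         // vD
            [[fallthrough]];
    default:  // case 0
      arg[0] = vC;      // first half of the implicit pair
      arg[1] = vC + 1;  // second half of the pair
      break;
  }
}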
@@ -536,7 +538,7 @@ class Instruction { int GetVerifyTypeArgumentC() const { return (kInstructionVerifyFlags[Opcode()] & (kVerifyRegC | kVerifyRegCField | - kVerifyRegCNewArray | kVerifyRegCType | kVerifyRegCWide)); + kVerifyRegCNewArray | kVerifyRegCType | kVerifyRegCWide | kVerifyRegCString)); } int GetVerifyExtraFlags() const { diff --git a/runtime/dex_instruction_list.h b/runtime/dex_instruction_list.h index a176772a84..9d7e0c4409 100644 --- a/runtime/dex_instruction_list.h +++ b/runtime/dex_instruction_list.h @@ -263,10 +263,10 @@ V(0xF2, IGET_SHORT_QUICK, "iget-short-quick", k22c, true, kIndexFieldOffset, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \ V(0xF3, INVOKE_LAMBDA, "invoke-lambda", k25x, false, kIndexNone, kContinue | kThrow | kInvoke | kExperimental, kVerifyRegC /*TODO: | kVerifyVarArg*/) \ V(0xF4, UNUSED_F4, "unused-f4", k10x, false, kIndexUnknown, 0, kVerifyError) \ - V(0xF5, UNUSED_F5, "unused-f5", k10x, false, kIndexUnknown, 0, kVerifyError) \ + V(0xF5, CAPTURE_VARIABLE, "capture-variable", k21c, false, kIndexStringRef, kExperimental, kVerifyRegA | kVerifyRegBString) \ /* TODO(iam): get rid of the unused 'false' column */ \ V(0xF6, CREATE_LAMBDA, "create-lambda", k21c, false_UNUSED, kIndexMethodRef, kContinue | kThrow | kExperimental, kVerifyRegA | kVerifyRegBMethod) \ - V(0xF7, UNUSED_F7, "unused-f7", k10x, false, kIndexUnknown, 0, kVerifyError) \ + V(0xF7, LIBERATE_VARIABLE, "liberate-variable", k22c, false, kIndexStringRef, kExperimental, kVerifyRegA | kVerifyRegB | kVerifyRegCString) \ V(0xF8, BOX_LAMBDA, "box-lambda", k22x, true, kIndexNone, kContinue | kExperimental, kVerifyRegA | kVerifyRegB) \ V(0xF9, UNBOX_LAMBDA, "unbox-lambda", k22c, true, kIndexTypeRef, kContinue | kThrow | kExperimental, kVerifyRegA | kVerifyRegB | kVerifyRegCType) \ V(0xFA, UNUSED_FA, "unused-fa", k10x, false, kIndexUnknown, 0, kVerifyError) \ diff --git a/runtime/entrypoints/entrypoint_utils-inl.h b/runtime/entrypoints/entrypoint_utils-inl.h index cc3eefed34..f66628d7cb 100644 --- a/runtime/entrypoints/entrypoint_utils-inl.h +++ b/runtime/entrypoints/entrypoint_utils-inl.h @@ -71,44 +71,6 @@ inline ArtMethod* GetResolvedMethod(ArtMethod* outer_method, *outer_method->GetDexFile(), method_index, dex_cache, class_loader, nullptr, invoke_type); } -inline ArtMethod* GetCalleeSaveMethodCaller(ArtMethod** sp, - Runtime::CalleeSaveType type, - bool do_caller_check = false) - SHARED_REQUIRES(Locks::mutator_lock_) { - DCHECK_EQ(*sp, Runtime::Current()->GetCalleeSaveMethod(type)); - - const size_t callee_frame_size = GetCalleeSaveFrameSize(kRuntimeISA, type); - auto** caller_sp = reinterpret_cast<ArtMethod**>( - reinterpret_cast<uintptr_t>(sp) + callee_frame_size); - ArtMethod* outer_method = *caller_sp; - ArtMethod* caller = outer_method; - - if ((outer_method != nullptr) && outer_method->IsOptimized(sizeof(void*))) { - const size_t callee_return_pc_offset = GetCalleeSaveReturnPcOffset(kRuntimeISA, type); - uintptr_t caller_pc = *reinterpret_cast<uintptr_t*>( - (reinterpret_cast<uint8_t*>(sp) + callee_return_pc_offset)); - uintptr_t native_pc_offset = outer_method->NativeQuickPcOffset(caller_pc); - CodeInfo code_info = outer_method->GetOptimizedCodeInfo(); - StackMapEncoding encoding = code_info.ExtractEncoding(); - StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset, encoding); - DCHECK(stack_map.IsValid()); - if (stack_map.HasInlineInfo(encoding)) { - InlineInfo inline_info = code_info.GetInlineInfoOf(stack_map, 
encoding); - caller = GetResolvedMethod(outer_method, inline_info, inline_info.GetDepth() - 1); - } - } - - if (kIsDebugBuild && do_caller_check) { - // Note that do_caller_check is optional, as this method can be called by - // stubs, and tests without a proper call stack. - NthCallerVisitor visitor(Thread::Current(), 1, true); - visitor.WalkStack(); - CHECK_EQ(caller, visitor.caller); - } - - return caller; -} - inline ArtMethod* GetCalleeSaveMethodCaller(Thread* self, Runtime::CalleeSaveType type) SHARED_REQUIRES(Locks::mutator_lock_) { return GetCalleeSaveMethodCaller( @@ -449,12 +411,6 @@ inline ArtMethod* FindMethodFromCode(uint32_t method_idx, mirror::Object** this_ ThrowNullPointerExceptionForMethodAccess(method_idx, type); return nullptr; // Failure. } else if (access_check) { - // Incompatible class change should have been handled in resolve method. - if (UNLIKELY(resolved_method->CheckIncompatibleClassChange(type))) { - ThrowIncompatibleClassChangeError(type, resolved_method->GetInvokeType(), resolved_method, - referrer); - return nullptr; // Failure. - } mirror::Class* methods_class = resolved_method->GetDeclaringClass(); mirror::Class* referring_class = referrer->GetDeclaringClass(); bool can_access_resolved_method = @@ -464,6 +420,12 @@ inline ArtMethod* FindMethodFromCode(uint32_t method_idx, mirror::Object** this_ DCHECK(self->IsExceptionPending()); // Throw exception and unwind. return nullptr; // Failure. } + // Incompatible class change should have been handled in resolve method. + if (UNLIKELY(resolved_method->CheckIncompatibleClassChange(type))) { + ThrowIncompatibleClassChangeError(type, resolved_method->GetInvokeType(), resolved_method, + referrer); + return nullptr; // Failure. + } } switch (type) { case kStatic: diff --git a/runtime/entrypoints/entrypoint_utils.cc b/runtime/entrypoints/entrypoint_utils.cc index 94aced27ed..f1939993f7 100644 --- a/runtime/entrypoints/entrypoint_utils.cc +++ b/runtime/entrypoints/entrypoint_utils.cc @@ -21,11 +21,15 @@ #include "base/mutex.h" #include "class_linker-inl.h" #include "dex_file-inl.h" +#include "entrypoints/entrypoint_utils-inl.h" +#include "entrypoints/quick/callee_save_frame.h" +#include "entrypoints/runtime_asm_entrypoints.h" #include "gc/accounting/card_table-inl.h" #include "mirror/class-inl.h" #include "mirror/method.h" #include "mirror/object-inl.h" #include "mirror/object_array-inl.h" +#include "nth_caller_visitor.h" #include "reflection.h" #include "scoped_thread_state_change.h" #include "ScopedLocalRef.h" @@ -345,4 +349,54 @@ bool FillArrayData(mirror::Object* obj, const Instruction::ArrayDataPayload* pay return true; } +ArtMethod* GetCalleeSaveMethodCaller(ArtMethod** sp, + Runtime::CalleeSaveType type, + bool do_caller_check) + SHARED_REQUIRES(Locks::mutator_lock_) { + DCHECK_EQ(*sp, Runtime::Current()->GetCalleeSaveMethod(type)); + + const size_t callee_frame_size = GetCalleeSaveFrameSize(kRuntimeISA, type); + auto** caller_sp = reinterpret_cast<ArtMethod**>( + reinterpret_cast<uintptr_t>(sp) + callee_frame_size); + ArtMethod* outer_method = *caller_sp; + ArtMethod* caller = outer_method; + + if ((outer_method != nullptr) && outer_method->IsOptimized(sizeof(void*))) { + const size_t callee_return_pc_offset = GetCalleeSaveReturnPcOffset(kRuntimeISA, type); + uintptr_t caller_pc = *reinterpret_cast<uintptr_t*>( + (reinterpret_cast<uint8_t*>(sp) + callee_return_pc_offset)); + if (LIKELY(caller_pc != reinterpret_cast<uintptr_t>(GetQuickInstrumentationExitPc()))) { + uintptr_t native_pc_offset = 
outer_method->NativeQuickPcOffset(caller_pc); + CodeInfo code_info = outer_method->GetOptimizedCodeInfo(); + StackMapEncoding encoding = code_info.ExtractEncoding(); + StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset, encoding); + DCHECK(stack_map.IsValid()); + if (stack_map.HasInlineInfo(encoding)) { + InlineInfo inline_info = code_info.GetInlineInfoOf(stack_map, encoding); + caller = GetResolvedMethod(outer_method, inline_info, inline_info.GetDepth() - 1); + } + } else { + // We're instrumenting, just use the StackVisitor which knows how to + // handle instrumented frames. + NthCallerVisitor visitor(Thread::Current(), 1, true); + visitor.WalkStack(); + caller = visitor.caller; + if (kIsDebugBuild) { + // Avoid doing the check below. + do_caller_check = false; + } + } + } + + if (kIsDebugBuild && do_caller_check) { + // Note that do_caller_check is optional, as this method can be called by + // stubs, and tests without a proper call stack. + NthCallerVisitor visitor(Thread::Current(), 1, true); + visitor.WalkStack(); + CHECK_EQ(caller, visitor.caller); + } + + return caller; +} + } // namespace art diff --git a/runtime/entrypoints/entrypoint_utils.h b/runtime/entrypoints/entrypoint_utils.h index 53f2677e7e..4217cab697 100644 --- a/runtime/entrypoints/entrypoint_utils.h +++ b/runtime/entrypoints/entrypoint_utils.h @@ -26,6 +26,7 @@ #include "gc/allocator_type.h" #include "invoke_type.h" #include "jvalue.h" +#include "runtime.h" namespace art { @@ -179,6 +180,10 @@ bool FillArrayData(mirror::Object* obj, const Instruction::ArrayDataPayload* pay template <typename INT_TYPE, typename FLOAT_TYPE> inline INT_TYPE art_float_to_integral(FLOAT_TYPE f); +ArtMethod* GetCalleeSaveMethodCaller(ArtMethod** sp, + Runtime::CalleeSaveType type, + bool do_caller_check = false); + } // namespace art #endif // ART_RUNTIME_ENTRYPOINTS_ENTRYPOINT_UTILS_H_ diff --git a/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc b/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc index d749664d12..dfd9fcddb8 100644 --- a/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc @@ -22,13 +22,16 @@ #include "mirror/class-inl.h" #include "mirror/object_array-inl.h" #include "mirror/object-inl.h" +#include "quick_exception_handler.h" #include "stack.h" #include "thread.h" #include "verifier/method_verifier.h" namespace art { -NO_RETURN static void artDeoptimizeImpl(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) { +extern "C" NO_RETURN void artDeoptimize(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) { + ScopedQuickEntrypointChecks sqec(self); + if (VLOG_IS_ON(deopt)) { LOG(INFO) << "Deopting:"; self->Dump(LOG(INFO)); @@ -39,19 +42,26 @@ NO_RETURN static void artDeoptimizeImpl(Thread* self) SHARED_REQUIRES(Locks::mut self->QuickDeliverException(); } -extern "C" NO_RETURN void artDeoptimize(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) { - ScopedQuickEntrypointChecks sqec(self); - artDeoptimizeImpl(self); -} - extern "C" NO_RETURN void artDeoptimizeFromCompiledCode(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) { ScopedQuickEntrypointChecks sqec(self); + + // Deopt logging will be in DeoptimizeSingleFrame. It is there to take advantage of the + // specialized visitor that will show whether a method is Quick or Shadow. + // Before deoptimizing to interpreter, we must push the deoptimization context. 
JValue return_value; return_value.SetJ(0); // we never deoptimize from compiled code with an invoke result. self->PushDeoptimizationContext(return_value, false, self->GetException()); - artDeoptimizeImpl(self); + + QuickExceptionHandler exception_handler(self, true); + exception_handler.DeoptimizeSingleFrame(); + exception_handler.UpdateInstrumentationStack(); + exception_handler.DeoptimizeSingleFrameArchDependentFixup(); + // We cannot smash the caller-saves, as we need the ArtMethod in a parameter register that would + // be caller-saved. This has the downside that we cannot track incorrect register usage down the + // line. + exception_handler.DoLongJump(false); } } // namespace art diff --git a/runtime/entrypoints/quick/quick_jni_entrypoints.cc b/runtime/entrypoints/quick/quick_jni_entrypoints.cc index f69c39e8bc..58f256a191 100644 --- a/runtime/entrypoints/quick/quick_jni_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_jni_entrypoints.cc @@ -65,6 +65,9 @@ static void GoToRunnable(Thread* self) NO_THREAD_SAFETY_ANALYSIS { static void PopLocalReferences(uint32_t saved_local_ref_cookie, Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) { JNIEnvExt* env = self->GetJniEnv(); + if (UNLIKELY(env->check_jni)) { + env->CheckNoHeldMonitors(); + } env->locals.SetSegmentState(env->local_ref_cookie); env->local_ref_cookie = saved_local_ref_cookie; self->PopHandleScope(); @@ -112,4 +115,61 @@ extern mirror::Object* JniMethodEndWithReferenceSynchronized(jobject result, return JniMethodEndWithReferenceHandleResult(result, saved_local_ref_cookie, self); } +extern uint64_t GenericJniMethodEnd(Thread* self, + uint32_t saved_local_ref_cookie, + jvalue result, + uint64_t result_f, + ArtMethod* called, + HandleScope* handle_scope) + // TODO: NO_THREAD_SAFETY_ANALYSIS as GoToRunnable() is NO_THREAD_SAFETY_ANALYSIS + NO_THREAD_SAFETY_ANALYSIS { + GoToRunnable(self); + // We need the mutator lock (i.e., calling GoToRunnable()) before accessing the shorty or the + // locked object. + jobject locked = called->IsSynchronized() ? handle_scope->GetHandle(0).ToJObject() : nullptr; + char return_shorty_char = called->GetShorty()[0]; + if (return_shorty_char == 'L') { + if (locked != nullptr) { + UnlockJniSynchronizedMethod(locked, self); + } + return reinterpret_cast<uint64_t>(JniMethodEndWithReferenceHandleResult( + result.l, saved_local_ref_cookie, self)); + } else { + if (locked != nullptr) { + UnlockJniSynchronizedMethod(locked, self); // Must decode before pop. + } + PopLocalReferences(saved_local_ref_cookie, self); + switch (return_shorty_char) { + case 'F': { + if (kRuntimeISA == kX86) { + // Convert back the result to float. 
+ double d = bit_cast<double, uint64_t>(result_f); + return bit_cast<uint32_t, float>(static_cast<float>(d)); + } else { + return result_f; + } + } + case 'D': + return result_f; + case 'Z': + return result.z; + case 'B': + return result.b; + case 'C': + return result.c; + case 'S': + return result.s; + case 'I': + return result.i; + case 'J': + return result.j; + case 'V': + return 0; + default: + LOG(FATAL) << "Unexpected return shorty character " << return_shorty_char; + return 0; + } + } +} + } // namespace art diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc index 1302c5f17b..5d3ac73d77 100644 --- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc @@ -29,8 +29,10 @@ #include "mirror/method.h" #include "mirror/object-inl.h" #include "mirror/object_array-inl.h" +#include "quick_exception_handler.h" #include "runtime.h" #include "scoped_thread_state_change.h" +#include "stack.h" #include "debugger.h" namespace art { @@ -646,27 +648,86 @@ extern "C" uint64_t artQuickToInterpreterBridge(ArtMethod* method, Thread* self, if (method->IsAbstract()) { ThrowAbstractMethodError(method); return 0; + } + + JValue tmp_value; + ShadowFrame* deopt_frame = self->PopStackedShadowFrame( + StackedShadowFrameType::kSingleFrameDeoptimizationShadowFrame, false); + const DexFile::CodeItem* code_item = method->GetCodeItem(); + DCHECK(code_item != nullptr) << PrettyMethod(method); + ManagedStack fragment; + + DCHECK(!method->IsNative()) << PrettyMethod(method); + uint32_t shorty_len = 0; + auto* non_proxy_method = method->GetInterfaceMethodIfProxy(sizeof(void*)); + const char* shorty = non_proxy_method->GetShorty(&shorty_len); + + JValue result; + + if (deopt_frame != nullptr) { + // Coming from single-frame deopt. + + if (kIsDebugBuild) { + // Sanity-check: are the methods as expected? We check that the last shadow frame (the bottom + // of the call-stack) corresponds to the called method. + ShadowFrame* linked = deopt_frame; + while (linked->GetLink() != nullptr) { + linked = linked->GetLink(); + } + CHECK_EQ(method, linked->GetMethod()) << PrettyMethod(method) << " " + << PrettyMethod(linked->GetMethod()); + } + + if (VLOG_IS_ON(deopt)) { + // Print out the stack to verify that it was a single-frame deopt. + LOG(INFO) << "Continue-ing from deopt. Stack is:"; + QuickExceptionHandler::DumpFramesWithType(self, true); + } + + mirror::Throwable* pending_exception = nullptr; + self->PopDeoptimizationContext(&result, &pending_exception); + + // Push a transition back into managed code onto the linked list in thread. + self->PushManagedStackFragment(&fragment); + + // Ensure that the stack is still in order. + if (kIsDebugBuild) { + class DummyStackVisitor : public StackVisitor { + public: + explicit DummyStackVisitor(Thread* self_in) SHARED_REQUIRES(Locks::mutator_lock_) + : StackVisitor(self_in, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames) {} + + bool VisitFrame() OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) { + // Nothing to do here. In a debug build, SanityCheckFrame will do the work in the walking + // logic. Just always say we want to continue. + return true; + } + }; + DummyStackVisitor dsv(self); + dsv.WalkStack(); + } + + // Restore the exception that was pending before deoptimization then interpret the + // deoptimized frames. 
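[Editor's note] The GenericJniMethodEnd() helper added to quick_jni_entrypoints.cc above packs a native method's return value into a single uint64_t keyed on the return-shorty character. The sketch below shows only that packing step; SketchJValue and PackJniReturn are stand-ins for ART's jvalue and entrypoint, the x86 float re-narrowing branch is folded into a bool parameter, and the reference ('L') case, which needs the local-reference machinery, is omitted.

#include <cstdint>
#include <cstring>

union SketchJValue {
  uint8_t z; int8_t b; uint16_t c; int16_t s; int32_t i; int64_t j;
};

uint64_t PackJniReturn(char return_shorty, SketchJValue result, uint64_t result_f,
                       bool runtime_is_x86) {
  switch (return_shorty) {
    case 'F': {
      if (runtime_is_x86) {
        // On x86 the value comes back widened to a double in result_f; narrow it to float bits.
        double d;
        std::memcpy(&d, &result_f, sizeof(d));
        float f = static_cast<float>(d);
        uint32_t bits;
        std::memcpy(&bits, &f, sizeof(bits));
        return bits;
      }
      return result_f;
    }
    case 'D': return result_f;
    case 'Z': return result.z;
    case 'B': return result.b;
    case 'C': return result.c;
    case 'S': return result.s;
    case 'I': return result.i;
    case 'J': return result.j;
    case 'V': return 0;
    default:  return 0;  // Unexpected shorty; ART aborts with LOG(FATAL) here.
  }
}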
+ if (pending_exception != nullptr) { + self->SetException(pending_exception); + } + interpreter::EnterInterpreterFromDeoptimize(self, deopt_frame, &result); } else { - DCHECK(!method->IsNative()) << PrettyMethod(method); const char* old_cause = self->StartAssertNoThreadSuspension( "Building interpreter shadow frame"); - const DexFile::CodeItem* code_item = method->GetCodeItem(); - DCHECK(code_item != nullptr) << PrettyMethod(method); uint16_t num_regs = code_item->registers_size_; - void* memory = alloca(ShadowFrame::ComputeSize(num_regs)); // No last shadow coming from quick. - ShadowFrame* shadow_frame(ShadowFrame::Create(num_regs, nullptr, method, 0, memory)); + ShadowFrameAllocaUniquePtr shadow_frame_unique_ptr = + CREATE_SHADOW_FRAME(num_regs, nullptr, method, 0); + ShadowFrame* shadow_frame = shadow_frame_unique_ptr.get(); size_t first_arg_reg = code_item->registers_size_ - code_item->ins_size_; - uint32_t shorty_len = 0; - auto* non_proxy_method = method->GetInterfaceMethodIfProxy(sizeof(void*)); - const char* shorty = non_proxy_method->GetShorty(&shorty_len); BuildQuickShadowFrameVisitor shadow_frame_builder(sp, method->IsStatic(), shorty, shorty_len, shadow_frame, first_arg_reg); shadow_frame_builder.VisitArguments(); const bool needs_initialization = method->IsStatic() && !method->GetDeclaringClass()->IsInitialized(); // Push a transition back into managed code onto the linked list in thread. - ManagedStack fragment; self->PushManagedStackFragment(&fragment); self->PushShadowFrame(shadow_frame); self->EndAssertNoThreadSuspension(old_cause); @@ -681,24 +742,26 @@ extern "C" uint64_t artQuickToInterpreterBridge(ArtMethod* method, Thread* self, return 0; } } - JValue result = interpreter::EnterInterpreterFromEntryPoint(self, code_item, shadow_frame); - // Pop transition. - self->PopManagedStackFragment(fragment); - - // Request a stack deoptimization if needed - ArtMethod* caller = QuickArgumentVisitor::GetCallingMethod(sp); - if (UNLIKELY(Dbg::IsForcedInterpreterNeededForUpcall(self, caller))) { - // Push the context of the deoptimization stack so we can restore the return value and the - // exception before executing the deoptimized frames. - self->PushDeoptimizationContext(result, shorty[0] == 'L', self->GetException()); - - // Set special exception to cause deoptimization. - self->SetException(Thread::GetDeoptimizationException()); - } - // No need to restore the args since the method has already been run by the interpreter. - return result.GetJ(); + result = interpreter::EnterInterpreterFromEntryPoint(self, code_item, shadow_frame); + } + + // Pop transition. + self->PopManagedStackFragment(fragment); + + // Request a stack deoptimization if needed + ArtMethod* caller = QuickArgumentVisitor::GetCallingMethod(sp); + if (UNLIKELY(Dbg::IsForcedInterpreterNeededForUpcall(self, caller))) { + // Push the context of the deoptimization stack so we can restore the return value and the + // exception before executing the deoptimized frames. + self->PushDeoptimizationContext(result, shorty[0] == 'L', self->GetException()); + + // Set special exception to cause deoptimization. + self->SetException(Thread::GetDeoptimizationException()); } + + // No need to restore the args since the method has already been run by the interpreter. 
+ return result.GetJ(); } // Visits arguments on the stack placing them into the args vector, Object* arguments are converted @@ -1926,62 +1989,27 @@ extern "C" TwoWordReturn artQuickGenericJniTrampoline(Thread* self, ArtMethod** reinterpret_cast<uintptr_t>(nativeCode)); } +// Defined in quick_jni_entrypoints.cc. +extern uint64_t GenericJniMethodEnd(Thread* self, uint32_t saved_local_ref_cookie, + jvalue result, uint64_t result_f, ArtMethod* called, + HandleScope* handle_scope); /* * Is called after the native JNI code. Responsible for cleanup (handle scope, saved state) and * unlocking. */ -extern "C" uint64_t artQuickGenericJniEndTrampoline(Thread* self, jvalue result, uint64_t result_f) - SHARED_REQUIRES(Locks::mutator_lock_) { +extern "C" uint64_t artQuickGenericJniEndTrampoline(Thread* self, + jvalue result, + uint64_t result_f) { + // We're here just back from a native call. We don't have the shared mutator lock at this point + // yet until we call GoToRunnable() later in GenericJniMethodEnd(). Accessing objects or doing + // anything that requires a mutator lock before that would cause problems as GC may have the + // exclusive mutator lock and may be moving objects, etc. ArtMethod** sp = self->GetManagedStack()->GetTopQuickFrame(); uint32_t* sp32 = reinterpret_cast<uint32_t*>(sp); ArtMethod* called = *sp; uint32_t cookie = *(sp32 - 1); - - jobject lock = nullptr; - if (called->IsSynchronized()) { - HandleScope* table = reinterpret_cast<HandleScope*>(reinterpret_cast<uint8_t*>(sp) - + sizeof(*sp)); - lock = table->GetHandle(0).ToJObject(); - } - - char return_shorty_char = called->GetShorty()[0]; - - if (return_shorty_char == 'L') { - return artQuickGenericJniEndJNIRef(self, cookie, result.l, lock); - } else { - artQuickGenericJniEndJNINonRef(self, cookie, lock); - - switch (return_shorty_char) { - case 'F': { - if (kRuntimeISA == kX86) { - // Convert back the result to float. - double d = bit_cast<double, uint64_t>(result_f); - return bit_cast<uint32_t, float>(static_cast<float>(d)); - } else { - return result_f; - } - } - case 'D': - return result_f; - case 'Z': - return result.z; - case 'B': - return result.b; - case 'C': - return result.c; - case 'S': - return result.s; - case 'I': - return result.i; - case 'J': - return result.j; - case 'V': - return 0; - default: - LOG(FATAL) << "Unexpected return shorty character " << return_shorty_char; - return 0; - } - } + HandleScope* table = reinterpret_cast<HandleScope*>(reinterpret_cast<uint8_t*>(sp) + sizeof(*sp)); + return GenericJniMethodEnd(self, cookie, result, result_f, called, table); } // We use TwoWordReturn to optimize scalar returns. 
We use the hi value for code, and the lo value diff --git a/runtime/gc/allocator/rosalloc-inl.h b/runtime/gc/allocator/rosalloc-inl.h index 25fdd7cbc9..2510514c04 100644 --- a/runtime/gc/allocator/rosalloc-inl.h +++ b/runtime/gc/allocator/rosalloc-inl.h @@ -53,13 +53,7 @@ inline ALWAYS_INLINE void* RosAlloc::Alloc(Thread* self, size_t size, size_t* by } inline bool RosAlloc::Run::IsFull() { - const size_t num_vec = NumberOfBitmapVectors(); - for (size_t v = 0; v < num_vec; ++v) { - if (~alloc_bit_map_[v] != 0) { - return false; - } - } - return true; + return free_list_.Size() == 0; } inline bool RosAlloc::CanAllocFromThreadLocalRun(Thread* self, size_t size) { @@ -120,45 +114,14 @@ inline size_t RosAlloc::MaxBytesBulkAllocatedFor(size_t size) { } inline void* RosAlloc::Run::AllocSlot() { - const size_t idx = size_bracket_idx_; - while (true) { - if (kIsDebugBuild) { - // Make sure that no slots leaked, the bitmap should be full for all previous vectors. - for (size_t i = 0; i < first_search_vec_idx_; ++i) { - CHECK_EQ(~alloc_bit_map_[i], 0U); - } - } - uint32_t* const alloc_bitmap_ptr = &alloc_bit_map_[first_search_vec_idx_]; - uint32_t ffz1 = __builtin_ffs(~*alloc_bitmap_ptr); - if (LIKELY(ffz1 != 0)) { - const uint32_t ffz = ffz1 - 1; - const uint32_t slot_idx = ffz + - first_search_vec_idx_ * sizeof(*alloc_bitmap_ptr) * kBitsPerByte; - const uint32_t mask = 1U << ffz; - DCHECK_LT(slot_idx, numOfSlots[idx]) << "out of range"; - // Found an empty slot. Set the bit. - DCHECK_EQ(*alloc_bitmap_ptr & mask, 0U); - *alloc_bitmap_ptr |= mask; - DCHECK_NE(*alloc_bitmap_ptr & mask, 0U); - uint8_t* slot_addr = reinterpret_cast<uint8_t*>(this) + - headerSizes[idx] + slot_idx * bracketSizes[idx]; - if (kTraceRosAlloc) { - LOG(INFO) << "RosAlloc::Run::AllocSlot() : 0x" << std::hex - << reinterpret_cast<intptr_t>(slot_addr) - << ", bracket_size=" << std::dec << bracketSizes[idx] - << ", slot_idx=" << slot_idx; - } - return slot_addr; - } - const size_t num_words = RoundUp(numOfSlots[idx], 32) / 32; - if (first_search_vec_idx_ + 1 >= num_words) { - DCHECK(IsFull()); - // Already at the last word, return null. - return nullptr; - } - // Increase the index to the next word and try again. 
- ++first_search_vec_idx_; + Slot* slot = free_list_.Remove(); + if (kTraceRosAlloc && slot != nullptr) { + const uint8_t idx = size_bracket_idx_; + LOG(INFO) << "RosAlloc::Run::AllocSlot() : " << slot + << ", bracket_size=" << std::dec << bracketSizes[idx] + << ", slot_idx=" << SlotIndex(slot); } + return slot; } } // namespace allocator diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc index 470bc1cb22..9c8e4df1e0 100644 --- a/runtime/gc/allocator/rosalloc.cc +++ b/runtime/gc/allocator/rosalloc.cc @@ -35,7 +35,7 @@ namespace art { namespace gc { namespace allocator { -static constexpr bool kUsePrefetchDuringAllocRun = true; +static constexpr bool kUsePrefetchDuringAllocRun = false; static constexpr bool kPrefetchNewRunDataByZeroing = false; static constexpr size_t kPrefetchStride = 64; @@ -43,8 +43,6 @@ size_t RosAlloc::bracketSizes[kNumOfSizeBrackets]; size_t RosAlloc::numOfPages[kNumOfSizeBrackets]; size_t RosAlloc::numOfSlots[kNumOfSizeBrackets]; size_t RosAlloc::headerSizes[kNumOfSizeBrackets]; -size_t RosAlloc::bulkFreeBitMapOffsets[kNumOfSizeBrackets]; -size_t RosAlloc::threadLocalFreeBitMapOffsets[kNumOfSizeBrackets]; bool RosAlloc::initialized_ = false; size_t RosAlloc::dedicated_full_run_storage_[kPageSize / sizeof(size_t)] = { 0 }; RosAlloc::Run* RosAlloc::dedicated_full_run_ = @@ -556,9 +554,7 @@ RosAlloc::Run* RosAlloc::AllocRun(Thread* self, size_t idx) { new_run->magic_num_ = kMagicNum; } new_run->size_bracket_idx_ = idx; - new_run->SetAllocBitMapBitsForInvalidSlots(); DCHECK(!new_run->IsThreadLocal()); - DCHECK_EQ(new_run->first_search_vec_idx_, 0U); DCHECK(!new_run->to_be_bulk_freed_); if (kUsePrefetchDuringAllocRun && idx < kNumThreadLocalSizeBrackets) { // Take ownership of the cache lines if we are likely to be thread local run. @@ -576,6 +572,7 @@ RosAlloc::Run* RosAlloc::AllocRun(Thread* self, size_t idx) { } } } + new_run->InitFreeList(); } return new_run; } @@ -695,15 +692,11 @@ void* RosAlloc::AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated, MutexLock mu(self, *size_bracket_locks_[idx]); bool is_all_free_after_merge; // This is safe to do for the dedicated_full_run_ since the bitmaps are empty. - if (thread_local_run->MergeThreadLocalFreeBitMapToAllocBitMap(&is_all_free_after_merge)) { + if (thread_local_run->MergeThreadLocalFreeListToFreeList(&is_all_free_after_merge)) { DCHECK_NE(thread_local_run, dedicated_full_run_); // Some slot got freed. Keep it. DCHECK(!thread_local_run->IsFull()); DCHECK_EQ(is_all_free_after_merge, thread_local_run->IsAllFree()); - if (is_all_free_after_merge) { - // Check that the bitmap idx is back at 0 if it's all free. - DCHECK_EQ(thread_local_run->first_search_vec_idx_, 0U); - } } else { // No slots got freed. Try to refill the thread-local run. 
DCHECK(thread_local_run->IsFull()); @@ -792,7 +785,7 @@ size_t RosAlloc::FreeFromRun(Thread* self, void* ptr, Run* run) { DCHECK_LT(run->size_bracket_idx_, kNumThreadLocalSizeBrackets); DCHECK(non_full_runs_[idx].find(run) == non_full_runs_[idx].end()); DCHECK(full_runs_[idx].find(run) == full_runs_[idx].end()); - run->MarkThreadLocalFreeBitMap(ptr); + run->AddToThreadLocalFreeList(ptr); if (kTraceRosAlloc) { LOG(INFO) << "RosAlloc::FreeFromRun() : Freed a slot in a thread local run 0x" << std::hex << reinterpret_cast<intptr_t>(run); @@ -818,7 +811,7 @@ size_t RosAlloc::FreeFromRun(Thread* self, void* ptr, Run* run) { } DCHECK(non_full_runs_[idx].find(run) == non_full_runs_[idx].end()); DCHECK(full_runs_[idx].find(run) == full_runs_[idx].end()); - run->ZeroHeader(); + run->ZeroHeaderAndSlotHeaders(); { MutexLock lock_mu(self, lock_); FreePages(self, run, true); @@ -853,271 +846,145 @@ size_t RosAlloc::FreeFromRun(Thread* self, void* ptr, Run* run) { return bracket_size; } -std::string RosAlloc::Run::BitMapToStr(uint32_t* bit_map_base, size_t num_vec) { - std::string bit_map_str; - for (size_t v = 0; v < num_vec; v++) { - uint32_t vec = bit_map_base[v]; - if (v != num_vec - 1) { - bit_map_str.append(StringPrintf("%x-", vec)); +template<bool kUseTail> +std::string RosAlloc::Run::FreeListToStr(SlotFreeList<kUseTail>* free_list) { + std::string free_list_str; + const uint8_t idx = size_bracket_idx_; + const size_t bracket_size = bracketSizes[idx]; + for (Slot* slot = free_list->Head(); slot != nullptr; slot = slot->Next()) { + bool is_last = slot->Next() == nullptr; + uintptr_t slot_offset = reinterpret_cast<uintptr_t>(slot) - + reinterpret_cast<uintptr_t>(FirstSlot()); + DCHECK_EQ(slot_offset % bracket_size, 0U); + uintptr_t slot_idx = slot_offset / bracket_size; + if (!is_last) { + free_list_str.append(StringPrintf("%u-", static_cast<uint32_t>(slot_idx))); } else { - bit_map_str.append(StringPrintf("%x", vec)); + free_list_str.append(StringPrintf("%u", static_cast<uint32_t>(slot_idx))); } } - return bit_map_str.c_str(); + return free_list_str; } std::string RosAlloc::Run::Dump() { size_t idx = size_bracket_idx_; - size_t num_slots = numOfSlots[idx]; - size_t num_vec = RoundUp(num_slots, 32) / 32; std::ostringstream stream; stream << "RosAlloc Run = " << reinterpret_cast<void*>(this) << "{ magic_num=" << static_cast<int>(magic_num_) << " size_bracket_idx=" << idx << " is_thread_local=" << static_cast<int>(is_thread_local_) << " to_be_bulk_freed=" << static_cast<int>(to_be_bulk_freed_) - << " first_search_vec_idx=" << first_search_vec_idx_ - << " alloc_bit_map=" << BitMapToStr(alloc_bit_map_, num_vec) - << " bulk_free_bit_map=" << BitMapToStr(BulkFreeBitMap(), num_vec) - << " thread_local_bit_map=" << BitMapToStr(ThreadLocalFreeBitMap(), num_vec) + << " free_list=" << FreeListToStr(&free_list_) + << " bulk_free_list=" << FreeListToStr(&bulk_free_list_) + << " thread_local_list=" << FreeListToStr(&thread_local_free_list_) << " }" << std::endl; return stream.str(); } -void RosAlloc::Run::FreeSlot(void* ptr) { - DCHECK(!IsThreadLocal()); +inline size_t RosAlloc::Run::SlotIndex(Slot* slot) { const uint8_t idx = size_bracket_idx_; const size_t bracket_size = bracketSizes[idx]; - const size_t offset_from_slot_base = reinterpret_cast<uint8_t*>(ptr) - - (reinterpret_cast<uint8_t*>(this) + headerSizes[idx]); + const size_t offset_from_slot_base = reinterpret_cast<uint8_t*>(slot) + - reinterpret_cast<uint8_t*>(FirstSlot()); DCHECK_EQ(offset_from_slot_base % bracket_size, static_cast<size_t>(0)); size_t 
slot_idx = offset_from_slot_base / bracket_size; DCHECK_LT(slot_idx, numOfSlots[idx]); - size_t vec_idx = slot_idx / 32; - if (kIsDebugBuild) { - size_t num_vec = RoundUp(numOfSlots[idx], 32) / 32; - DCHECK_LT(vec_idx, num_vec); - } - size_t vec_off = slot_idx % 32; - uint32_t* vec = &alloc_bit_map_[vec_idx]; - first_search_vec_idx_ = std::min(first_search_vec_idx_, static_cast<uint32_t>(vec_idx)); - const uint32_t mask = 1U << vec_off; - DCHECK_NE(*vec & mask, 0U); - *vec &= ~mask; - DCHECK_EQ(*vec & mask, 0U); + return slot_idx; +} + +void RosAlloc::Run::FreeSlot(void* ptr) { + DCHECK(!IsThreadLocal()); + const uint8_t idx = size_bracket_idx_; + const size_t bracket_size = bracketSizes[idx]; + Slot* slot = ToSlot(ptr); // Zero out the memory. // TODO: Investigate alternate memset since ptr is guaranteed to be aligned to 16. - memset(ptr, 0, bracket_size); + memset(slot, 0, bracket_size); + free_list_.Add(slot); if (kTraceRosAlloc) { - LOG(INFO) << "RosAlloc::Run::FreeSlot() : 0x" << std::hex << reinterpret_cast<intptr_t>(ptr) - << ", bracket_size=" << std::dec << bracketSizes[idx] << ", slot_idx=" << slot_idx; + LOG(INFO) << "RosAlloc::Run::FreeSlot() : " << slot + << ", bracket_size=" << std::dec << bracket_size << ", slot_idx=" << SlotIndex(slot); } } -size_t RosAlloc::Run::NumberOfFreeSlots() { - size_t num_alloc_slots = 0; - const size_t idx = size_bracket_idx_; - const size_t num_slots = numOfSlots[idx]; - const size_t num_vec = RoundUp(num_slots, 32) / 32; - DCHECK_NE(num_vec, 0U); - for (size_t v = 0; v < num_vec - 1; v++) { - num_alloc_slots += POPCOUNT(alloc_bit_map_[v]); - } - // Don't count the invalid bits in the last vector. - uint32_t last_vec_masked = alloc_bit_map_[num_vec - 1] & - ~GetBitmapLastVectorMask(num_slots, num_vec); - num_alloc_slots += POPCOUNT(last_vec_masked); - size_t num_free_slots = num_slots - num_alloc_slots; - DCHECK_LE(num_alloc_slots, num_slots); - DCHECK_LE(num_free_slots, num_slots); - return num_free_slots; -} - -inline bool RosAlloc::Run::MergeThreadLocalFreeBitMapToAllocBitMap(bool* is_all_free_after_out) { +inline bool RosAlloc::Run::MergeThreadLocalFreeListToFreeList(bool* is_all_free_after_out) { DCHECK(IsThreadLocal()); - // Free slots in the alloc bit map based on the thread local free bit map. - const size_t idx = size_bracket_idx_; - const size_t num_of_slots = numOfSlots[idx]; - const size_t num_vec = RoundUp(num_of_slots, 32) / 32; - bool changed = false; - uint32_t* vecp = &alloc_bit_map_[0]; - uint32_t* tl_free_vecp = &ThreadLocalFreeBitMap()[0]; - bool is_all_free_after = true; - for (size_t v = 0; v < num_vec; v++, vecp++, tl_free_vecp++) { - uint32_t tl_free_vec = *tl_free_vecp; - uint32_t vec_before = *vecp; - uint32_t vec_after; - if (tl_free_vec != 0) { - first_search_vec_idx_ = std::min(first_search_vec_idx_, static_cast<uint32_t>(v)); - vec_after = vec_before & ~tl_free_vec; - *vecp = vec_after; - changed = true; - *tl_free_vecp = 0; // clear the thread local free bit map. - } else { - vec_after = vec_before; - } - if (vec_after != 0) { - if (v == num_vec - 1) { - // Only not all free if a bit other than the mask bits are set. - is_all_free_after = - is_all_free_after && GetBitmapLastVectorMask(num_of_slots, num_vec) == vec_after; - } else { - is_all_free_after = false; - } - } - DCHECK_EQ(*tl_free_vecp, static_cast<uint32_t>(0)); - } - *is_all_free_after_out = is_all_free_after; - // Return true if there was at least a bit set in the thread-local - // free bit map and at least a bit in the alloc bit map changed. 
- return changed; + // Merge the thread local free list into the free list and clear the thread local free list. + const uint8_t idx = size_bracket_idx_; + bool thread_local_free_list_size = thread_local_free_list_.Size(); + const size_t size_before = free_list_.Size(); + free_list_.Merge(&thread_local_free_list_); + const size_t size_after = free_list_.Size(); + DCHECK_EQ(size_before < size_after, thread_local_free_list_size > 0); + DCHECK_LE(size_before, size_after); + *is_all_free_after_out = free_list_.Size() == numOfSlots[idx]; + // Return true at least one slot was added to the free list. + return size_before < size_after; } -inline void RosAlloc::Run::MergeBulkFreeBitMapIntoAllocBitMap() { +inline void RosAlloc::Run::MergeBulkFreeListToFreeList() { DCHECK(!IsThreadLocal()); - // Free slots in the alloc bit map based on the bulk free bit map. - const size_t num_vec = NumberOfBitmapVectors(); - uint32_t* vecp = &alloc_bit_map_[0]; - uint32_t* free_vecp = &BulkFreeBitMap()[0]; - for (size_t v = 0; v < num_vec; v++, vecp++, free_vecp++) { - uint32_t free_vec = *free_vecp; - if (free_vec != 0) { - first_search_vec_idx_ = std::min(first_search_vec_idx_, static_cast<uint32_t>(v)); - *vecp &= ~free_vec; - *free_vecp = 0; // clear the bulk free bit map. - } - DCHECK_EQ(*free_vecp, static_cast<uint32_t>(0)); - } + // Merge the bulk free list into the free list and clear the bulk free list. + free_list_.Merge(&bulk_free_list_); } -inline void RosAlloc::Run::UnionBulkFreeBitMapToThreadLocalFreeBitMap() { +inline void RosAlloc::Run::MergeBulkFreeListToThreadLocalFreeList() { DCHECK(IsThreadLocal()); - // Union the thread local bit map with the bulk free bit map. - size_t num_vec = NumberOfBitmapVectors(); - uint32_t* to_vecp = &ThreadLocalFreeBitMap()[0]; - uint32_t* from_vecp = &BulkFreeBitMap()[0]; - for (size_t v = 0; v < num_vec; v++, to_vecp++, from_vecp++) { - uint32_t from_vec = *from_vecp; - if (from_vec != 0) { - *to_vecp |= from_vec; - *from_vecp = 0; // clear the bulk free bit map. - } - DCHECK_EQ(*from_vecp, static_cast<uint32_t>(0)); - } + // Merge the bulk free list into the thread local free list and clear the bulk free list. 
+ thread_local_free_list_.Merge(&bulk_free_list_); } -inline void RosAlloc::Run::MarkThreadLocalFreeBitMap(void* ptr) { +inline void RosAlloc::Run::AddToThreadLocalFreeList(void* ptr) { DCHECK(IsThreadLocal()); - MarkFreeBitMapShared(ptr, ThreadLocalFreeBitMap(), "MarkThreadLocalFreeBitMap"); + AddToFreeListShared(ptr, &thread_local_free_list_, __FUNCTION__); } -inline size_t RosAlloc::Run::MarkBulkFreeBitMap(void* ptr) { - return MarkFreeBitMapShared(ptr, BulkFreeBitMap(), "MarkFreeBitMap"); +inline size_t RosAlloc::Run::AddToBulkFreeList(void* ptr) { + return AddToFreeListShared(ptr, &bulk_free_list_, __FUNCTION__); } -inline size_t RosAlloc::Run::MarkFreeBitMapShared(void* ptr, uint32_t* free_bit_map_base, - const char* caller_name) { +inline size_t RosAlloc::Run::AddToFreeListShared(void* ptr, + SlotFreeList<true>* free_list, + const char* caller_name) { const uint8_t idx = size_bracket_idx_; - const size_t offset_from_slot_base = reinterpret_cast<uint8_t*>(ptr) - - (reinterpret_cast<uint8_t*>(this) + headerSizes[idx]); const size_t bracket_size = bracketSizes[idx]; - memset(ptr, 0, bracket_size); - DCHECK_EQ(offset_from_slot_base % bracket_size, static_cast<size_t>(0)); - size_t slot_idx = offset_from_slot_base / bracket_size; - DCHECK_LT(slot_idx, numOfSlots[idx]); - size_t vec_idx = slot_idx / 32; - if (kIsDebugBuild) { - size_t num_vec = NumberOfBitmapVectors(); - DCHECK_LT(vec_idx, num_vec); - } - size_t vec_off = slot_idx % 32; - uint32_t* vec = &free_bit_map_base[vec_idx]; - const uint32_t mask = 1U << vec_off; - DCHECK_EQ(*vec & mask, 0U); - *vec |= mask; - DCHECK_NE(*vec & mask, 0U); + Slot* slot = ToSlot(ptr); + memset(slot, 0, bracket_size); + free_list->Add(slot); if (kTraceRosAlloc) { - LOG(INFO) << "RosAlloc::Run::" << caller_name << "() : 0x" << std::hex - << reinterpret_cast<intptr_t>(ptr) - << ", bracket_size=" << std::dec << bracketSizes[idx] << ", slot_idx=" << slot_idx; + LOG(INFO) << "RosAlloc::Run::" << caller_name << "() : " << ptr + << ", bracket_size=" << std::dec << bracket_size << ", slot_idx=" << SlotIndex(slot); } return bracket_size; } -inline uint32_t RosAlloc::Run::GetBitmapLastVectorMask(size_t num_slots, size_t num_vec) { - const size_t kBitsPerVec = 32; - DCHECK_GE(num_vec * kBitsPerVec, num_slots); - DCHECK_NE(num_vec, 0U); - size_t remain = num_vec * kBitsPerVec - num_slots; - DCHECK_LT(remain, kBitsPerVec); - return ((1U << remain) - 1) << ((kBitsPerVec - remain) & 0x1F); -} - -inline bool RosAlloc::Run::IsAllFree() { +inline void RosAlloc::Run::ZeroHeaderAndSlotHeaders() { + DCHECK(IsAllFree()); const uint8_t idx = size_bracket_idx_; - const size_t num_slots = numOfSlots[idx]; - const size_t num_vec = NumberOfBitmapVectors(); - DCHECK_NE(num_vec, 0U); - // Check the last vector after the loop since it uses a special case for the masked bits. - for (size_t v = 0; v < num_vec - 1; v++) { - uint32_t vec = alloc_bit_map_[v]; - if (vec != 0) { - return false; - } - } - // Make sure the last word is equal to the mask, all other bits must be 0. - return alloc_bit_map_[num_vec - 1] == GetBitmapLastVectorMask(num_slots, num_vec); -} - -inline bool RosAlloc::Run::IsBulkFreeBitmapClean() { - const size_t num_vec = NumberOfBitmapVectors(); - for (size_t v = 0; v < num_vec; v++) { - uint32_t vec = BulkFreeBitMap()[v]; - if (vec != 0) { - return false; - } + // Zero the slot header (next pointers). 
+ for (Slot* slot = free_list_.Head(); slot != nullptr; ) { + Slot* next_slot = slot->Next(); + slot->Clear(); + slot = next_slot; } - return true; -} - -inline bool RosAlloc::Run::IsThreadLocalFreeBitmapClean() { - const size_t num_vec = NumberOfBitmapVectors(); - for (size_t v = 0; v < num_vec; v++) { - uint32_t vec = ThreadLocalFreeBitMap()[v]; - if (vec != 0) { - return false; + // Zero the header. + memset(this, 0, headerSizes[idx]); + // Check that the entire run is all zero. + if (kIsDebugBuild) { + const size_t size = numOfPages[idx] * kPageSize; + const uintptr_t* word_ptr = reinterpret_cast<uintptr_t*>(this); + for (size_t i = 0; i < size / sizeof(uintptr_t); ++i) { + CHECK_EQ(word_ptr[i], 0U) << "words don't match at index " << i; } } - return true; -} - -inline void RosAlloc::Run::SetAllocBitMapBitsForInvalidSlots() { - const size_t idx = size_bracket_idx_; - const size_t num_slots = numOfSlots[idx]; - const size_t num_vec = RoundUp(num_slots, 32) / 32; - DCHECK_NE(num_vec, 0U); - // Make sure to set the bits at the end of the bitmap so that we don't allocate there since they - // don't represent valid slots. - alloc_bit_map_[num_vec - 1] |= GetBitmapLastVectorMask(num_slots, num_vec); -} - -inline void RosAlloc::Run::ZeroHeader() { - const uint8_t idx = size_bracket_idx_; - memset(this, 0, headerSizes[idx]); } inline void RosAlloc::Run::ZeroData() { const uint8_t idx = size_bracket_idx_; - uint8_t* slot_begin = reinterpret_cast<uint8_t*>(this) + headerSizes[idx]; + uint8_t* slot_begin = reinterpret_cast<uint8_t*>(FirstSlot()); memset(slot_begin, 0, numOfSlots[idx] * bracketSizes[idx]); } -inline void RosAlloc::Run::FillAllocBitMap() { - size_t num_vec = NumberOfBitmapVectors(); - memset(alloc_bit_map_, 0xFF, sizeof(uint32_t) * num_vec); - first_search_vec_idx_ = num_vec - 1; // No free bits in any of the bitmap words. -} - void RosAlloc::Run::InspectAllSlots(void (*handler)(void* start, void* end, size_t used_bytes, void* callback_arg), void* arg) { size_t idx = size_bracket_idx_; @@ -1126,26 +993,27 @@ void RosAlloc::Run::InspectAllSlots(void (*handler)(void* start, void* end, size size_t bracket_size = IndexToBracketSize(idx); DCHECK_EQ(slot_base + num_slots * bracket_size, reinterpret_cast<uint8_t*>(this) + numOfPages[idx] * kPageSize); - size_t num_vec = RoundUp(num_slots, 32) / 32; - size_t slots = 0; - const uint32_t* const tl_free_vecp = IsThreadLocal() ? ThreadLocalFreeBitMap() : nullptr; - for (size_t v = 0; v < num_vec; v++, slots += 32) { - DCHECK_GE(num_slots, slots); - uint32_t vec = alloc_bit_map_[v]; - if (tl_free_vecp != nullptr) { - // Clear out the set bits in the thread local free bitmap since these aren't actually - // allocated. - vec &= ~tl_free_vecp[v]; - } - size_t end = std::min(num_slots - slots, static_cast<size_t>(32)); - for (size_t i = 0; i < end; ++i) { - bool is_allocated = ((vec >> i) & 0x1) != 0; - uint8_t* slot_addr = slot_base + (slots + i) * bracket_size; - if (is_allocated) { - handler(slot_addr, slot_addr + bracket_size, bracket_size, arg); - } else { - handler(slot_addr, slot_addr + bracket_size, 0, arg); - } + // Free slots are on the free list and the allocated/used slots are not. We traverse the free list + // to find out and record which slots are free in the is_free array. 
+ std::unique_ptr<bool[]> is_free(new bool[num_slots]()); // zero initialized + for (Slot* slot = free_list_.Head(); slot != nullptr; slot = slot->Next()) { + size_t slot_idx = SlotIndex(slot); + DCHECK_LT(slot_idx, num_slots); + is_free[slot_idx] = true; + } + if (IsThreadLocal()) { + for (Slot* slot = thread_local_free_list_.Head(); slot != nullptr; slot = slot->Next()) { + size_t slot_idx = SlotIndex(slot); + DCHECK_LT(slot_idx, num_slots); + is_free[slot_idx] = true; + } + } + for (size_t slot_idx = 0; slot_idx < num_slots; ++slot_idx) { + uint8_t* slot_addr = slot_base + slot_idx * bracket_size; + if (!is_free[slot_idx]) { + handler(slot_addr, slot_addr + bracket_size, bracket_size, arg); + } else { + handler(slot_addr, slot_addr + bracket_size, 0, arg); } } } @@ -1236,7 +1104,7 @@ size_t RosAlloc::BulkFree(Thread* self, void** ptrs, size_t num_ptrs) { DCHECK(run != nullptr); DCHECK_EQ(run->magic_num_, kMagicNum); // Set the bit in the bulk free bit map. - freed_bytes += run->MarkBulkFreeBitMap(ptr); + freed_bytes += run->AddToBulkFreeList(ptr); #ifdef __ANDROID__ if (!run->to_be_bulk_freed_) { run->to_be_bulk_freed_ = true; @@ -1262,7 +1130,7 @@ size_t RosAlloc::BulkFree(Thread* self, void** ptrs, size_t num_ptrs) { DCHECK_LT(run->size_bracket_idx_, kNumThreadLocalSizeBrackets); DCHECK(non_full_runs_[idx].find(run) == non_full_runs_[idx].end()); DCHECK(full_runs_[idx].find(run) == full_runs_[idx].end()); - run->UnionBulkFreeBitMapToThreadLocalFreeBitMap(); + run->MergeBulkFreeListToThreadLocalFreeList(); if (kTraceRosAlloc) { LOG(INFO) << "RosAlloc::BulkFree() : Freed slot(s) in a thread local run 0x" << std::hex << reinterpret_cast<intptr_t>(run); @@ -1272,7 +1140,7 @@ size_t RosAlloc::BulkFree(Thread* self, void** ptrs, size_t num_ptrs) { // it's become all free. } else { bool run_was_full = run->IsFull(); - run->MergeBulkFreeBitMapIntoAllocBitMap(); + run->MergeBulkFreeListToFreeList(); if (kTraceRosAlloc) { LOG(INFO) << "RosAlloc::BulkFree() : Freed slot(s) in a run 0x" << std::hex << reinterpret_cast<intptr_t>(run); @@ -1316,7 +1184,7 @@ size_t RosAlloc::BulkFree(Thread* self, void** ptrs, size_t num_ptrs) { DCHECK(non_full_runs->find(run) == non_full_runs->end()); } if (!run_was_current) { - run->ZeroHeader(); + run->ZeroHeaderAndSlotHeaders(); MutexLock lock_mu(self, lock_); FreePages(self, run, true); } @@ -1677,9 +1545,9 @@ size_t RosAlloc::RevokeThreadLocalRuns(Thread* thread) { size_t num_free_slots = thread_local_run->NumberOfFreeSlots(); free_bytes += num_free_slots * bracketSizes[idx]; bool dont_care; - thread_local_run->MergeThreadLocalFreeBitMapToAllocBitMap(&dont_care); + thread_local_run->MergeThreadLocalFreeListToFreeList(&dont_care); thread_local_run->SetIsThreadLocal(false); - thread_local_run->MergeBulkFreeBitMapIntoAllocBitMap(); + thread_local_run->MergeBulkFreeListToFreeList(); DCHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end()); DCHECK(full_runs_[idx].find(thread_local_run) == full_runs_[idx].end()); RevokeRun(self, idx, thread_local_run); @@ -1702,7 +1570,7 @@ void RosAlloc::RevokeRun(Thread* self, size_t idx, Run* run) { } } } else if (run->IsAllFree()) { - run->ZeroHeader(); + run->ZeroHeaderAndSlotHeaders(); MutexLock mu(self, lock_); FreePages(self, run, true); } else { @@ -1814,22 +1682,15 @@ void RosAlloc::Initialize() { size_t max_num_of_slots = run_size / bracket_size; // Compute the actual number of slots by taking the header and // alignment into account. 
- size_t fixed_header_size = RoundUp(Run::fixed_header_size(), sizeof(uint32_t)); - DCHECK_EQ(fixed_header_size, static_cast<size_t>(8)); + size_t fixed_header_size = RoundUp(Run::fixed_header_size(), sizeof(uint64_t)); + DCHECK_EQ(fixed_header_size, 80U); size_t header_size = 0; - size_t bulk_free_bit_map_offset = 0; - size_t thread_local_free_bit_map_offset = 0; size_t num_of_slots = 0; // Search for the maximum number of slots that allows enough space - // for the header (including the bit maps.) + // for the header. for (int s = max_num_of_slots; s >= 0; s--) { size_t tmp_slots_size = bracket_size * s; - size_t tmp_bit_map_size = RoundUp(s, sizeof(uint32_t) * kBitsPerByte) / kBitsPerByte; - size_t tmp_bulk_free_bit_map_size = tmp_bit_map_size; - size_t tmp_bulk_free_bit_map_off = fixed_header_size + tmp_bit_map_size; - size_t tmp_thread_local_free_bit_map_size = tmp_bit_map_size; - size_t tmp_thread_local_free_bit_map_off = tmp_bulk_free_bit_map_off + tmp_bulk_free_bit_map_size; - size_t tmp_unaligned_header_size = tmp_thread_local_free_bit_map_off + tmp_thread_local_free_bit_map_size; + size_t tmp_unaligned_header_size = fixed_header_size; // Align up the unaligned header size. bracket_size may not be a power of two. size_t tmp_header_size = (tmp_unaligned_header_size % bracket_size == 0) ? tmp_unaligned_header_size : @@ -1841,24 +1702,19 @@ void RosAlloc::Initialize() { // space for the header (including the bit maps.) num_of_slots = s; header_size = tmp_header_size; - bulk_free_bit_map_offset = tmp_bulk_free_bit_map_off; - thread_local_free_bit_map_offset = tmp_thread_local_free_bit_map_off; break; } } - DCHECK(num_of_slots > 0 && header_size > 0 && bulk_free_bit_map_offset > 0); + DCHECK_GT(num_of_slots, 0U); + DCHECK_GT(header_size, 0U); // Add the padding for the alignment remainder. header_size += run_size % bracket_size; DCHECK_EQ(header_size + num_of_slots * bracket_size, run_size); numOfSlots[i] = num_of_slots; headerSizes[i] = header_size; - bulkFreeBitMapOffsets[i] = bulk_free_bit_map_offset; - threadLocalFreeBitMapOffsets[i] = thread_local_free_bit_map_offset; if (kTraceRosAlloc) { LOG(INFO) << "numOfSlots[" << i << "]=" << numOfSlots[i] - << ", headerSizes[" << i << "]=" << headerSizes[i] - << ", bulkFreeBitMapOffsets[" << i << "]=" << bulkFreeBitMapOffsets[i] - << ", threadLocalFreeBitMapOffsets[" << i << "]=" << threadLocalFreeBitMapOffsets[i];; + << ", headerSizes[" << i << "]=" << headerSizes[i]; } } // Fill the alloc bitmap so nobody can successfully allocate from it. @@ -1868,8 +1724,11 @@ void RosAlloc::Initialize() { // It doesn't matter which size bracket we use since the main goal is to have the allocation // fail 100% of the time you attempt to allocate into the dedicated full run. dedicated_full_run_->size_bracket_idx_ = 0; - dedicated_full_run_->FillAllocBitMap(); + DCHECK_EQ(dedicated_full_run_->FreeList()->Size(), 0U); // It looks full. dedicated_full_run_->SetIsThreadLocal(true); + + // The smallest bracket size must be at least as large as the sizeof(Slot). 
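That constraint exists because the free list is intrusive: a free slot's own bytes are reused to hold the link to the next free slot, so even the smallest bracket must be able to store a Slot. The DCHECK that follows enforces exactly this. A tiny sketch of the idea, with an illustrative bracket size rather than ART's real one:

#include <cassert>
#include <cstddef>
#include <cstdint>

// Intrusive free list: the link lives inside the free slot itself, so no extra
// per-slot metadata (such as a bitmap) is needed.
struct Slot { Slot* next; };

int main() {
  constexpr size_t kSmallestBracket = 8;  // illustrative; must be >= sizeof(Slot)
  static_assert(sizeof(Slot) <= kSmallestBracket,
                "every slot must be big enough to hold the free-list link");
  alignas(Slot) uint8_t slot_storage[2][kSmallestBracket] = {};
  // Thread the two (free) slots together by writing the link into their own storage.
  Slot* first = reinterpret_cast<Slot*>(slot_storage[0]);
  Slot* second = reinterpret_cast<Slot*>(slot_storage[1]);
  first->next = second;
  second->next = nullptr;
  assert(first->next == second);
  return 0;
}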
+ DCHECK_LE(sizeof(Slot), bracketSizes[0]) << "sizeof(Slot) <= the smallest bracket size"; } void RosAlloc::BytesAllocatedCallback(void* start ATTRIBUTE_UNUSED, void* end ATTRIBUTE_UNUSED, @@ -2025,19 +1884,12 @@ void RosAlloc::Run::Verify(Thread* self, RosAlloc* rosalloc, bool running_on_mem CHECK_LT(idx, kNumOfSizeBrackets) << "Out of range size bracket index : " << Dump(); uint8_t* slot_base = reinterpret_cast<uint8_t*>(this) + headerSizes[idx]; const size_t num_slots = numOfSlots[idx]; - const size_t num_vec = RoundUp(num_slots, 32) / 32; - CHECK_GT(num_vec, 0U); size_t bracket_size = IndexToBracketSize(idx); CHECK_EQ(slot_base + num_slots * bracket_size, reinterpret_cast<uint8_t*>(this) + numOfPages[idx] * kPageSize) << "Mismatch in the end address of the run " << Dump(); - // Check that the bulk free bitmap is clean. It's only used during BulkFree(). - CHECK(IsBulkFreeBitmapClean()) << "The bulk free bit map isn't clean " << Dump(); - uint32_t last_word_mask = GetBitmapLastVectorMask(num_slots, num_vec); - // Make sure all the bits at the end of the run are set so that we don't allocate there. - CHECK_EQ(alloc_bit_map_[num_vec - 1] & last_word_mask, last_word_mask); - // Ensure that the first bitmap index is valid. - CHECK_LT(first_search_vec_idx_, num_vec); + // Check that the bulk free list is empty. It's only used during BulkFree(). + CHECK(IsBulkFreeListEmpty()) << "The bulk free isn't empty " << Dump(); // Check the thread local runs, the current runs, and the run sets. if (IsThreadLocal()) { // If it's a thread local run, then it must be pointed to by an owner thread. @@ -2059,11 +1911,11 @@ void RosAlloc::Run::Verify(Thread* self, RosAlloc* rosalloc, bool running_on_mem } CHECK(owner_found) << "A thread local run has no owner thread " << Dump(); } else { - // If it's not thread local, check that the thread local free bitmap is clean. - CHECK(IsThreadLocalFreeBitmapClean()) - << "A non-thread-local run's thread local free bitmap isn't clean " + // If it's not thread local, check that the thread local free list is empty. + CHECK(IsThreadLocalFreeListEmpty()) + << "A non-thread-local run's thread local free list isn't empty " << Dump(); - // Check if it's a current run for the size bucket. + // Check if it's a current run for the size bracket. bool is_current_run = false; for (size_t i = 0; i < kNumOfSizeBrackets; i++) { MutexLock mu(self, *rosalloc->size_bracket_locks_[i]); @@ -2101,34 +1953,39 @@ void RosAlloc::Run::Verify(Thread* self, RosAlloc* rosalloc, bool running_on_mem } } // Check each slot. - size_t slots = 0; size_t memory_tool_modifier = running_on_memory_tool ? 2 * ::art::gc::space::kDefaultMemoryToolRedZoneBytes : 0U; - for (size_t v = 0; v < num_vec; v++, slots += 32) { - DCHECK_GE(num_slots, slots) << "Out of bounds"; - uint32_t vec = alloc_bit_map_[v]; - uint32_t thread_local_free_vec = ThreadLocalFreeBitMap()[v]; - size_t end = std::min(num_slots - slots, static_cast<size_t>(32)); - for (size_t i = 0; i < end; ++i) { - bool is_allocated = ((vec >> i) & 0x1) != 0; - // If a thread local run, slots may be marked freed in the - // thread local free bitmap. 
- bool is_thread_local_freed = IsThreadLocal() && ((thread_local_free_vec >> i) & 0x1) != 0; - if (is_allocated && !is_thread_local_freed) { - uint8_t* slot_addr = slot_base + (slots + i) * bracket_size; - if (running_on_memory_tool) { - slot_addr += ::art::gc::space::kDefaultMemoryToolRedZoneBytes; - } - mirror::Object* obj = reinterpret_cast<mirror::Object*>(slot_addr); - size_t obj_size = obj->SizeOf(); - CHECK_LE(obj_size + memory_tool_modifier, kLargeSizeThreshold) - << "A run slot contains a large object " << Dump(); - CHECK_EQ(SizeToIndex(obj_size + memory_tool_modifier), idx) - << PrettyTypeOf(obj) << " " - << "obj_size=" << obj_size << "(" << obj_size + memory_tool_modifier << "), idx=" << idx - << " A run slot contains an object with wrong size " << Dump(); - } + // TODO: reuse InspectAllSlots(). + std::unique_ptr<bool[]> is_free(new bool[num_slots]()); // zero initialized + // Mark the free slots and the remaining ones are allocated. + for (Slot* slot = free_list_.Head(); slot != nullptr; slot = slot->Next()) { + size_t slot_idx = SlotIndex(slot); + DCHECK_LT(slot_idx, num_slots); + is_free[slot_idx] = true; + } + if (IsThreadLocal()) { + for (Slot* slot = thread_local_free_list_.Head(); slot != nullptr; slot = slot->Next()) { + size_t slot_idx = SlotIndex(slot); + DCHECK_LT(slot_idx, num_slots); + is_free[slot_idx] = true; + } + } + for (size_t slot_idx = 0; slot_idx < num_slots; ++slot_idx) { + uint8_t* slot_addr = slot_base + slot_idx * bracket_size; + if (running_on_memory_tool) { + slot_addr += ::art::gc::space::kDefaultMemoryToolRedZoneBytes; + } + if (!is_free[slot_idx]) { + // The slot is allocated + mirror::Object* obj = reinterpret_cast<mirror::Object*>(slot_addr); + size_t obj_size = obj->SizeOf(); + CHECK_LE(obj_size + memory_tool_modifier, kLargeSizeThreshold) + << "A run slot contains a large object " << Dump(); + CHECK_EQ(SizeToIndex(obj_size + memory_tool_modifier), idx) + << PrettyTypeOf(obj) << " " + << "obj_size=" << obj_size << "(" << obj_size + memory_tool_modifier << "), idx=" << idx + << " A run slot contains an object with wrong size " << Dump(); } } } diff --git a/runtime/gc/allocator/rosalloc.h b/runtime/gc/allocator/rosalloc.h index a7f29af274..87f1392920 100644 --- a/runtime/gc/allocator/rosalloc.h +++ b/runtime/gc/allocator/rosalloc.h @@ -112,6 +112,198 @@ class RosAlloc { DISALLOW_COPY_AND_ASSIGN(FreePageRun); }; + // The slot header. + class Slot { + public: + Slot* Next() const { + return next_; + } + void SetNext(Slot* next) { + next_ = next; + } + // The slot right before this slot in terms of the address. + Slot* Left(size_t bracket_size) { + return reinterpret_cast<Slot*>(reinterpret_cast<uintptr_t>(this) - bracket_size); + } + void Clear() { + next_ = nullptr; + } + + private: + Slot* next_; // Next slot in the list. + }; + + // We use the tail (kUseTail == true) for the bulk or thread-local free lists to avoid the need to + // traverse the list from the head to the tail when merging free lists. + // We don't use the tail (kUseTail == false) for the free list to avoid the need to manage the + // tail in the allocation fast path for a performance reason. + template<bool kUseTail = true> + class SlotFreeList { + public: + SlotFreeList() : head_(0U), tail_(0), size_(0) {} + Slot* Head() const { + return reinterpret_cast<Slot*>(head_); + } + Slot* Tail() const { + CHECK(kUseTail); + return reinterpret_cast<Slot*>(tail_); + } + size_t Size() const { + return size_; + } + // Removes from the head of the free list. 
+ Slot* Remove() { + Slot* slot; + if (kIsDebugBuild) { + Verify(); + } + Slot** headp = reinterpret_cast<Slot**>(&head_); + Slot** tailp = kUseTail ? reinterpret_cast<Slot**>(&tail_) : nullptr; + Slot* old_head = *headp; + if (old_head == nullptr) { + // List was empty. + if (kUseTail) { + DCHECK(*tailp == nullptr); + } + return nullptr; + } else { + // List wasn't empty. + if (kUseTail) { + DCHECK(*tailp != nullptr); + } + Slot* old_head_next = old_head->Next(); + slot = old_head; + *headp = old_head_next; + if (kUseTail && old_head_next == nullptr) { + // List becomes empty. + *tailp = nullptr; + } + } + slot->Clear(); + --size_; + if (kIsDebugBuild) { + Verify(); + } + return slot; + } + void Add(Slot* slot) { + if (kIsDebugBuild) { + Verify(); + } + DCHECK(slot != nullptr); + Slot** headp = reinterpret_cast<Slot**>(&head_); + Slot** tailp = kUseTail ? reinterpret_cast<Slot**>(&tail_) : nullptr; + Slot* old_head = *headp; + if (old_head == nullptr) { + // List was empty. + if (kUseTail) { + DCHECK(*tailp == nullptr); + } + *headp = slot; + if (kUseTail) { + *tailp = slot; + } + } else { + // List wasn't empty. + if (kUseTail) { + DCHECK(*tailp != nullptr); + } + *headp = slot; + slot->SetNext(old_head); + } + ++size_; + if (kIsDebugBuild) { + Verify(); + } + } + // Merge the given list into this list. Empty the given list. + // Deliberately support only a kUseTail == true SlotFreeList parameter because 1) we don't + // currently have a situation where we need a kUseTail == false SlotFreeList parameter, and 2) + // supporting the kUseTail == false parameter would require a O(n) linked list traversal to do + // the merge if 'this' SlotFreeList has kUseTail == false, which we'd like to avoid. + void Merge(SlotFreeList<true>* list) { + if (kIsDebugBuild) { + Verify(); + CHECK(list != nullptr); + list->Verify(); + } + if (list->Size() == 0) { + return; + } + Slot** headp = reinterpret_cast<Slot**>(&head_); + Slot** tailp = kUseTail ? reinterpret_cast<Slot**>(&tail_) : nullptr; + Slot* old_head = *headp; + if (old_head == nullptr) { + // List was empty. + *headp = list->Head(); + if (kUseTail) { + *tailp = list->Tail(); + } + size_ = list->Size(); + } else { + // List wasn't empty. + DCHECK(list->Head() != nullptr); + *headp = list->Head(); + DCHECK(list->Tail() != nullptr); + list->Tail()->SetNext(old_head); + // if kUseTail, no change to tailp. + size_ += list->Size(); + } + list->Reset(); + if (kIsDebugBuild) { + Verify(); + } + } + + void Reset() { + head_ = 0; + if (kUseTail) { + tail_ = 0; + } + size_ = 0; + } + + void Verify() { + Slot* head = reinterpret_cast<Slot*>(head_); + Slot* tail = kUseTail ? reinterpret_cast<Slot*>(tail_) : nullptr; + if (size_ == 0) { + CHECK(head == nullptr); + if (kUseTail) { + CHECK(tail == nullptr); + } + } else { + CHECK(head != nullptr); + if (kUseTail) { + CHECK(tail != nullptr); + } + size_t count = 0; + for (Slot* slot = head; slot != nullptr; slot = slot->Next()) { + ++count; + if (kUseTail && slot->Next() == nullptr) { + CHECK_EQ(slot, tail); + } + } + CHECK_EQ(size_, count); + } + } + + private: + // A pointer (Slot*) to the head of the list. Always 8 bytes so that we will have the same + // layout between 32 bit and 64 bit, which is not strictly necessary, but we do so for 1) + // uniformity, 2) we won't need to change this code if we move to a non-low 4G heap in the + // future, and 3) the space savings by using 32 bit fields in 32 bit would be lost in noise + // (won't open up enough space to cause an extra slot to be available). 
+ uint64_t head_; + // A pointer (Slot*) to the tail of the list. Always 8 bytes so that we will have the same + // layout between 32 bit and 64 bit. The tail is stored to speed up merging of lists. + // Unused if kUseTail is false. + uint64_t tail_; + // The number of slots in the list. This is used to make it fast to check if a free list is all + // free without traversing the whole free list. + uint32_t size_; + uint32_t padding_ ATTRIBUTE_UNUSED; + }; + // Represents a run of memory slots of the same size. // // A run's memory layout: @@ -125,19 +317,17 @@ class RosAlloc { // +-------------------+ // | to_be_bulk_freed | // +-------------------+ - // | top_bitmap_idx | - // +-------------------+ // | | - // | alloc bit map | + // | free list | // | | // +-------------------+ // | | - // | bulk free bit map | + // | bulk free list | // | | // +-------------------+ // | | // | thread-local free | - // | bit map | + // | list | // | | // +-------------------+ // | padding due to | @@ -160,94 +350,100 @@ class RosAlloc { uint8_t size_bracket_idx_; // The index of the size bracket of this run. uint8_t is_thread_local_; // True if this run is used as a thread-local run. uint8_t to_be_bulk_freed_; // Used within BulkFree() to flag a run that's involved with a bulk free. - uint32_t first_search_vec_idx_; // The index of the first bitmap vector which may contain an available slot. - uint32_t alloc_bit_map_[0]; // The bit map that allocates if each slot is in use. - - // bulk_free_bit_map_[] : The bit map that is used for GC to - // temporarily mark the slots to free without using a lock. After - // all the slots to be freed in a run are marked, all those slots - // get freed in bulk with one locking per run, as opposed to one - // locking per slot to minimize the lock contention. This is used - // within BulkFree(). - - // thread_local_free_bit_map_[] : The bit map that is used for GC - // to temporarily mark the slots to free in a thread-local run - // without using a lock (without synchronizing the thread that - // owns the thread-local run.) When the thread-local run becomes - // full, the thread will check this bit map and update the - // allocation bit map of the run (that is, the slots get freed.) - - // Returns the byte size of the header except for the bit maps. + uint32_t padding_ ATTRIBUTE_UNUSED; + // Use a tailless free list for free_list_ so that the alloc fast path does not manage the tail. + SlotFreeList<false> free_list_; + SlotFreeList<true> bulk_free_list_; + SlotFreeList<true> thread_local_free_list_; + // Padding due to alignment + // Slot 0 + // Slot 1 + // ... + + // Returns the byte size of the header. static size_t fixed_header_size() { - Run temp; - size_t size = reinterpret_cast<uint8_t*>(&temp.alloc_bit_map_) - reinterpret_cast<uint8_t*>(&temp); - DCHECK_EQ(size, static_cast<size_t>(8)); - return size; + return sizeof(Run); + } + Slot* FirstSlot() { + const uint8_t idx = size_bracket_idx_; + return reinterpret_cast<Slot*>(reinterpret_cast<uintptr_t>(this) + headerSizes[idx]); + } + Slot* LastSlot() { + const uint8_t idx = size_bracket_idx_; + const size_t bracket_size = bracketSizes[idx]; + uintptr_t end = reinterpret_cast<uintptr_t>(End()); + Slot* last_slot = reinterpret_cast<Slot*>(end - bracket_size); + DCHECK_LE(FirstSlot(), last_slot); + return last_slot; } - // Returns the base address of the free bit map. 
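To make the head/tail trade-off described above concrete: a tail pointer lets one list be spliced in front of another in constant time, which is what the bulk-free and thread-local-free merges rely on, while the allocation-path list drops the tail to keep Add()/Remove() minimal. The following is a simplified, self-contained model of a tailed free list, not the ART SlotFreeList itself:

#include <cassert>
#include <cstddef>

struct Slot { Slot* next = nullptr; };

// Simplified tailed free list: keeping the tail makes Merge() O(1) because the
// incoming list can be spliced in front of the existing head without traversal.
class TailedFreeList {
 public:
  Slot* Head() const { return head_; }
  size_t Size() const { return size_; }
  void Add(Slot* slot) {              // push at the head
    slot->next = head_;
    head_ = slot;
    if (tail_ == nullptr) tail_ = slot;
    ++size_;
  }
  Slot* Remove() {                    // pop from the head
    Slot* slot = head_;
    if (slot != nullptr) {
      head_ = slot->next;
      if (head_ == nullptr) tail_ = nullptr;
      slot->next = nullptr;
      --size_;
    }
    return slot;
  }
  void Merge(TailedFreeList* other) { // splice 'other' in front of this list
    if (other->size_ == 0) return;
    other->tail_->next = head_;
    head_ = other->head_;
    if (tail_ == nullptr) tail_ = other->tail_;
    size_ += other->size_;
    other->head_ = other->tail_ = nullptr;
    other->size_ = 0;
  }
 private:
  Slot* head_ = nullptr;
  Slot* tail_ = nullptr;
  size_t size_ = 0;
};

int main() {
  Slot slots[3];
  TailedFreeList a, b;
  a.Add(&slots[0]);
  b.Add(&slots[1]);
  b.Add(&slots[2]);
  a.Merge(&b);  // O(1): no traversal of either list
  assert(a.Size() == 3 && b.Size() == 0);
  return 0;
}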
- uint32_t* BulkFreeBitMap() { - return reinterpret_cast<uint32_t*>(reinterpret_cast<uint8_t*>(this) + bulkFreeBitMapOffsets[size_bracket_idx_]); + SlotFreeList<false>* FreeList() { + return &free_list_; } - // Returns the base address of the thread local free bit map. - uint32_t* ThreadLocalFreeBitMap() { - return reinterpret_cast<uint32_t*>(reinterpret_cast<uint8_t*>(this) + threadLocalFreeBitMapOffsets[size_bracket_idx_]); + SlotFreeList<true>* BulkFreeList() { + return &bulk_free_list_; + } + SlotFreeList<true>* ThreadLocalFreeList() { + return &thread_local_free_list_; } void* End() { return reinterpret_cast<uint8_t*>(this) + kPageSize * numOfPages[size_bracket_idx_]; } - // Returns the number of bitmap words per run. - size_t NumberOfBitmapVectors() const { - return RoundUp(numOfSlots[size_bracket_idx_], 32) / 32; - } void SetIsThreadLocal(bool is_thread_local) { is_thread_local_ = is_thread_local ? 1 : 0; } bool IsThreadLocal() const { return is_thread_local_ != 0; } - // Frees slots in the allocation bit map with regard to the - // thread-local free bit map. Used when a thread-local run becomes + // Set up the free list for a new/empty run. + void InitFreeList() { + const uint8_t idx = size_bracket_idx_; + const size_t bracket_size = bracketSizes[idx]; + Slot* first_slot = FirstSlot(); + // Add backwards so the first slot is at the head of the list. + for (Slot* slot = LastSlot(); slot >= first_slot; slot = slot->Left(bracket_size)) { + free_list_.Add(slot); + } + } + // Merge the thread local free list to the free list. Used when a thread-local run becomes // full. - bool MergeThreadLocalFreeBitMapToAllocBitMap(bool* is_all_free_after_out); - // Frees slots in the allocation bit map with regard to the bulk - // free bit map. Used in a bulk free. - void MergeBulkFreeBitMapIntoAllocBitMap(); - // Unions the slots to be freed in the free bit map into the - // thread-local free bit map. In a bulk free, as a two-step - // process, GC will first record all the slots to free in a run in - // the free bit map where it can write without a lock, and later - // acquire a lock once per run to union the bits of the free bit - // map to the thread-local free bit map. - void UnionBulkFreeBitMapToThreadLocalFreeBitMap(); + bool MergeThreadLocalFreeListToFreeList(bool* is_all_free_after_out); + // Merge the bulk free list to the free list. Used in a bulk free. + void MergeBulkFreeListToFreeList(); + // Merge the bulk free list to the thread local free list. In a bulk free, as a two-step + // process, GC will first record all the slots to free in a run in the bulk free list where it + // can write without a lock, and later acquire a lock once per run to merge the bulk free list + // to the thread-local free list. + void MergeBulkFreeListToThreadLocalFreeList(); // Allocates a slot in a run. - void* AllocSlot(); + ALWAYS_INLINE void* AllocSlot(); // Frees a slot in a run. This is used in a non-bulk free. void FreeSlot(void* ptr); - // Marks the slots to free in the bulk free bit map. Returns the bracket size. - size_t MarkBulkFreeBitMap(void* ptr); - // Marks the slots to free in the thread-local free bit map. - void MarkThreadLocalFreeBitMap(void* ptr); - // Last word mask, all of the bits in the last word which aren't valid slots are set to - // optimize allocation path. - static uint32_t GetBitmapLastVectorMask(size_t num_slots, size_t num_vec); + // Add the given slot to the bulk free list. Returns the bracket size. 
+ size_t AddToBulkFreeList(void* ptr); + // Add the given slot to the thread-local free list. + void AddToThreadLocalFreeList(void* ptr); // Returns true if all the slots in the run are not in use. - bool IsAllFree(); + bool IsAllFree() const { + return free_list_.Size() == numOfSlots[size_bracket_idx_]; + } // Returns the number of free slots. - size_t NumberOfFreeSlots(); + size_t NumberOfFreeSlots() { + return free_list_.Size(); + } // Returns true if all the slots in the run are in use. ALWAYS_INLINE bool IsFull(); - // Returns true if the bulk free bit map is clean. - bool IsBulkFreeBitmapClean(); - // Returns true if the thread local free bit map is clean. - bool IsThreadLocalFreeBitmapClean(); - // Set the alloc_bit_map_ bits for slots that are past the end of the run. - void SetAllocBitMapBitsForInvalidSlots(); + // Returns true if the bulk free list is empty. + bool IsBulkFreeListEmpty() const { + return bulk_free_list_.Size() == 0; + } + // Returns true if the thread local free list is empty. + bool IsThreadLocalFreeListEmpty() const { + return thread_local_free_list_.Size() == 0; + } // Zero the run's data. void ZeroData(); - // Zero the run's header. - void ZeroHeader(); - // Fill the alloc bitmap with 1s. - void FillAllocBitMap(); + // Zero the run's header and the slot headers. + void ZeroHeaderAndSlotHeaders(); // Iterate over all the slots and apply the given function. void InspectAllSlots(void (*handler)(void* start, void* end, size_t used_bytes, void* callback_arg), void* arg); // Dump the run metadata for debugging. @@ -258,11 +454,24 @@ class RosAlloc { REQUIRES(Locks::thread_list_lock_); private: - // The common part of MarkFreeBitMap() and MarkThreadLocalFreeBitMap(). Returns the bracket + // The common part of AddToBulkFreeList() and AddToThreadLocalFreeList(). Returns the bracket // size. - size_t MarkFreeBitMapShared(void* ptr, uint32_t* free_bit_map_base, const char* caller_name); - // Turns the bit map into a string for debugging. - static std::string BitMapToStr(uint32_t* bit_map_base, size_t num_vec); + size_t AddToFreeListShared(void* ptr, SlotFreeList<true>* free_list, const char* caller_name); + // Turns a FreeList into a string for debugging. + template<bool kUseTail> + std::string FreeListToStr(SlotFreeList<kUseTail>* free_list); + // Check a given pointer is a valid slot address and return it as Slot*. + Slot* ToSlot(void* ptr) { + const uint8_t idx = size_bracket_idx_; + const size_t bracket_size = bracketSizes[idx]; + const size_t offset_from_slot_base = reinterpret_cast<uint8_t*>(ptr) + - reinterpret_cast<uint8_t*>(FirstSlot()); + DCHECK_EQ(offset_from_slot_base % bracket_size, static_cast<size_t>(0)); + size_t slot_idx = offset_from_slot_base / bracket_size; + DCHECK_LT(slot_idx, numOfSlots[idx]); + return reinterpret_cast<Slot*>(ptr); + } + size_t SlotIndex(Slot* slot); // TODO: DISALLOW_COPY_AND_ASSIGN(Run); }; @@ -283,10 +492,6 @@ class RosAlloc { static size_t numOfSlots[kNumOfSizeBrackets]; // The header sizes in bytes of the runs for each size bracket. static size_t headerSizes[kNumOfSizeBrackets]; - // The byte offsets of the bulk free bit maps of the runs for each size bracket. - static size_t bulkFreeBitMapOffsets[kNumOfSizeBrackets]; - // The byte offsets of the thread-local free bit maps of the runs for each size bracket. - static size_t threadLocalFreeBitMapOffsets[kNumOfSizeBrackets]; // Initialize the run specs (the above arrays). 
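The ToSlot()/SlotIndex() helpers above are plain address arithmetic relative to the first slot. A small sketch of that mapping, with an illustrative bracket size and the run header omitted for brevity:

#include <cassert>
#include <cstddef>
#include <cstdint>

// Map a slot address to its index within a run and back, as ToSlot()/SlotIndex()
// do: everything is relative to the first slot and the bracket (slot) size.
size_t SlotIndex(const uint8_t* slot, const uint8_t* first_slot, size_t bracket_size) {
  size_t offset = static_cast<size_t>(slot - first_slot);
  assert(offset % bracket_size == 0);  // must be a valid slot address
  return offset / bracket_size;
}

uint8_t* SlotAddress(size_t index, uint8_t* first_slot, size_t bracket_size) {
  return first_slot + index * bracket_size;
}

int main() {
  constexpr size_t kBracket = 16;  // illustrative bracket size
  uint8_t run[8 * kBracket] = {};
  uint8_t* first_slot = run;       // header omitted in this model
  uint8_t* third = SlotAddress(2, first_slot, kBracket);
  assert(SlotIndex(third, first_slot, kBracket) == 2);
  return 0;
}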
static void Initialize(); @@ -493,7 +698,7 @@ class RosAlloc { // The reader-writer lock to allow one bulk free at a time while // allowing multiple individual frees at the same time. Also, this // is used to avoid race conditions between BulkFree() and - // RevokeThreadLocalRuns() on the bulk free bitmaps. + // RevokeThreadLocalRuns() on the bulk free list. ReaderWriterMutex bulk_free_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER; // The page release mode. diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc index 468179c9d5..0a7a69f37e 100644 --- a/runtime/gc/collector/concurrent_copying.cc +++ b/runtime/gc/collector/concurrent_copying.cc @@ -620,7 +620,10 @@ void ConcurrentCopying::PushOntoMarkStack(mirror::Object* to_ref) { gc_mark_stack_->PushBack(to_ref); } else { CHECK_EQ(static_cast<uint32_t>(mark_stack_mode), - static_cast<uint32_t>(kMarkStackModeGcExclusive)); + static_cast<uint32_t>(kMarkStackModeGcExclusive)) + << "ref=" << to_ref + << " self->gc_marking=" << self->GetIsGcMarking() + << " cc->is_marking=" << is_marking_; CHECK(self == thread_running_gc_) << "Only GC-running thread should access the mark stack " << "in the GC exclusive mark stack mode"; diff --git a/runtime/gc/collector_type.h b/runtime/gc/collector_type.h index 95ba380a01..416510d73b 100644 --- a/runtime/gc/collector_type.h +++ b/runtime/gc/collector_type.h @@ -40,6 +40,8 @@ enum CollectorType { kCollectorTypeHeapTrim, // A (mostly) concurrent copying collector. kCollectorTypeCC, + // Instrumentation critical section fake collector. + kCollectorTypeInstrumentation, // A homogeneous space compaction collector used in background transition // when both foreground and background collector are CMS. kCollectorTypeHomogeneousSpaceCompact, diff --git a/runtime/gc/gc_cause.cc b/runtime/gc/gc_cause.cc index 6be683df48..84243dfe1e 100644 --- a/runtime/gc/gc_cause.cc +++ b/runtime/gc/gc_cause.cc @@ -33,6 +33,7 @@ const char* PrettyCause(GcCause cause) { case kGcCauseDisableMovingGc: return "DisableMovingGc"; case kGcCauseHomogeneousSpaceCompact: return "HomogeneousSpaceCompact"; case kGcCauseTrim: return "HeapTrim"; + case kGcCauseInstrumentation: return "Instrumentation"; default: LOG(FATAL) << "Unreachable"; UNREACHABLE(); diff --git a/runtime/gc/gc_cause.h b/runtime/gc/gc_cause.h index 0536f32df9..34c776622a 100644 --- a/runtime/gc/gc_cause.h +++ b/runtime/gc/gc_cause.h @@ -39,6 +39,8 @@ enum GcCause { kGcCauseDisableMovingGc, // Not a real GC cause, used when we trim the heap. kGcCauseTrim, + // Not a real GC cause, used to implement exclusion between GC and instrumentation. + kGcCauseInstrumentation, // GC triggered for background transition when both foreground and background collector are CMS. kGcCauseHomogeneousSpaceCompact, }; diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc index cfe77135b7..657fcb5f08 100644 --- a/runtime/gc/heap.cc +++ b/runtime/gc/heap.cc @@ -1312,6 +1312,13 @@ void Heap::TrimIndirectReferenceTables(Thread* self) { ATRACE_END(); } +void Heap::StartGC(Thread* self, GcCause cause, CollectorType collector_type) { + MutexLock mu(self, *gc_complete_lock_); + // Ensure there is only one GC at a time. 
+ WaitForGcToCompleteLocked(cause, self); + collector_type_running_ = collector_type; +} + void Heap::TrimSpaces(Thread* self) { { // Need to do this before acquiring the locks since we don't want to get suspended while @@ -1319,10 +1326,7 @@ void Heap::TrimSpaces(Thread* self) { ScopedThreadStateChange tsc(self, kWaitingForGcToComplete); // Pretend we are doing a GC to prevent background compaction from deleting the space we are // trimming. - MutexLock mu(self, *gc_complete_lock_); - // Ensure there is only one GC at a time. - WaitForGcToCompleteLocked(kGcCauseTrim, self); - collector_type_running_ = kCollectorTypeHeapTrim; + StartGC(self, kGcCauseTrim, kCollectorTypeHeapTrim); } ATRACE_BEGIN(__FUNCTION__); const uint64_t start_ns = NanoTime(); @@ -1963,6 +1967,10 @@ HomogeneousSpaceCompactResult Heap::PerformHomogeneousSpaceCompact() { GrowForUtilization(semi_space_collector_); LogGC(kGcCauseHomogeneousSpaceCompact, collector); FinishGC(self, collector::kGcTypeFull); + { + ScopedObjectAccess soa(self); + soa.Vm()->UnloadNativeLibraries(); + } return HomogeneousSpaceCompactResult::kSuccess; } @@ -2104,6 +2112,10 @@ void Heap::TransitionCollector(CollectorType collector_type) { DCHECK(collector != nullptr); LogGC(kGcCauseCollectorTransition, collector); FinishGC(self, collector::kGcTypeFull); + { + ScopedObjectAccess soa(self); + soa.Vm()->UnloadNativeLibraries(); + } int32_t after_allocated = num_bytes_allocated_.LoadSequentiallyConsistent(); int32_t delta_allocated = before_allocated - after_allocated; std::string saved_str; @@ -2588,6 +2600,12 @@ collector::GcType Heap::CollectGarbageInternal(collector::GcType gc_type, FinishGC(self, gc_type); // Inform DDMS that a GC completed. Dbg::GcDidFinish(); + // Unload native libraries for class unloading. We do this after calling FinishGC to prevent + // deadlocks in case the JNI_OnUnload function does allocations. + { + ScopedObjectAccess soa(self); + soa.Vm()->UnloadNativeLibraries(); + } return gc_type; } diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h index d0d0be3826..cc48172f71 100644 --- a/runtime/gc/heap.h +++ b/runtime/gc/heap.h @@ -775,6 +775,8 @@ class Heap { REQUIRES(Locks::mutator_lock_); void LogGC(GcCause gc_cause, collector::GarbageCollector* collector); + void StartGC(Thread* self, GcCause cause, CollectorType collector_type) + REQUIRES(!*gc_complete_lock_); void FinishGC(Thread* self, collector::GcType gc_type) REQUIRES(!*gc_complete_lock_); // Create a mem map with a preferred base address. @@ -1325,6 +1327,7 @@ class Heap { friend class collector::MarkSweep; friend class collector::SemiSpace; friend class ReferenceQueue; + friend class ScopedGCCriticalSection; friend class VerifyReferenceCardVisitor; friend class VerifyReferenceVisitor; friend class VerifyObjectVisitor; diff --git a/runtime/gc/scoped_gc_critical_section.cc b/runtime/gc/scoped_gc_critical_section.cc new file mode 100644 index 0000000000..e7786a1546 --- /dev/null +++ b/runtime/gc/scoped_gc_critical_section.cc @@ -0,0 +1,41 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "scoped_gc_critical_section.h" + +#include "gc/collector_type.h" +#include "gc/heap.h" +#include "runtime.h" +#include "thread-inl.h" + +namespace art { +namespace gc { + +ScopedGCCriticalSection::ScopedGCCriticalSection(Thread* self, + GcCause cause, + CollectorType collector_type) + : self_(self) { + Runtime::Current()->GetHeap()->StartGC(self, cause, collector_type); + old_cause_ = self->StartAssertNoThreadSuspension("ScopedGCCriticalSection"); +} +ScopedGCCriticalSection::~ScopedGCCriticalSection() { + self_->EndAssertNoThreadSuspension(old_cause_); + Runtime::Current()->GetHeap()->FinishGC(self_, collector::kGcTypeNone); +} + +} // namespace gc +} // namespace art + diff --git a/runtime/gc/scoped_gc_critical_section.h b/runtime/gc/scoped_gc_critical_section.h new file mode 100644 index 0000000000..ec93bca802 --- /dev/null +++ b/runtime/gc/scoped_gc_critical_section.h @@ -0,0 +1,47 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_RUNTIME_GC_SCOPED_GC_CRITICAL_SECTION_H_ +#define ART_RUNTIME_GC_SCOPED_GC_CRITICAL_SECTION_H_ + +#include "base/mutex.h" +#include "collector_type.h" +#include "gc_cause.h" + +namespace art { + +class Thread; + +namespace gc { + +// Wait until the GC is finished and then prevent GC from starting until the destructor. Used +// to prevent deadlocks in places where we call ClassLinker::VisitClass with all th threads +// suspended. +class ScopedGCCriticalSection { + public: + ScopedGCCriticalSection(Thread* self, GcCause cause, CollectorType collector_type) + ACQUIRE(Roles::uninterruptible_); + ~ScopedGCCriticalSection() RELEASE(Roles::uninterruptible_); + + private: + Thread* const self_; + const char* old_cause_; +}; + +} // namespace gc +} // namespace art + +#endif // ART_RUNTIME_GC_SCOPED_GC_CRITICAL_SECTION_H_ diff --git a/runtime/globals.h b/runtime/globals.h index d70f3ab19b..987a94ea4b 100644 --- a/runtime/globals.h +++ b/runtime/globals.h @@ -58,12 +58,6 @@ static constexpr bool kIsTargetBuild = true; static constexpr bool kIsTargetBuild = false; #endif -#if defined(ART_USE_OPTIMIZING_COMPILER) -static constexpr bool kUseOptimizingCompiler = true; -#else -static constexpr bool kUseOptimizingCompiler = false; -#endif - // Garbage collector constants. static constexpr bool kMovingCollector = true; static constexpr bool kMarkCompactSupport = false && kMovingCollector; diff --git a/runtime/instrumentation.h b/runtime/instrumentation.h index 80460562eb..612ca14cf5 100644 --- a/runtime/instrumentation.h +++ b/runtime/instrumentation.h @@ -146,9 +146,13 @@ class Instrumentation { // Deoptimization. void EnableDeoptimization() - REQUIRES(Locks::mutator_lock_, !deoptimized_methods_lock_); + REQUIRES(Locks::mutator_lock_) + REQUIRES(!deoptimized_methods_lock_); + // Calls UndeoptimizeEverything which may visit class linker classes through ConfigureStubs. 
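ScopedGCCriticalSection, added above, follows the usual RAII shape: the constructor waits for any in-flight collection and registers a placeholder collector so no new GC can start, and the destructor releases it through FinishGC(). A minimal standalone model of that pattern follows; GcGate and its methods are invented names for illustration, not the ART heap API:

#include <condition_variable>
#include <mutex>

// Simplified model of the GC exclusion: Start() blocks until no collection is
// running and then marks one as "running" so nobody else can start; Finish()
// clears the flag, mirroring the StartGC()/FinishGC() pairing in the change above.
class GcGate {
 public:
  void Start() {
    std::unique_lock<std::mutex> lock(mu_);
    cv_.wait(lock, [this] { return !gc_running_; });  // wait for GC to complete
    gc_running_ = true;                               // block further GCs
  }
  void Finish() {
    { std::lock_guard<std::mutex> lock(mu_); gc_running_ = false; }
    cv_.notify_all();
  }
 private:
  std::mutex mu_;
  std::condition_variable cv_;
  bool gc_running_ = false;
};

class ScopedGcCriticalSection {
 public:
  explicit ScopedGcCriticalSection(GcGate* gate) : gate_(gate) { gate_->Start(); }
  ~ScopedGcCriticalSection() { gate_->Finish(); }
 private:
  GcGate* const gate_;
};

int main() {
  GcGate gate;
  {
    ScopedGcCriticalSection gcs(&gate);
    // ... work that must not race with the garbage collector ...
  }  // GC allowed again here
  return 0;
}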
void DisableDeoptimization(const char* key) - REQUIRES(Locks::mutator_lock_, !deoptimized_methods_lock_); + REQUIRES(Locks::mutator_lock_, Roles::uninterruptible_) + REQUIRES(!deoptimized_methods_lock_); + bool AreAllMethodsDeoptimized() const { return interpreter_stubs_installed_; } @@ -156,12 +160,17 @@ class Instrumentation { // Executes everything with interpreter. void DeoptimizeEverything(const char* key) - REQUIRES(Locks::mutator_lock_, !Locks::thread_list_lock_, !Locks::classlinker_classes_lock_, + REQUIRES(Locks::mutator_lock_, Roles::uninterruptible_) + REQUIRES(!Locks::thread_list_lock_, + !Locks::classlinker_classes_lock_, !deoptimized_methods_lock_); - // Executes everything with compiled code (or interpreter if there is no code). + // Executes everything with compiled code (or interpreter if there is no code). May visit class + // linker classes through ConfigureStubs. void UndeoptimizeEverything(const char* key) - REQUIRES(Locks::mutator_lock_, !Locks::thread_list_lock_, !Locks::classlinker_classes_lock_, + REQUIRES(Locks::mutator_lock_, Roles::uninterruptible_) + REQUIRES(!Locks::thread_list_lock_, + !Locks::classlinker_classes_lock_, !deoptimized_methods_lock_); // Deoptimize a method by forcing its execution with the interpreter. Nevertheless, a static @@ -183,12 +192,16 @@ class Instrumentation { // Enable method tracing by installing instrumentation entry/exit stubs or interpreter. void EnableMethodTracing(const char* key, bool needs_interpreter = kDeoptimizeForAccurateMethodEntryExitListeners) - REQUIRES(Locks::mutator_lock_, !Locks::thread_list_lock_, !Locks::classlinker_classes_lock_, + REQUIRES(Locks::mutator_lock_, Roles::uninterruptible_) + REQUIRES(!Locks::thread_list_lock_, + !Locks::classlinker_classes_lock_, !deoptimized_methods_lock_); // Disable method tracing by uninstalling instrumentation entry/exit stubs or interpreter. void DisableMethodTracing(const char* key) - REQUIRES(Locks::mutator_lock_, !Locks::thread_list_lock_, !Locks::classlinker_classes_lock_, + REQUIRES(Locks::mutator_lock_, Roles::uninterruptible_) + REQUIRES(!Locks::thread_list_lock_, + !Locks::classlinker_classes_lock_, !deoptimized_methods_lock_); InterpreterHandlerTable GetInterpreterHandlerTable() const @@ -393,7 +406,9 @@ class Instrumentation { // instrumentation level it needs. Therefore the current instrumentation level // becomes the highest instrumentation level required by a client. 
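The REQUIRES/ACQUIRE/RELEASE annotations being added throughout these declarations are Clang thread-safety ("capability") attributes; Roles::uninterruptible_ is a capability with no runtime state, used purely so the static analysis can check that callers hold it. A minimal sketch of the underlying attributes is below; the macro and type names are local to this sketch, since ART defines its own wrapper macros:

// Compiles anywhere; the analysis only runs under Clang with -Wthread-safety.
#if defined(__clang__)
#define CAPABILITY(x)      __attribute__((capability(x)))
#define ACQUIRE_CAP(...)   __attribute__((acquire_capability(__VA_ARGS__)))
#define RELEASE_CAP(...)   __attribute__((release_capability(__VA_ARGS__)))
#define REQUIRES_CAP(...)  __attribute__((requires_capability(__VA_ARGS__)))
#else
#define CAPABILITY(x)
#define ACQUIRE_CAP(...)
#define RELEASE_CAP(...)
#define REQUIRES_CAP(...)
#endif

// A "role" is a capability with no runtime state: acquiring it only tells the
// analysis that the calling thread is now, e.g., uninterruptible.
struct CAPABILITY("role") Role {
  void Acquire() ACQUIRE_CAP() {}
  void Release() RELEASE_CAP() {}
};

Role uninterruptible;

void DoUninterruptibleWork() REQUIRES_CAP(uninterruptible) {
  // -Wthread-safety flags callers that have not acquired the role.
}

int main() {
  uninterruptible.Acquire();
  DoUninterruptibleWork();
  uninterruptible.Release();
  return 0;
}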
void ConfigureStubs(const char* key, InstrumentationLevel desired_instrumentation_level) - REQUIRES(Locks::mutator_lock_, !deoptimized_methods_lock_, !Locks::thread_list_lock_, + REQUIRES(Locks::mutator_lock_, Roles::uninterruptible_) + REQUIRES(!deoptimized_methods_lock_, + !Locks::thread_list_lock_, !Locks::classlinker_classes_lock_); void UpdateInterpreterHandlerTable() REQUIRES(Locks::mutator_lock_) { diff --git a/runtime/instrumentation_test.cc b/runtime/instrumentation_test.cc index d98d246914..e4688a21dd 100644 --- a/runtime/instrumentation_test.cc +++ b/runtime/instrumentation_test.cc @@ -20,6 +20,7 @@ #include "common_throws.h" #include "class_linker-inl.h" #include "dex_file.h" +#include "gc/scoped_gc_critical_section.h" #include "handle_scope-inl.h" #include "jvalue.h" #include "runtime.h" @@ -151,6 +152,9 @@ class InstrumentationTest : public CommonRuntimeTest { ScopedObjectAccess soa(Thread::Current()); instrumentation::Instrumentation* instr = Runtime::Current()->GetInstrumentation(); ScopedThreadSuspension sts(soa.Self(), kSuspended); + gc::ScopedGCCriticalSection gcs(soa.Self(), + gc::kGcCauseInstrumentation, + gc::kCollectorTypeInstrumentation); ScopedSuspendAll ssa("Instrumentation::ConfigureStubs"); instr->ConfigureStubs(key, level); } @@ -203,6 +207,9 @@ class InstrumentationTest : public CommonRuntimeTest { Runtime* runtime = Runtime::Current(); instrumentation::Instrumentation* instrumentation = runtime->GetInstrumentation(); ScopedThreadSuspension sts(self, kSuspended); + gc::ScopedGCCriticalSection gcs(self, + gc::kGcCauseInstrumentation, + gc::kCollectorTypeInstrumentation); ScopedSuspendAll ssa("Single method deoptimization"); if (enable_deoptimization) { instrumentation->EnableDeoptimization(); @@ -216,6 +223,9 @@ class InstrumentationTest : public CommonRuntimeTest { Runtime* runtime = Runtime::Current(); instrumentation::Instrumentation* instrumentation = runtime->GetInstrumentation(); ScopedThreadSuspension sts(self, kSuspended); + gc::ScopedGCCriticalSection gcs(self, + gc::kGcCauseInstrumentation, + gc::kCollectorTypeInstrumentation); ScopedSuspendAll ssa("Single method undeoptimization"); instrumentation->Undeoptimize(method); if (disable_deoptimization) { @@ -228,6 +238,9 @@ class InstrumentationTest : public CommonRuntimeTest { Runtime* runtime = Runtime::Current(); instrumentation::Instrumentation* instrumentation = runtime->GetInstrumentation(); ScopedThreadSuspension sts(self, kSuspended); + gc::ScopedGCCriticalSection gcs(self, + gc::kGcCauseInstrumentation, + gc::kCollectorTypeInstrumentation); ScopedSuspendAll ssa("Full deoptimization"); if (enable_deoptimization) { instrumentation->EnableDeoptimization(); @@ -240,6 +253,9 @@ class InstrumentationTest : public CommonRuntimeTest { Runtime* runtime = Runtime::Current(); instrumentation::Instrumentation* instrumentation = runtime->GetInstrumentation(); ScopedThreadSuspension sts(self, kSuspended); + gc::ScopedGCCriticalSection gcs(self, + gc::kGcCauseInstrumentation, + gc::kCollectorTypeInstrumentation); ScopedSuspendAll ssa("Full undeoptimization"); instrumentation->UndeoptimizeEverything(key); if (disable_deoptimization) { @@ -252,6 +268,9 @@ class InstrumentationTest : public CommonRuntimeTest { Runtime* runtime = Runtime::Current(); instrumentation::Instrumentation* instrumentation = runtime->GetInstrumentation(); ScopedThreadSuspension sts(self, kSuspended); + gc::ScopedGCCriticalSection gcs(self, + gc::kGcCauseInstrumentation, + gc::kCollectorTypeInstrumentation); ScopedSuspendAll 
ssa("EnableMethodTracing"); instrumentation->EnableMethodTracing(key, needs_interpreter); } @@ -261,6 +280,9 @@ class InstrumentationTest : public CommonRuntimeTest { Runtime* runtime = Runtime::Current(); instrumentation::Instrumentation* instrumentation = runtime->GetInstrumentation(); ScopedThreadSuspension sts(self, kSuspended); + gc::ScopedGCCriticalSection gcs(self, + gc::kGcCauseInstrumentation, + gc::kCollectorTypeInstrumentation); ScopedSuspendAll ssa("EnableMethodTracing"); instrumentation->DisableMethodTracing(key); } diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc index 3ac80c6642..f783b04b95 100644 --- a/runtime/interpreter/interpreter.cc +++ b/runtime/interpreter/interpreter.cc @@ -21,6 +21,7 @@ #include "mirror/string-inl.h" #include "scoped_thread_state_change.h" #include "ScopedLocalRef.h" +#include "stack.h" #include "unstarted_runtime.h" namespace art { @@ -330,8 +331,9 @@ void EnterInterpreterFromInvoke(Thread* self, ArtMethod* method, Object* receive } // Set up shadow frame with matching number of reference slots to vregs. ShadowFrame* last_shadow_frame = self->GetManagedStack()->GetTopShadowFrame(); - void* memory = alloca(ShadowFrame::ComputeSize(num_regs)); - ShadowFrame* shadow_frame(ShadowFrame::Create(num_regs, last_shadow_frame, method, 0, memory)); + ShadowFrameAllocaUniquePtr shadow_frame_unique_ptr = + CREATE_SHADOW_FRAME(num_regs, last_shadow_frame, method, 0); + ShadowFrame* shadow_frame = shadow_frame_unique_ptr.get(); self->PushShadowFrame(shadow_frame); size_t cur_reg = num_regs - num_ins; diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc index 5fbd687452..ad34c9ad9e 100644 --- a/runtime/interpreter/interpreter_common.cc +++ b/runtime/interpreter/interpreter_common.cc @@ -21,12 +21,16 @@ #include "debugger.h" #include "entrypoints/runtime_asm_entrypoints.h" #include "mirror/array-inl.h" +#include "stack.h" #include "unstarted_runtime.h" #include "verifier/method_verifier.h" namespace art { namespace interpreter { +// All lambda closures have to be a consecutive pair of virtual registers. +static constexpr size_t kLambdaVirtualRegisterWidth = 2; + void ThrowNullPointerExceptionFromInterpreter() { ThrowNullPointerExceptionFromDexPC(); } @@ -483,13 +487,16 @@ void AbortTransactionV(Thread* self, const char* fmt, va_list args) { } // Separate declaration is required solely for the attributes. 
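A note on the CREATE_SHADOW_FRAME change above: alloca() memory belongs to the calling function's stack frame, so the allocation has to be expanded at the call site, which is why the helper is a macro returning a smart pointer whose deleter only runs cleanup and never frees the memory. The sketch below illustrates that shape under those assumptions; it is not the actual CREATE_SHADOW_FRAME definition:

#include <alloca.h>
#include <cstdio>
#include <memory>
#include <new>

// Sketch of a stack-allocated frame managed by a unique_ptr with a no-free deleter.
struct Frame {
  int num_regs;
  // ... register storage would follow in the alloca'd block ...
};

struct FrameDeleter {
  void operator()(Frame* f) const {
    // Run cleanup only; deliberately no free()/delete for stack memory.
    std::printf("cleaning up frame with %d regs\n", f->num_regs);
  }
};
using FramePtr = std::unique_ptr<Frame, FrameDeleter>;

// Must be a macro: a helper function's alloca would be reclaimed when it returns.
#define CREATE_FRAME(num_regs) \
  FramePtr(new (alloca(sizeof(Frame) + (num_regs) * sizeof(int))) Frame{(num_regs)})

int main() {
  FramePtr frame = CREATE_FRAME(4);
  std::printf("frame at %p\n", static_cast<void*>(frame.get()));
  return 0;  // FrameDeleter runs; the stack memory is reclaimed automatically
}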
-template<bool is_range, bool do_assignability_check> SHARED_REQUIRES(Locks::mutator_lock_) +template <bool is_range, + bool do_assignability_check, + size_t kVarArgMax> + SHARED_REQUIRES(Locks::mutator_lock_) static inline bool DoCallCommon(ArtMethod* called_method, Thread* self, ShadowFrame& shadow_frame, JValue* result, uint16_t number_of_inputs, - uint32_t arg[Instruction::kMaxVarArgRegs], + uint32_t (&arg)[kVarArgMax], uint32_t vregC) ALWAYS_INLINE; SHARED_REQUIRES(Locks::mutator_lock_) @@ -509,13 +516,15 @@ static inline bool NeedsInterpreter(Thread* self, ShadowFrame* new_shadow_frame) Dbg::IsForcedInterpreterNeededForCalling(self, target); } -template<bool is_range, bool do_assignability_check> +template <bool is_range, + bool do_assignability_check, + size_t kVarArgMax> static inline bool DoCallCommon(ArtMethod* called_method, Thread* self, ShadowFrame& shadow_frame, JValue* result, uint16_t number_of_inputs, - uint32_t arg[Instruction::kMaxVarArgRegs], + uint32_t (&arg)[kVarArgMax], uint32_t vregC) { bool string_init = false; // Replace calls to String.<init> with equivalent StringFactory call. @@ -560,10 +569,10 @@ static inline bool DoCallCommon(ArtMethod* called_method, number_of_inputs--; // Rewrite the var-args, dropping the 0th argument ("this") - for (uint32_t i = 1; i < Instruction::kMaxVarArgRegs; ++i) { + for (uint32_t i = 1; i < arraysize(arg); ++i) { arg[i - 1] = arg[i]; } - arg[Instruction::kMaxVarArgRegs - 1] = 0; + arg[arraysize(arg) - 1] = 0; // Rewrite the non-var-arg case vregC++; // Skips the 0th vreg in the range ("this"). @@ -576,9 +585,9 @@ static inline bool DoCallCommon(ArtMethod* called_method, // Allocate shadow frame on the stack. const char* old_cause = self->StartAssertNoThreadSuspension("DoCallCommon"); - void* memory = alloca(ShadowFrame::ComputeSize(num_regs)); - ShadowFrame* new_shadow_frame(ShadowFrame::Create(num_regs, &shadow_frame, called_method, 0, - memory)); + ShadowFrameAllocaUniquePtr shadow_frame_unique_ptr = + CREATE_SHADOW_FRAME(num_regs, &shadow_frame, called_method, 0); + ShadowFrame* new_shadow_frame = shadow_frame_unique_ptr.get(); // Initialize new shadow frame by copying the registers from the callee shadow frame. if (do_assignability_check) { @@ -669,7 +678,7 @@ static inline bool DoCallCommon(ArtMethod* called_method, AssignRegister(new_shadow_frame, shadow_frame, dest_reg, src_reg); } } else { - DCHECK_LE(number_of_inputs, Instruction::kMaxVarArgRegs); + DCHECK_LE(number_of_inputs, arraysize(arg)); for (; arg_index < number_of_inputs; ++arg_index) { AssignRegister(new_shadow_frame, shadow_frame, first_dest_reg + arg_index, arg[arg_index]); @@ -736,12 +745,13 @@ bool DoLambdaCall(ArtMethod* called_method, Thread* self, ShadowFrame& shadow_fr const Instruction* inst, uint16_t inst_data, JValue* result) { const uint4_t num_additional_registers = inst->VRegB_25x(); // Argument word count. - const uint16_t number_of_inputs = num_additional_registers + 1; - // The first input register is always present and is not encoded in the count. + const uint16_t number_of_inputs = num_additional_registers + kLambdaVirtualRegisterWidth; + // The lambda closure register is always present and is not encoded in the count. + // Furthermore, the lambda closure register is always wide, so it counts as 2 inputs. // TODO: find a cleaner way to separate non-range and range information without duplicating // code. - uint32_t arg[Instruction::kMaxVarArgRegs]; // only used in invoke-XXX. 
+ uint32_t arg[Instruction::kMaxVarArgRegs25x]; // only used in invoke-XXX. uint32_t vregC = 0; // only used in invoke-XXX-range. if (is_range) { vregC = inst->VRegC_3rc(); @@ -767,7 +777,7 @@ bool DoCall(ArtMethod* called_method, Thread* self, ShadowFrame& shadow_frame, // TODO: find a cleaner way to separate non-range and range information without duplicating // code. - uint32_t arg[Instruction::kMaxVarArgRegs]; // only used in invoke-XXX. + uint32_t arg[Instruction::kMaxVarArgRegs] = {}; // only used in invoke-XXX. uint32_t vregC = 0; if (is_range) { vregC = inst->VRegC_3rc(); diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h index 7398778d15..f57bddbb4f 100644 --- a/runtime/interpreter/interpreter_common.h +++ b/runtime/interpreter/interpreter_common.h @@ -34,7 +34,12 @@ #include "dex_instruction-inl.h" #include "entrypoints/entrypoint_utils-inl.h" #include "handle_scope-inl.h" +#include "lambda/art_lambda_method.h" #include "lambda/box_table.h" +#include "lambda/closure.h" +#include "lambda/closure_builder-inl.h" +#include "lambda/leaking_allocator.h" +#include "lambda/shorty_field_type.h" #include "mirror/class-inl.h" #include "mirror/method.h" #include "mirror/object-inl.h" @@ -133,32 +138,44 @@ static inline bool IsValidLambdaTargetOrThrow(ArtMethod* called_method) return success; } -// Write out the 'ArtMethod*' into vreg and vreg+1 +// Write out the 'Closure*' into vreg and vreg+1, as if it was a jlong. static inline void WriteLambdaClosureIntoVRegs(ShadowFrame& shadow_frame, - const ArtMethod& called_method, + const lambda::Closure* lambda_closure, uint32_t vreg) { // Split the method into a lo and hi 32 bits so we can encode them into 2 virtual registers. - uint32_t called_method_lo = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(&called_method)); - uint32_t called_method_hi = static_cast<uint32_t>(reinterpret_cast<uint64_t>(&called_method) + uint32_t closure_lo = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(lambda_closure)); + uint32_t closure_hi = static_cast<uint32_t>(reinterpret_cast<uint64_t>(lambda_closure) >> BitSizeOf<uint32_t>()); // Use uint64_t instead of uintptr_t to allow shifting past the max on 32-bit. static_assert(sizeof(uint64_t) >= sizeof(uintptr_t), "Impossible"); - DCHECK_NE(called_method_lo | called_method_hi, 0u); + DCHECK_NE(closure_lo | closure_hi, 0u); - shadow_frame.SetVReg(vreg, called_method_lo); - shadow_frame.SetVReg(vreg + 1, called_method_hi); + shadow_frame.SetVReg(vreg, closure_lo); + shadow_frame.SetVReg(vreg + 1, closure_hi); } // Handles create-lambda instructions. // Returns true on success, otherwise throws an exception and returns false. // (Exceptions are thrown by creating a new exception and then being put in the thread TLS) // +// The closure must be allocated big enough to hold the data, and should not be +// pre-initialized. It is initialized with the actual captured variables as a side-effect, +// although this should be unimportant to the caller since this function also handles storing it to +// the ShadowFrame. +// // As a work-in-progress implementation, this shoves the ArtMethod object corresponding // to the target dex method index into the target register vA and vA + 1. 
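WriteLambdaClosureIntoVRegs() and ReadLambdaClosureFromVRegsOrThrow() above spread a closure pointer across a consecutive vreg pair, using a 64-bit intermediate so the shift is well defined on 32-bit targets. A self-contained sketch of that packing, with a plain uint32_t array standing in for the shadow frame's registers:

#include <cassert>
#include <cstdint>

// Split a pointer into the low and high 32 bits and store them in (vreg, vreg + 1).
void WritePtr(const void* ptr, uint32_t* vregs, uint32_t vreg) {
  uint64_t value = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(ptr));
  vregs[vreg] = static_cast<uint32_t>(value);            // low 32 bits
  vregs[vreg + 1] = static_cast<uint32_t>(value >> 32);  // high 32 bits (0 on 32-bit)
}

// Reassemble the pointer from the vreg pair.
void* ReadPtr(const uint32_t* vregs, uint32_t vreg) {
  uint64_t value = static_cast<uint64_t>(vregs[vreg]) |
                   (static_cast<uint64_t>(vregs[vreg + 1]) << 32);
  return reinterpret_cast<void*>(static_cast<uintptr_t>(value));
}

int main() {
  uint32_t vregs[4] = {};
  int dummy = 0;
  WritePtr(&dummy, vregs, 2);
  assert(ReadPtr(vregs, 2) == &dummy);
  return 0;
}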
template<bool do_access_check> -static inline bool DoCreateLambda(Thread* self, ShadowFrame& shadow_frame, - const Instruction* inst) { +static inline bool DoCreateLambda(Thread* self, + const Instruction* inst, + /*inout*/ShadowFrame& shadow_frame, + /*inout*/lambda::ClosureBuilder* closure_builder, + /*inout*/lambda::Closure* uninitialized_closure) { + DCHECK(closure_builder != nullptr); + DCHECK(uninitialized_closure != nullptr); + DCHECK_ALIGNED(uninitialized_closure, alignof(lambda::Closure)); + /* * create-lambda is opcode 0x21c * - vA is the target register where the closure will be stored into @@ -171,16 +188,69 @@ static inline bool DoCreateLambda(Thread* self, ShadowFrame& shadow_frame, ArtMethod* const called_method = FindMethodFromCode<kStatic, do_access_check>( method_idx, &receiver, sf_method, self); - uint32_t vregA = inst->VRegA_21c(); + uint32_t vreg_dest_closure = inst->VRegA_21c(); if (UNLIKELY(!IsValidLambdaTargetOrThrow(called_method))) { CHECK(self->IsExceptionPending()); - shadow_frame.SetVReg(vregA, 0u); - shadow_frame.SetVReg(vregA + 1, 0u); + shadow_frame.SetVReg(vreg_dest_closure, 0u); + shadow_frame.SetVReg(vreg_dest_closure + 1, 0u); return false; } - WriteLambdaClosureIntoVRegs(shadow_frame, *called_method, vregA); + lambda::ArtLambdaMethod* initialized_lambda_method; + // Initialize the ArtLambdaMethod with the right data. + { + lambda::ArtLambdaMethod* uninitialized_lambda_method = + reinterpret_cast<lambda::ArtLambdaMethod*>( + lambda::LeakingAllocator::AllocateMemory(self, sizeof(lambda::ArtLambdaMethod))); + + std::string captured_variables_shorty = closure_builder->GetCapturedVariableShortyTypes(); + std::string captured_variables_long_type_desc; + + // Synthesize a long type descriptor from the short one. + for (char shorty : captured_variables_shorty) { + lambda::ShortyFieldType shorty_field_type(shorty); + if (shorty_field_type.IsObject()) { + // Not the true type, but good enough until we implement verifier support. + captured_variables_long_type_desc += "Ljava/lang/Object;"; + UNIMPLEMENTED(FATAL) << "create-lambda with an object captured variable"; + } else if (shorty_field_type.IsLambda()) { + // Not the true type, but good enough until we implement verifier support. + captured_variables_long_type_desc += "Ljava/lang/Runnable;"; + UNIMPLEMENTED(FATAL) << "create-lambda with a lambda captured variable"; + } else { + // The primitive types have the same length shorty or not, so this is always correct. + DCHECK(shorty_field_type.IsPrimitive()); + captured_variables_long_type_desc += shorty_field_type; + } + } + + // Copy strings to dynamically allocated storage. This leaks, but that's ok. Fix it later. + // TODO: Strings need to come from the DexFile, so they won't need their own allocations. 
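The loop above synthesizes a long field-type descriptor from the captured-variable shorty string: primitive shorty characters are already valid descriptors, while objects and lambdas get placeholder descriptors until real type information is plumbed through. A hedged sketch of that mapping; the backslash shorty character used for lambdas here is an assumption of this sketch:

#include <cstdio>
#include <string>

// Map each shorty character to a field descriptor, using the same placeholders as
// the change above for the not-yet-supported object and lambda captures.
std::string ShortyToLongDescriptor(const std::string& shorty) {
  std::string desc;
  for (char c : shorty) {
    switch (c) {
      case 'L':   // object reference (placeholder until verifier support lands)
        desc += "Ljava/lang/Object;";
        break;
      case '\\':  // lambda (placeholder; illustrative shorty character)
        desc += "Ljava/lang/Runnable;";
        break;
      default:    // Z, B, C, S, I, J, F, D: the shorty char is the descriptor
        desc += c;
        break;
    }
  }
  return desc;
}

int main() {
  // Example: an int, a long, and an object captured by a lambda.
  std::printf("%s\n", ShortyToLongDescriptor("IJL").c_str());  // prints IJLjava/lang/Object;
  return 0;
}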
+ char* captured_variables_type_desc = lambda::LeakingAllocator::MakeFlexibleInstance<char>( + self, + captured_variables_long_type_desc.size() + 1); + strcpy(captured_variables_type_desc, captured_variables_long_type_desc.c_str()); + char* captured_variables_shorty_copy = lambda::LeakingAllocator::MakeFlexibleInstance<char>( + self, + captured_variables_shorty.size() + 1); + strcpy(captured_variables_shorty_copy, captured_variables_shorty.c_str()); + + new (uninitialized_lambda_method) lambda::ArtLambdaMethod(called_method, + captured_variables_type_desc, + captured_variables_shorty_copy, + true); // innate lambda + initialized_lambda_method = uninitialized_lambda_method; + } + + // Write all the closure captured variables and the closure header into the closure. + lambda::Closure* initialized_closure; + { + initialized_closure = + closure_builder->CreateInPlace(uninitialized_closure, initialized_lambda_method); + } + + WriteLambdaClosureIntoVRegs(/*inout*/shadow_frame, initialized_closure, vreg_dest_closure); return true; } @@ -189,13 +259,11 @@ static inline bool DoCreateLambda(Thread* self, ShadowFrame& shadow_frame, // Validates that the art method points to a valid lambda function, otherwise throws // an exception and returns null. // (Exceptions are thrown by creating a new exception and then being put in the thread TLS) -static inline ArtMethod* ReadLambdaClosureFromVRegsOrThrow(ShadowFrame& shadow_frame, - uint32_t vreg) +static inline lambda::Closure* ReadLambdaClosureFromVRegsOrThrow(ShadowFrame& shadow_frame, + uint32_t vreg) SHARED_REQUIRES(Locks::mutator_lock_) { - // TODO(iam): Introduce a closure abstraction that will contain the captured variables - // instead of just an ArtMethod. - // This is temporarily using 2 vregs because a native ArtMethod can be up to 64-bit, - // but once proper variable capture is implemented it will only use 1 vreg. + // Lambda closures take up a consecutive pair of 2 virtual registers. + // On 32-bit the high bits are always 0. uint32_t vc_value_lo = shadow_frame.GetVReg(vreg); uint32_t vc_value_hi = shadow_frame.GetVReg(vreg + 1); @@ -204,17 +272,285 @@ static inline ArtMethod* ReadLambdaClosureFromVRegsOrThrow(ShadowFrame& shadow_f // Use uint64_t instead of uintptr_t to allow left-shifting past the max on 32-bit. static_assert(sizeof(uint64_t) >= sizeof(uintptr_t), "Impossible"); - ArtMethod* const called_method = reinterpret_cast<ArtMethod* const>(vc_value_ptr); + lambda::Closure* const lambda_closure = reinterpret_cast<lambda::Closure*>(vc_value_ptr); + DCHECK_ALIGNED(lambda_closure, alignof(lambda::Closure)); // Guard against the user passing a null closure, which is odd but (sadly) semantically valid. - if (UNLIKELY(called_method == nullptr)) { + if (UNLIKELY(lambda_closure == nullptr)) { ThrowNullPointerExceptionFromInterpreter(); return nullptr; - } else if (UNLIKELY(!IsValidLambdaTargetOrThrow(called_method))) { + } else if (UNLIKELY(!IsValidLambdaTargetOrThrow(lambda_closure->GetTargetMethod()))) { + // Sanity check against data corruption. return nullptr; } - return called_method; + return lambda_closure; +} + +// Forward declaration for lock annotations. See below for documentation. +template <bool do_access_check> +static inline const char* GetStringDataByDexStringIndexOrThrow(ShadowFrame& shadow_frame, + uint32_t string_idx) + SHARED_REQUIRES(Locks::mutator_lock_); + +// Find the c-string data corresponding to a dex file's string index. +// Otherwise, returns null if not found and throws a VerifyError. 
+// +// Note that with do_access_check=false, we never return null because the verifier +// must guard against invalid string indices. +// (Exceptions are thrown by creating a new exception and then being put in the thread TLS) +template <bool do_access_check> +static inline const char* GetStringDataByDexStringIndexOrThrow(ShadowFrame& shadow_frame, + uint32_t string_idx) { + ArtMethod* method = shadow_frame.GetMethod(); + const DexFile* dex_file = method->GetDexFile(); + + mirror::Class* declaring_class = method->GetDeclaringClass(); + if (!do_access_check) { + // MethodVerifier refuses methods with string_idx out of bounds. + DCHECK_LT(string_idx, declaring_class->GetDexCache()->NumStrings()); + } else { + // Access checks enabled: perform string index bounds ourselves. + if (string_idx >= dex_file->GetHeader().string_ids_size_) { + ThrowVerifyError(declaring_class, "String index '%" PRIu32 "' out of bounds", + string_idx); + return nullptr; + } + } + + const char* type_string = dex_file->StringDataByIdx(string_idx); + + if (UNLIKELY(type_string == nullptr)) { + CHECK_EQ(false, do_access_check) + << " verifier should've caught invalid string index " << string_idx; + CHECK_EQ(true, do_access_check) + << " string idx size check should've caught invalid string index " << string_idx; + } + + return type_string; +} + +// Handles capture-variable instructions. +// Returns true on success, otherwise throws an exception and returns false. +// (Exceptions are thrown by creating a new exception and then being put in the thread TLS) +template<bool do_access_check> +static inline bool DoCaptureVariable(Thread* self, + const Instruction* inst, + /*inout*/ShadowFrame& shadow_frame, + /*inout*/lambda::ClosureBuilder* closure_builder) { + DCHECK(closure_builder != nullptr); + using lambda::ShortyFieldType; + /* + * capture-variable is opcode 0xf6, fmt 0x21c + * - vA is the source register of the variable that will be captured + * - vB is the string ID of the variable's type that will be captured + */ + const uint32_t source_vreg = inst->VRegA_21c(); + const uint32_t string_idx = inst->VRegB_21c(); + // TODO: this should be a proper [type id] instead of a [string ID] pointing to a type. + + const char* type_string = GetStringDataByDexStringIndexOrThrow<do_access_check>(shadow_frame, + string_idx); + if (UNLIKELY(type_string == nullptr)) { + CHECK(self->IsExceptionPending()); + return false; + } + + char type_first_letter = type_string[0]; + ShortyFieldType shorty_type; + if (do_access_check && + UNLIKELY(!ShortyFieldType::MaybeCreate(type_first_letter, /*out*/&shorty_type))) { // NOLINT: [whitespace/comma] [3] + ThrowVerifyError(shadow_frame.GetMethod()->GetDeclaringClass(), + "capture-variable vB must be a valid type"); + return false; + } else { + // Already verified that the type is valid. + shorty_type = ShortyFieldType(type_first_letter); + } + + const size_t captured_variable_count = closure_builder->GetCaptureCount(); + + // Note: types are specified explicitly so that the closure is packed tightly. 
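The switch that follows stores each captured value at its natural width, which is what keeps the closure tightly packed. A rough standalone model of that packing is below; PackedCaptureBuffer is an invented name and not the ART ClosureBuilder API:

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <type_traits>
#include <vector>

// Rough model of packing captured variables tightly: each value is appended at its
// natural width, so a bool takes 1 byte and a long 8, instead of a fixed vreg-sized slot.
class PackedCaptureBuffer {
 public:
  template <typename T>
  void Capture(T value) {
    static_assert(std::is_trivially_copyable<T>::value, "primitive captures only");
    const uint8_t* raw = reinterpret_cast<const uint8_t*>(&value);
    bytes_.insert(bytes_.end(), raw, raw + sizeof(T));
  }
  size_t SizeInBytes() const { return bytes_.size(); }
 private:
  std::vector<uint8_t> bytes_;
};

int main() {
  PackedCaptureBuffer buffer;
  buffer.Capture<bool>(true);    // 1 byte
  buffer.Capture<int32_t>(42);   // 4 bytes
  buffer.Capture<int64_t>(7);    // 8 bytes
  assert(buffer.SizeInBytes() == 1 + 4 + 8);
  return 0;
}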
+ switch (shorty_type) { + case ShortyFieldType::kBoolean: { + uint32_t primitive_narrow_value = shadow_frame.GetVReg(source_vreg); + closure_builder->CaptureVariablePrimitive<bool>(primitive_narrow_value); + break; + } + case ShortyFieldType::kByte: { + uint32_t primitive_narrow_value = shadow_frame.GetVReg(source_vreg); + closure_builder->CaptureVariablePrimitive<int8_t>(primitive_narrow_value); + break; + } + case ShortyFieldType::kChar: { + uint32_t primitive_narrow_value = shadow_frame.GetVReg(source_vreg); + closure_builder->CaptureVariablePrimitive<uint16_t>(primitive_narrow_value); + break; + } + case ShortyFieldType::kShort: { + uint32_t primitive_narrow_value = shadow_frame.GetVReg(source_vreg); + closure_builder->CaptureVariablePrimitive<int16_t>(primitive_narrow_value); + break; + } + case ShortyFieldType::kInt: { + uint32_t primitive_narrow_value = shadow_frame.GetVReg(source_vreg); + closure_builder->CaptureVariablePrimitive<int32_t>(primitive_narrow_value); + break; + } + case ShortyFieldType::kDouble: { + closure_builder->CaptureVariablePrimitive(shadow_frame.GetVRegDouble(source_vreg)); + break; + } + case ShortyFieldType::kFloat: { + closure_builder->CaptureVariablePrimitive(shadow_frame.GetVRegFloat(source_vreg)); + break; + } + case ShortyFieldType::kLambda: { + UNIMPLEMENTED(FATAL) << " capture-variable with type kLambda"; + // TODO: Capturing lambdas recursively will be done at a later time. + UNREACHABLE(); + } + case ShortyFieldType::kLong: { + closure_builder->CaptureVariablePrimitive(shadow_frame.GetVRegLong(source_vreg)); + break; + } + case ShortyFieldType::kObject: { + closure_builder->CaptureVariableObject(shadow_frame.GetVRegReference(source_vreg)); + UNIMPLEMENTED(FATAL) << " capture-variable with type kObject"; + // TODO: finish implementing this. disabled for now since we can't track lambda refs for GC. + UNREACHABLE(); + } + + default: + LOG(FATAL) << "Invalid shorty type value " << shorty_type; + UNREACHABLE(); + } + + DCHECK_EQ(captured_variable_count + 1, closure_builder->GetCaptureCount()); + + return true; +} + +// Handles capture-variable instructions. +// Returns true on success, otherwise throws an exception and returns false. +// (Exceptions are thrown by creating a new exception and then being put in the thread TLS) +template<bool do_access_check> +static inline bool DoLiberateVariable(Thread* self, + const Instruction* inst, + size_t captured_variable_index, + /*inout*/ShadowFrame& shadow_frame) { + using lambda::ShortyFieldType; + /* + * liberate-variable is opcode 0xf7, fmt 0x22c + * - vA is the destination register + * - vB is the register with the lambda closure in it + * - vC is the string ID which needs to be a valid field type descriptor + */ + + const uint32_t dest_vreg = inst->VRegA_22c(); + const uint32_t closure_vreg = inst->VRegB_22c(); + const uint32_t string_idx = inst->VRegC_22c(); + // TODO: this should be a proper [type id] instead of a [string ID] pointing to a type. + + + // Synthesize a long type descriptor from a shorty type descriptor list. + // TODO: Fix the dex encoding to contain the long and short type descriptors. 
+ const char* type_string = GetStringDataByDexStringIndexOrThrow<do_access_check>(shadow_frame, + string_idx); + if (UNLIKELY(do_access_check && type_string == nullptr)) { + CHECK(self->IsExceptionPending()); + shadow_frame.SetVReg(dest_vreg, 0); + return false; + } + + char type_first_letter = type_string[0]; + ShortyFieldType shorty_type; + if (do_access_check && + UNLIKELY(!ShortyFieldType::MaybeCreate(type_first_letter, /*out*/&shorty_type))) { // NOLINT: [whitespace/comma] [3] + ThrowVerifyError(shadow_frame.GetMethod()->GetDeclaringClass(), + "liberate-variable vC must be a valid type"); + shadow_frame.SetVReg(dest_vreg, 0); + return false; + } else { + // Already verified that the type is valid. + shorty_type = ShortyFieldType(type_first_letter); + } + + // Check for closure being null *after* the type check. + // This way we can access the type info in case we fail later, to know how many vregs to clear. + const lambda::Closure* lambda_closure = + ReadLambdaClosureFromVRegsOrThrow(/*inout*/shadow_frame, closure_vreg); + + // Failed lambda target runtime check, an exception was raised. + if (UNLIKELY(lambda_closure == nullptr)) { + CHECK(self->IsExceptionPending()); + + // Clear the destination vreg(s) to be safe. + shadow_frame.SetVReg(dest_vreg, 0); + if (shorty_type.IsPrimitiveWide() || shorty_type.IsLambda()) { + shadow_frame.SetVReg(dest_vreg + 1, 0); + } + return false; + } + + if (do_access_check && + UNLIKELY(captured_variable_index >= lambda_closure->GetNumberOfCapturedVariables())) { + ThrowVerifyError(shadow_frame.GetMethod()->GetDeclaringClass(), + "liberate-variable captured variable index %zu out of bounds", + lambda_closure->GetNumberOfCapturedVariables()); + // Clear the destination vreg(s) to be safe. + shadow_frame.SetVReg(dest_vreg, 0); + if (shorty_type.IsPrimitiveWide() || shorty_type.IsLambda()) { + shadow_frame.SetVReg(dest_vreg + 1, 0); + } + return false; + } + + // Verify that the runtime type of the captured-variable matches the requested dex type. + if (do_access_check) { + ShortyFieldType actual_type = lambda_closure->GetCapturedShortyType(captured_variable_index); + if (actual_type != shorty_type) { + ThrowVerifyError(shadow_frame.GetMethod()->GetDeclaringClass(), + "cannot liberate-variable of runtime type '%c' to dex type '%c'", + static_cast<char>(actual_type), + static_cast<char>(shorty_type)); + + shadow_frame.SetVReg(dest_vreg, 0); + if (shorty_type.IsPrimitiveWide() || shorty_type.IsLambda()) { + shadow_frame.SetVReg(dest_vreg + 1, 0); + } + return false; + } + + if (actual_type.IsLambda() || actual_type.IsObject()) { + UNIMPLEMENTED(FATAL) << "liberate-variable type checks needs to " + << "parse full type descriptor for objects and lambdas"; + } + } + + // Unpack the captured variable from the closure into the correct type, then save it to the vreg. 
+ if (shorty_type.IsPrimitiveNarrow()) { + uint32_t primitive_narrow_value = + lambda_closure->GetCapturedPrimitiveNarrow(captured_variable_index); + shadow_frame.SetVReg(dest_vreg, primitive_narrow_value); + } else if (shorty_type.IsPrimitiveWide()) { + uint64_t primitive_wide_value = + lambda_closure->GetCapturedPrimitiveWide(captured_variable_index); + shadow_frame.SetVRegLong(dest_vreg, static_cast<int64_t>(primitive_wide_value)); + } else if (shorty_type.IsObject()) { + mirror::Object* unpacked_object = + lambda_closure->GetCapturedObject(captured_variable_index); + shadow_frame.SetVRegReference(dest_vreg, unpacked_object); + + UNIMPLEMENTED(FATAL) << "liberate-variable cannot unpack objects yet"; + } else if (shorty_type.IsLambda()) { + UNIMPLEMENTED(FATAL) << "liberate-variable cannot unpack lambdas yet"; + } else { + LOG(FATAL) << "unreachable"; + UNREACHABLE(); + } + + return true; } template<bool do_access_check> @@ -229,22 +565,24 @@ static inline bool DoInvokeLambda(Thread* self, ShadowFrame& shadow_frame, const * * - reading var-args for 0x25 gets us vD,vE,vF,vG (but not vB) */ - uint32_t vC = inst->VRegC_25x(); - ArtMethod* const called_method = ReadLambdaClosureFromVRegsOrThrow(shadow_frame, vC); + uint32_t vreg_closure = inst->VRegC_25x(); + const lambda::Closure* lambda_closure = + ReadLambdaClosureFromVRegsOrThrow(shadow_frame, vreg_closure); // Failed lambda target runtime check, an exception was raised. - if (UNLIKELY(called_method == nullptr)) { + if (UNLIKELY(lambda_closure == nullptr)) { CHECK(self->IsExceptionPending()); result->SetJ(0); return false; } + ArtMethod* const called_method = lambda_closure->GetTargetMethod(); // Invoke a non-range lambda return DoLambdaCall<false, do_access_check>(called_method, self, shadow_frame, inst, inst_data, result); } -// Handles invoke-XXX/range instructions. +// Handles invoke-XXX/range instructions (other than invoke-lambda[-range]). // Returns true on success, otherwise throws an exception and returns false. template<InvokeType type, bool is_range, bool do_access_check> static inline bool DoInvoke(Thread* self, ShadowFrame& shadow_frame, const Instruction* inst, @@ -521,17 +859,17 @@ static inline bool DoBoxLambda(Thread* self, ShadowFrame& shadow_frame, const In uint32_t vreg_target_object = inst->VRegA_22x(inst_data); uint32_t vreg_source_closure = inst->VRegB_22x(); - ArtMethod* closure_method = ReadLambdaClosureFromVRegsOrThrow(shadow_frame, - vreg_source_closure); + lambda::Closure* lambda_closure = ReadLambdaClosureFromVRegsOrThrow(shadow_frame, + vreg_source_closure); // Failed lambda target runtime check, an exception was raised. - if (UNLIKELY(closure_method == nullptr)) { + if (UNLIKELY(lambda_closure == nullptr)) { CHECK(self->IsExceptionPending()); return false; } mirror::Object* closure_as_object = - Runtime::Current()->GetLambdaBoxTable()->BoxLambda(closure_method); + Runtime::Current()->GetLambdaBoxTable()->BoxLambda(lambda_closure); // Failed to box the lambda, an exception was raised. if (UNLIKELY(closure_as_object == nullptr)) { @@ -564,16 +902,16 @@ static inline bool DoUnboxLambda(Thread* self, return false; } - ArtMethod* unboxed_closure = nullptr; + lambda::Closure* unboxed_closure = nullptr; // Raise an exception if unboxing fails. 
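With the DoInvokeLambda change above, invoke-lambda no longer treats the vreg value itself as the method to call; it decodes a Closure and asks it for the target method. A minimal analogue of that flow (ToyMethod and ToyClosure are illustrative stand-ins, not the ART types):

#include <cassert>

struct ToyMethod {
  int (*entry)(int);
};

struct ToyClosure {
  const ToyMethod* target;
  const ToyMethod* GetTargetMethod() const { return target; }
};

// invoke-lambda, post-change: decode the closure from the vreg pair (elided here),
// null-check it, then dispatch to the method stored inside the closure.
static int InvokeLambda(const ToyClosure* closure, int arg) {
  assert(closure != nullptr);  // the interpreter throws NullPointerException instead
  return closure->GetTargetMethod()->entry(arg);
}

// Usage sketch:
//   int Double(int x) { return 2 * x; }
//   ToyMethod m{&Double};
//   ToyClosure c{&m};
//   InvokeLambda(&c, 21);  // 42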
if (!Runtime::Current()->GetLambdaBoxTable()->UnboxLambda(boxed_closure_object, - &unboxed_closure)) { + /*out*/&unboxed_closure)) { CHECK(self->IsExceptionPending()); return false; } DCHECK(unboxed_closure != nullptr); - WriteLambdaClosureIntoVRegs(shadow_frame, *unboxed_closure, vreg_target_closure); + WriteLambdaClosureIntoVRegs(/*inout*/shadow_frame, unboxed_closure, vreg_target_closure); return true; } @@ -650,10 +988,13 @@ EXPLICIT_DO_INVOKE_VIRTUAL_QUICK_TEMPLATE_DECL(true); // invoke-virtual-quick- #undef EXPLICIT_INSTANTIATION_DO_INVOKE_VIRTUAL_QUICK // Explicitly instantiate all DoCreateLambda functions. -#define EXPLICIT_DO_CREATE_LAMBDA_DECL(_do_check) \ -template SHARED_REQUIRES(Locks::mutator_lock_) \ -bool DoCreateLambda<_do_check>(Thread* self, ShadowFrame& shadow_frame, \ - const Instruction* inst) +#define EXPLICIT_DO_CREATE_LAMBDA_DECL(_do_check) \ +template SHARED_REQUIRES(Locks::mutator_lock_) \ +bool DoCreateLambda<_do_check>(Thread* self, \ + const Instruction* inst, \ + /*inout*/ShadowFrame& shadow_frame, \ + /*inout*/lambda::ClosureBuilder* closure_builder, \ + /*inout*/lambda::Closure* uninitialized_closure); EXPLICIT_DO_CREATE_LAMBDA_DECL(false); // create-lambda EXPLICIT_DO_CREATE_LAMBDA_DECL(true); // create-lambda @@ -689,7 +1030,29 @@ EXPLICIT_DO_UNBOX_LAMBDA_DECL(false); // unbox-lambda EXPLICIT_DO_UNBOX_LAMBDA_DECL(true); // unbox-lambda #undef EXPLICIT_DO_BOX_LAMBDA_DECL +// Explicitly instantiate all DoCaptureVariable functions. +#define EXPLICIT_DO_CAPTURE_VARIABLE_DECL(_do_check) \ +template SHARED_REQUIRES(Locks::mutator_lock_) \ +bool DoCaptureVariable<_do_check>(Thread* self, \ + const Instruction* inst, \ + ShadowFrame& shadow_frame, \ + lambda::ClosureBuilder* closure_builder); + +EXPLICIT_DO_CAPTURE_VARIABLE_DECL(false); // capture-variable +EXPLICIT_DO_CAPTURE_VARIABLE_DECL(true); // capture-variable +#undef EXPLICIT_DO_CREATE_LAMBDA_DECL +// Explicitly instantiate all DoLiberateVariable functions. +#define EXPLICIT_DO_LIBERATE_VARIABLE_DECL(_do_check) \ +template SHARED_REQUIRES(Locks::mutator_lock_) \ +bool DoLiberateVariable<_do_check>(Thread* self, \ + const Instruction* inst, \ + size_t captured_variable_index, \ + ShadowFrame& shadow_frame); \ + +EXPLICIT_DO_LIBERATE_VARIABLE_DECL(false); // liberate-variable +EXPLICIT_DO_LIBERATE_VARIABLE_DECL(true); // liberate-variable +#undef EXPLICIT_DO_LIBERATE_LAMBDA_DECL } // namespace interpreter } // namespace art diff --git a/runtime/interpreter/interpreter_goto_table_impl.cc b/runtime/interpreter/interpreter_goto_table_impl.cc index 72e2ba0e7b..9677d79de3 100644 --- a/runtime/interpreter/interpreter_goto_table_impl.cc +++ b/runtime/interpreter/interpreter_goto_table_impl.cc @@ -17,9 +17,13 @@ #if !defined(__clang__) // Clang 3.4 fails to build the goto interpreter implementation. + +#include "base/stl_util.h" // MakeUnique #include "interpreter_common.h" #include "safe_math.h" +#include <memory> // std::unique_ptr + namespace art { namespace interpreter { @@ -179,6 +183,9 @@ JValue ExecuteGotoImpl(Thread* self, const DexFile::CodeItem* code_item, ShadowF } } + std::unique_ptr<lambda::ClosureBuilder> lambda_closure_builder; + size_t lambda_captured_variable_index = 0; + // Jump to first instruction. 
ADVANCE(0); UNREACHABLE_CODE_CHECK(); @@ -2412,7 +2419,20 @@ JValue ExecuteGotoImpl(Thread* self, const DexFile::CodeItem* code_item, ShadowF HANDLE_INSTRUCTION_END(); HANDLE_EXPERIMENTAL_INSTRUCTION_START(CREATE_LAMBDA) { - bool success = DoCreateLambda<true>(self, shadow_frame, inst); + if (lambda_closure_builder == nullptr) { + // DoCreateLambda always needs a ClosureBuilder, even if it has 0 captured variables. + lambda_closure_builder = MakeUnique<lambda::ClosureBuilder>(); + } + + // TODO: these allocations should not leak, and the lambda method should not be local. + lambda::Closure* lambda_closure = + reinterpret_cast<lambda::Closure*>(alloca(lambda_closure_builder->GetSize())); + bool success = DoCreateLambda<do_access_check>(self, + inst, + /*inout*/shadow_frame, + /*inout*/lambda_closure_builder.get(), + /*inout*/lambda_closure); + lambda_closure_builder.reset(nullptr); // reset state of variables captured POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2); } HANDLE_EXPERIMENTAL_INSTRUCTION_END(); @@ -2429,6 +2449,31 @@ JValue ExecuteGotoImpl(Thread* self, const DexFile::CodeItem* code_item, ShadowF } HANDLE_EXPERIMENTAL_INSTRUCTION_END(); + HANDLE_EXPERIMENTAL_INSTRUCTION_START(CAPTURE_VARIABLE) { + if (lambda_closure_builder == nullptr) { + lambda_closure_builder = MakeUnique<lambda::ClosureBuilder>(); + } + + bool success = DoCaptureVariable<do_access_check>(self, + inst, + /*inout*/shadow_frame, + /*inout*/lambda_closure_builder.get()); + + POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2); + } + HANDLE_EXPERIMENTAL_INSTRUCTION_END(); + + HANDLE_EXPERIMENTAL_INSTRUCTION_START(LIBERATE_VARIABLE) { + bool success = DoLiberateVariable<do_access_check>(self, + inst, + lambda_captured_variable_index, + /*inout*/shadow_frame); + // Temporarily only allow sequences of 'liberate-variable, liberate-variable, ...' + lambda_captured_variable_index++; + POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2); + } + HANDLE_EXPERIMENTAL_INSTRUCTION_END(); + HANDLE_INSTRUCTION_START(UNUSED_3E) UnexpectedOpcode(inst, shadow_frame); HANDLE_INSTRUCTION_END(); @@ -2465,14 +2510,6 @@ JValue ExecuteGotoImpl(Thread* self, const DexFile::CodeItem* code_item, ShadowF UnexpectedOpcode(inst, shadow_frame); HANDLE_INSTRUCTION_END(); - HANDLE_INSTRUCTION_START(UNUSED_F5) - UnexpectedOpcode(inst, shadow_frame); - HANDLE_INSTRUCTION_END(); - - HANDLE_INSTRUCTION_START(UNUSED_F7) - UnexpectedOpcode(inst, shadow_frame); - HANDLE_INSTRUCTION_END(); - HANDLE_INSTRUCTION_START(UNUSED_FA) UnexpectedOpcode(inst, shadow_frame); HANDLE_INSTRUCTION_END(); diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc index b5cc11e070..083dfb5267 100644 --- a/runtime/interpreter/interpreter_switch_impl.cc +++ b/runtime/interpreter/interpreter_switch_impl.cc @@ -14,9 +14,12 @@ * limitations under the License. */ +#include "base/stl_util.h" // MakeUnique #include "interpreter_common.h" #include "safe_math.h" +#include <memory> // std::unique_ptr + namespace art { namespace interpreter { @@ -82,6 +85,11 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, const uint16_t* const insns = code_item->insns_; const Instruction* inst = Instruction::At(insns + dex_pc); uint16_t inst_data; + + // TODO: collapse capture-variable+create-lambda into one opcode, then we won't need + // to keep this live for the scope of the entire function call. 
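Both interpreter flavors now thread the same per-invocation lambda state, declared just below: a lazily created ClosureBuilder that capture-variable feeds and create-lambda consumes and resets, plus a running index that liberate-variable uses to walk captured slots in order. A rough sketch of that state machine (ToyBuilder and the handlers are illustrative; the real interpreters also allocate the closure itself via alloca sized by the builder, and can throw):

#include <cstddef>
#include <memory>
#include <vector>

struct ToyBuilder {
  std::vector<int> captured;
  size_t GetSize() const { return captured.size(); }
};

struct ToyInterpreterLambdaState {
  std::unique_ptr<ToyBuilder> lambda_closure_builder;
  size_t lambda_captured_variable_index = 0;

  void OnCaptureVariable(int value) {
    if (lambda_closure_builder == nullptr) {
      lambda_closure_builder = std::make_unique<ToyBuilder>();
    }
    lambda_closure_builder->captured.push_back(value);
  }

  size_t OnCreateLambda() {
    if (lambda_closure_builder == nullptr) {
      // A 0-capture lambda still needs a builder.
      lambda_closure_builder = std::make_unique<ToyBuilder>();
    }
    size_t closure_size = lambda_closure_builder->GetSize();
    lambda_closure_builder.reset();  // captured state must not leak into the next lambda
    return closure_size;
  }

  // Temporarily, liberate-variable just walks captured slots in instruction order.
  size_t OnLiberateVariable() { return lambda_captured_variable_index++; }
};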
+ std::unique_ptr<lambda::ClosureBuilder> lambda_closure_builder; + size_t lambda_captured_variable_index = 0; while (true) { dex_pc = inst->GetDexPc(insns); shadow_frame.SetDexPC(dex_pc); @@ -2235,19 +2243,63 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx); break; } + case Instruction::CAPTURE_VARIABLE: { + if (!IsExperimentalInstructionEnabled(inst)) { + UnexpectedOpcode(inst, shadow_frame); + } + + if (lambda_closure_builder == nullptr) { + lambda_closure_builder = MakeUnique<lambda::ClosureBuilder>(); + } + + PREAMBLE(); + bool success = DoCaptureVariable<do_access_check>(self, + inst, + /*inout*/shadow_frame, + /*inout*/lambda_closure_builder.get()); + POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx); + break; + } case Instruction::CREATE_LAMBDA: { if (!IsExperimentalInstructionEnabled(inst)) { UnexpectedOpcode(inst, shadow_frame); } PREAMBLE(); - bool success = DoCreateLambda<do_access_check>(self, shadow_frame, inst); + + if (lambda_closure_builder == nullptr) { + // DoCreateLambda always needs a ClosureBuilder, even if it has 0 captured variables. + lambda_closure_builder = MakeUnique<lambda::ClosureBuilder>(); + } + + // TODO: these allocations should not leak, and the lambda method should not be local. + lambda::Closure* lambda_closure = + reinterpret_cast<lambda::Closure*>(alloca(lambda_closure_builder->GetSize())); + bool success = DoCreateLambda<do_access_check>(self, + inst, + /*inout*/shadow_frame, + /*inout*/lambda_closure_builder.get(), + /*inout*/lambda_closure); + lambda_closure_builder.reset(nullptr); // reset state of variables captured + POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx); + break; + } + case Instruction::LIBERATE_VARIABLE: { + if (!IsExperimentalInstructionEnabled(inst)) { + UnexpectedOpcode(inst, shadow_frame); + } + + PREAMBLE(); + bool success = DoLiberateVariable<do_access_check>(self, + inst, + lambda_captured_variable_index, + /*inout*/shadow_frame); + // Temporarily only allow sequences of 'liberate-variable, liberate-variable, ...' + lambda_captured_variable_index++; POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx); break; } - case Instruction::UNUSED_F4: - case Instruction::UNUSED_F5: - case Instruction::UNUSED_F7: { + case Instruction::UNUSED_F4: { if (!IsExperimentalInstructionEnabled(inst)) { UnexpectedOpcode(inst, shadow_frame); } diff --git a/runtime/java_vm_ext.cc b/runtime/java_vm_ext.cc index 531e03926a..b5e28e9314 100644 --- a/runtime/java_vm_ext.cc +++ b/runtime/java_vm_ext.cc @@ -60,7 +60,7 @@ class SharedLibrary { : path_(path), handle_(handle), needs_native_bridge_(false), - class_loader_(env->NewGlobalRef(class_loader)), + class_loader_(env->NewWeakGlobalRef(class_loader)), jni_on_load_lock_("JNI_OnLoad lock"), jni_on_load_cond_("JNI_OnLoad condition variable", jni_on_load_lock_), jni_on_load_thread_id_(self->GetThreadId()), @@ -70,11 +70,11 @@ class SharedLibrary { ~SharedLibrary() { Thread* self = Thread::Current(); if (self != nullptr) { - self->GetJniEnv()->DeleteGlobalRef(class_loader_); + self->GetJniEnv()->DeleteWeakGlobalRef(class_loader_); } } - jobject GetClassLoader() const { + jweak GetClassLoader() const { return class_loader_; } @@ -131,7 +131,13 @@ class SharedLibrary { return needs_native_bridge_; } - void* FindSymbol(const std::string& symbol_name) { + void* FindSymbol(const std::string& symbol_name, const char* shorty = nullptr) { + return NeedsNativeBridge() + ? 
FindSymbolWithNativeBridge(symbol_name.c_str(), shorty) + : FindSymbolWithoutNativeBridge(symbol_name.c_str()); + } + + void* FindSymbolWithoutNativeBridge(const std::string& symbol_name) { CHECK(!NeedsNativeBridge()); return dlsym(handle_, symbol_name.c_str()); @@ -160,9 +166,9 @@ class SharedLibrary { // True if a native bridge is required. bool needs_native_bridge_; - // The ClassLoader this library is associated with, a global JNI reference that is + // The ClassLoader this library is associated with, a weak global JNI reference that is // created/deleted with the scope of the library. - const jobject class_loader_; + const jweak class_loader_; // Guards remaining items. Mutex jni_on_load_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER; @@ -184,7 +190,10 @@ class Libraries { STLDeleteValues(&libraries_); } - void Dump(std::ostream& os) const { + // NO_THREAD_SAFETY_ANALYSIS since this may be called from Dumpable. Dumpable can't be annotated + // properly due to the template. The caller should be holding the jni_libraries_lock_. + void Dump(std::ostream& os) const NO_THREAD_SAFETY_ANALYSIS { + Locks::jni_libraries_lock_->AssertHeld(Thread::Current()); bool first = true; for (const auto& library : libraries_) { if (!first) { @@ -195,16 +204,17 @@ class Libraries { } } - size_t size() const { + size_t size() const REQUIRES(Locks::jni_libraries_lock_) { return libraries_.size(); } - SharedLibrary* Get(const std::string& path) { + SharedLibrary* Get(const std::string& path) REQUIRES(Locks::jni_libraries_lock_) { auto it = libraries_.find(path); return (it == libraries_.end()) ? nullptr : it->second; } - void Put(const std::string& path, SharedLibrary* library) { + void Put(const std::string& path, SharedLibrary* library) + REQUIRES(Locks::jni_libraries_lock_) { libraries_.Put(path, library); } @@ -217,24 +227,18 @@ class Libraries { const mirror::ClassLoader* declaring_class_loader = m->GetDeclaringClass()->GetClassLoader(); ScopedObjectAccessUnchecked soa(Thread::Current()); for (const auto& lib : libraries_) { - SharedLibrary* library = lib.second; + SharedLibrary* const library = lib.second; if (soa.Decode<mirror::ClassLoader*>(library->GetClassLoader()) != declaring_class_loader) { // We only search libraries loaded by the appropriate ClassLoader. continue; } // Try the short name then the long name... - void* fn; - if (library->NeedsNativeBridge()) { - const char* shorty = m->GetShorty(); - fn = library->FindSymbolWithNativeBridge(jni_short_name, shorty); - if (fn == nullptr) { - fn = library->FindSymbolWithNativeBridge(jni_long_name, shorty); - } - } else { - fn = library->FindSymbol(jni_short_name); - if (fn == nullptr) { - fn = library->FindSymbol(jni_long_name); - } + const char* shorty = library->NeedsNativeBridge() + ? m->GetShorty() + : nullptr; + void* fn = library->FindSymbol(jni_short_name, shorty); + if (fn == nullptr) { + fn = library->FindSymbol(jni_long_name, shorty); } if (fn != nullptr) { VLOG(jni) << "[Found native code for " << PrettyMethod(m) @@ -249,11 +253,50 @@ class Libraries { return nullptr; } + // Unload native libraries with cleared class loaders. 
+ void UnloadNativeLibraries() + REQUIRES(!Locks::jni_libraries_lock_) + SHARED_REQUIRES(Locks::mutator_lock_) { + ScopedObjectAccessUnchecked soa(Thread::Current()); + typedef void (*JNI_OnUnloadFn)(JavaVM*, void*); + std::vector<JNI_OnUnloadFn> unload_functions; + { + MutexLock mu(soa.Self(), *Locks::jni_libraries_lock_); + for (auto it = libraries_.begin(); it != libraries_.end(); ) { + SharedLibrary* const library = it->second; + // If class loader is null then it was unloaded, call JNI_OnUnload. + const jweak class_loader = library->GetClassLoader(); + // If class_loader is a null jobject then it is the boot class loader. We should not unload + // the native libraries of the boot class loader. + if (class_loader != nullptr && + soa.Decode<mirror::ClassLoader*>(class_loader) == nullptr) { + void* const sym = library->FindSymbol("JNI_OnUnload", nullptr); + if (sym == nullptr) { + VLOG(jni) << "[No JNI_OnUnload found in \"" << library->GetPath() << "\"]"; + } else { + VLOG(jni) << "[JNI_OnUnload found for \"" << library->GetPath() << "\"]"; + JNI_OnUnloadFn jni_on_unload = reinterpret_cast<JNI_OnUnloadFn>(sym); + unload_functions.push_back(jni_on_unload); + } + delete library; + it = libraries_.erase(it); + } else { + ++it; + } + } + } + // Do this without holding the jni libraries lock to prevent possible deadlocks. + for (JNI_OnUnloadFn fn : unload_functions) { + VLOG(jni) << "Calling JNI_OnUnload"; + (*fn)(soa.Vm(), nullptr); + } + } + private: - AllocationTrackingSafeMap<std::string, SharedLibrary*, kAllocatorTagJNILibraries> libraries_; + AllocationTrackingSafeMap<std::string, SharedLibrary*, kAllocatorTagJNILibraries> libraries_ + GUARDED_BY(Locks::jni_libraries_lock_); }; - class JII { public: static jint DestroyJavaVM(JavaVM* vm) { @@ -641,6 +684,10 @@ void JavaVMExt::DumpReferenceTables(std::ostream& os) { } } +void JavaVMExt::UnloadNativeLibraries() { + libraries_.get()->UnloadNativeLibraries(); +} + bool JavaVMExt::LoadNativeLibrary(JNIEnv* env, const std::string& path, jobject class_loader, std::string* error_msg) { error_msg->clear(); @@ -738,10 +785,8 @@ bool JavaVMExt::LoadNativeLibrary(JNIEnv* env, const std::string& path, jobject void* sym; if (needs_native_bridge) { library->SetNeedsNativeBridge(); - sym = library->FindSymbolWithNativeBridge("JNI_OnLoad", nullptr); - } else { - sym = dlsym(handle, "JNI_OnLoad"); } + sym = library->FindSymbol("JNI_OnLoad", nullptr); if (sym == nullptr) { VLOG(jni) << "[No JNI_OnLoad found in \"" << path << "\"]"; was_successful = true; diff --git a/runtime/java_vm_ext.h b/runtime/java_vm_ext.h index b539bbdba3..c1fbdc0389 100644 --- a/runtime/java_vm_ext.h +++ b/runtime/java_vm_ext.h @@ -88,6 +88,11 @@ class JavaVMExt : public JavaVM { bool LoadNativeLibrary(JNIEnv* env, const std::string& path, jobject javaLoader, std::string* error_msg); + // Unload native libraries with cleared class loaders. + void UnloadNativeLibraries() + REQUIRES(!Locks::jni_libraries_lock_) + SHARED_REQUIRES(Locks::mutator_lock_); + /** * Returns a pointer to the code for the native method 'm', found * using dlsym(3) on every native library that's been loaded so far. @@ -184,7 +189,9 @@ class JavaVMExt : public JavaVM { // Not guarded by globals_lock since we sometimes use SynchronizedGet in Thread::DecodeJObject. IndirectReferenceTable globals_; - std::unique_ptr<Libraries> libraries_ GUARDED_BY(Locks::jni_libraries_lock_); + // No lock annotation since UnloadNativeLibraries is called on libraries_ but locks the + // jni_libraries_lock_ internally. 
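UnloadNativeLibraries above gathers the JNI_OnUnload function pointers while holding jni_libraries_lock_ and only invokes them after dropping the lock, so an unload hook that blocks or re-enters the runtime cannot deadlock. The same deferred-callback shape in isolation (toy registry using std::mutex and std::function rather than the ART Mutex and raw function pointers):

#include <functional>
#include <mutex>
#include <vector>

// Decide what to call while holding the lock; run the callbacks after releasing it.
class CallbackRegistry {
 public:
  void Add(std::function<void()> cb) {
    std::lock_guard<std::mutex> lock(mutex_);
    callbacks_.push_back(std::move(cb));
  }

  void DrainAndRun() {
    std::vector<std::function<void()>> to_run;
    {
      std::lock_guard<std::mutex> lock(mutex_);
      to_run.swap(callbacks_);  // take a snapshot under the lock
    }
    for (auto& cb : to_run) {   // invoke outside the lock
      cb();
    }
  }

 private:
  std::mutex mutex_;
  std::vector<std::function<void()>> callbacks_;
};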
+ std::unique_ptr<Libraries> libraries_; // Used by -Xcheck:jni. const JNIInvokeInterface* const unchecked_functions_; diff --git a/runtime/jit/jit.h b/runtime/jit/jit.h index 643bc23da3..e73ba82278 100644 --- a/runtime/jit/jit.h +++ b/runtime/jit/jit.h @@ -67,6 +67,9 @@ class Jit { void DumpInfo(std::ostream& os); // Add a timing logger to cumulative_timings_. void AddTimingLogger(const TimingLogger& logger); + JitInstrumentationCache* GetInstrumentationCache() const { + return instrumentation_cache_.get(); + } private: Jit(); diff --git a/runtime/jit/jit_instrumentation.cc b/runtime/jit/jit_instrumentation.cc index d437dd5d56..9b9c5d2760 100644 --- a/runtime/jit/jit_instrumentation.cc +++ b/runtime/jit/jit_instrumentation.cc @@ -24,11 +24,21 @@ namespace art { namespace jit { -class JitCompileTask : public Task { +class JitCompileTask FINAL : public Task { public: - explicit JitCompileTask(ArtMethod* method) : method_(method) {} + explicit JitCompileTask(ArtMethod* method) : method_(method) { + ScopedObjectAccess soa(Thread::Current()); + // Add a global ref to the class to prevent class unloading until compilation is done. + klass_ = soa.Vm()->AddGlobalRef(soa.Self(), method_->GetDeclaringClass()); + CHECK(klass_ != nullptr); + } + + ~JitCompileTask() { + ScopedObjectAccess soa(Thread::Current()); + soa.Vm()->DeleteGlobalRef(soa.Self(), klass_); + } - virtual void Run(Thread* self) OVERRIDE { + void Run(Thread* self) OVERRIDE { ScopedObjectAccess soa(self); VLOG(jit) << "JitCompileTask compiling method " << PrettyMethod(method_); if (!Runtime::Current()->GetJit()->CompileMethod(method_, self)) { @@ -36,12 +46,13 @@ class JitCompileTask : public Task { } } - virtual void Finalize() OVERRIDE { + void Finalize() OVERRIDE { delete this; } private: ArtMethod* const method_; + jobject klass_; DISALLOW_IMPLICIT_CONSTRUCTORS(JitCompileTask); }; @@ -100,9 +111,16 @@ void JitInstrumentationListener::InvokeVirtualOrInterface(Thread* thread, DCHECK(this_object != nullptr); ProfilingInfo* info = caller->GetProfilingInfo(sizeof(void*)); if (info != nullptr) { + // Since the instrumentation is marked from the declaring class we need to mark the card so + // that mod-union tables and card rescanning know about the update. + Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(caller->GetDeclaringClass()); info->AddInvokeInfo(thread, dex_pc, this_object->GetClass()); } } +void JitInstrumentationCache::WaitForCompilationToFinish(Thread* self) { + thread_pool_->Wait(self, false, false); +} + } // namespace jit } // namespace art diff --git a/runtime/jit/jit_instrumentation.h b/runtime/jit/jit_instrumentation.h index 6fdef6585d..9eb464b841 100644 --- a/runtime/jit/jit_instrumentation.h +++ b/runtime/jit/jit_instrumentation.h @@ -50,6 +50,8 @@ class JitInstrumentationCache { SHARED_REQUIRES(Locks::mutator_lock_); void CreateThreadPool(); void DeleteThreadPool(); + // Wait until there is no more pending compilation tasks. 
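JitCompileTask above pins the method's declaring class with a JNI global reference for the lifetime of the task, so class unloading cannot race with a queued compilation. A scoped version of that idiom as a sketch only: it caches the JNIEnv*, which assumes construction and destruction happen on the same attached thread, whereas the real task goes through the JavaVM with the current Thread.

#include <jni.h>

// Hold a global reference for the lifetime of this object so the referenced class
// (and its class loader) cannot be unloaded while a background task is queued or running.
class ScopedGlobalPin {
 public:
  ScopedGlobalPin(JNIEnv* env, jobject obj)
      : env_(env), ref_(env->NewGlobalRef(obj)) {}

  ~ScopedGlobalPin() {
    if (ref_ != nullptr) {
      env_->DeleteGlobalRef(ref_);
    }
  }

  ScopedGlobalPin(const ScopedGlobalPin&) = delete;
  ScopedGlobalPin& operator=(const ScopedGlobalPin&) = delete;

  jobject get() const { return ref_; }

 private:
  JNIEnv* env_;
  jobject ref_;
};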
+ void WaitForCompilationToFinish(Thread* self); private: size_t hot_method_threshold_; diff --git a/runtime/jni_env_ext.cc b/runtime/jni_env_ext.cc index b18b430403..4104d7a0e8 100644 --- a/runtime/jni_env_ext.cc +++ b/runtime/jni_env_ext.cc @@ -16,10 +16,17 @@ #include "jni_env_ext.h" +#include <algorithm> +#include <vector> + #include "check_jni.h" #include "indirect_reference_table.h" #include "java_vm_ext.h" #include "jni_internal.h" +#include "lock_word.h" +#include "mirror/object-inl.h" +#include "nth_caller_visitor.h" +#include "thread-inl.h" namespace art { @@ -63,14 +70,14 @@ JNIEnvExt::JNIEnvExt(Thread* self_in, JavaVMExt* vm_in) JNIEnvExt::~JNIEnvExt() { } -jobject JNIEnvExt::NewLocalRef(mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_) { +jobject JNIEnvExt::NewLocalRef(mirror::Object* obj) { if (obj == nullptr) { return nullptr; } return reinterpret_cast<jobject>(locals.Add(local_ref_cookie, obj)); } -void JNIEnvExt::DeleteLocalRef(jobject obj) SHARED_REQUIRES(Locks::mutator_lock_) { +void JNIEnvExt::DeleteLocalRef(jobject obj) { if (obj != nullptr) { locals.Remove(local_ref_cookie, reinterpret_cast<IndirectRef>(obj)); } @@ -86,14 +93,14 @@ void JNIEnvExt::DumpReferenceTables(std::ostream& os) { monitors.Dump(os); } -void JNIEnvExt::PushFrame(int capacity) SHARED_REQUIRES(Locks::mutator_lock_) { +void JNIEnvExt::PushFrame(int capacity) { UNUSED(capacity); // cpplint gets confused with (int) and thinks its a cast. // TODO: take 'capacity' into account. stacked_local_ref_cookies.push_back(local_ref_cookie); local_ref_cookie = locals.GetSegmentState(); } -void JNIEnvExt::PopFrame() SHARED_REQUIRES(Locks::mutator_lock_) { +void JNIEnvExt::PopFrame() { locals.SetSegmentState(local_ref_cookie); local_ref_cookie = stacked_local_ref_cookies.back(); stacked_local_ref_cookies.pop_back(); @@ -104,4 +111,118 @@ Offset JNIEnvExt::SegmentStateOffset() { IndirectReferenceTable::SegmentStateOffset().Int32Value()); } +// Use some defining part of the caller's frame as the identifying mark for the JNI segment. +static uintptr_t GetJavaCallFrame(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) { + NthCallerVisitor zeroth_caller(self, 0, false); + zeroth_caller.WalkStack(); + if (zeroth_caller.caller == nullptr) { + // No Java code, must be from pure native code. + return 0; + } else if (zeroth_caller.GetCurrentQuickFrame() == nullptr) { + // Shadow frame = interpreter. Use the actual shadow frame's address. + DCHECK(zeroth_caller.GetCurrentShadowFrame() != nullptr); + return reinterpret_cast<uintptr_t>(zeroth_caller.GetCurrentShadowFrame()); + } else { + // Quick frame = compiled code. Use the bottom of the frame. + return reinterpret_cast<uintptr_t>(zeroth_caller.GetCurrentQuickFrame()); + } +} + +void JNIEnvExt::RecordMonitorEnter(jobject obj) { + locked_objects_.push_back(std::make_pair(GetJavaCallFrame(self), obj)); +} + +static std::string ComputeMonitorDescription(Thread* self, + jobject obj) SHARED_REQUIRES(Locks::mutator_lock_) { + mirror::Object* o = self->DecodeJObject(obj); + if ((o->GetLockWord(false).GetState() == LockWord::kThinLocked) && + Locks::mutator_lock_->IsExclusiveHeld(self)) { + // Getting the identity hashcode here would result in lock inflation and suspension of the + // current thread, which isn't safe if this is the only runnable thread. + return StringPrintf("<@addr=0x%" PRIxPTR "> (a %s)", + reinterpret_cast<intptr_t>(o), + PrettyTypeOf(o).c_str()); + } else { + // IdentityHashCode can cause thread suspension, which would invalidate o if it moved. 
So + // we get the pretty type before we call IdentityHashCode. + const std::string pretty_type(PrettyTypeOf(o)); + return StringPrintf("<0x%08x> (a %s)", o->IdentityHashCode(), pretty_type.c_str()); + } +} + +static void RemoveMonitors(Thread* self, + uintptr_t frame, + ReferenceTable* monitors, + std::vector<std::pair<uintptr_t, jobject>>* locked_objects) + SHARED_REQUIRES(Locks::mutator_lock_) { + auto kept_end = std::remove_if( + locked_objects->begin(), + locked_objects->end(), + [self, frame, monitors](const std::pair<uintptr_t, jobject>& pair) + SHARED_REQUIRES(Locks::mutator_lock_) { + if (frame == pair.first) { + mirror::Object* o = self->DecodeJObject(pair.second); + monitors->Remove(o); + return true; + } + return false; + }); + locked_objects->erase(kept_end, locked_objects->end()); +} + +void JNIEnvExt::CheckMonitorRelease(jobject obj) { + uintptr_t current_frame = GetJavaCallFrame(self); + std::pair<uintptr_t, jobject> exact_pair = std::make_pair(current_frame, obj); + auto it = std::find(locked_objects_.begin(), locked_objects_.end(), exact_pair); + bool will_abort = false; + if (it != locked_objects_.end()) { + locked_objects_.erase(it); + } else { + // Check whether this monitor was locked in another JNI "session." + mirror::Object* mirror_obj = self->DecodeJObject(obj); + for (std::pair<uintptr_t, jobject>& pair : locked_objects_) { + if (self->DecodeJObject(pair.second) == mirror_obj) { + std::string monitor_descr = ComputeMonitorDescription(self, pair.second); + vm->JniAbortF("<JNI MonitorExit>", + "Unlocking monitor that wasn't locked here: %s", + monitor_descr.c_str()); + will_abort = true; + break; + } + } + } + + // When we abort, also make sure that any locks from the current "session" are removed from + // the monitors table, otherwise we may visit local objects in GC during abort (which won't be + // valid anymore). + if (will_abort) { + RemoveMonitors(self, current_frame, &monitors, &locked_objects_); + } +} + +void JNIEnvExt::CheckNoHeldMonitors() { + uintptr_t current_frame = GetJavaCallFrame(self); + // The locked_objects_ are grouped by their stack frame component, as this enforces structured + // locking, and the groups form a stack. So the current frame entries are at the end. Check + // whether the vector is empty, and when there are elements, whether the last element belongs + // to this call - this signals that there are unlocked monitors. + if (!locked_objects_.empty()) { + std::pair<uintptr_t, jobject>& pair = locked_objects_[locked_objects_.size() - 1]; + if (pair.first == current_frame) { + std::string monitor_descr = ComputeMonitorDescription(self, pair.second); + vm->JniAbortF("<JNI End>", + "Still holding a locked object on JNI end: %s", + monitor_descr.c_str()); + // When we abort, also make sure that any locks from the current "session" are removed from + // the monitors table, otherwise we may visit local objects in GC during abort. + RemoveMonitors(self, current_frame, &monitors, &locked_objects_); + } else if (kIsDebugBuild) { + // Make sure there are really no other entries and our checking worked as expected. 
+ for (std::pair<uintptr_t, jobject>& check_pair : locked_objects_) { + CHECK_NE(check_pair.first, current_frame); + } + } + } +} + } // namespace art diff --git a/runtime/jni_env_ext.h b/runtime/jni_env_ext.h index 9b55536e98..3828ff045d 100644 --- a/runtime/jni_env_ext.h +++ b/runtime/jni_env_ext.h @@ -43,8 +43,8 @@ struct JNIEnvExt : public JNIEnv { void SetCheckJniEnabled(bool enabled); - void PushFrame(int capacity); - void PopFrame(); + void PushFrame(int capacity) SHARED_REQUIRES(Locks::mutator_lock_); + void PopFrame() SHARED_REQUIRES(Locks::mutator_lock_); template<typename T> T AddLocalReference(mirror::Object* obj) @@ -89,10 +89,27 @@ struct JNIEnvExt : public JNIEnv { // Used by -Xcheck:jni. const JNINativeInterface* unchecked_functions; + // Functions to keep track of monitor lock and unlock operations. Used to ensure proper locking + // rules in CheckJNI mode. + + // Record locking of a monitor. + void RecordMonitorEnter(jobject obj) SHARED_REQUIRES(Locks::mutator_lock_); + + // Check the release, that is, that the release is performed in the same JNI "segment." + void CheckMonitorRelease(jobject obj) SHARED_REQUIRES(Locks::mutator_lock_); + + // Check that no monitors are held that have been acquired in this JNI "segment." + void CheckNoHeldMonitors() SHARED_REQUIRES(Locks::mutator_lock_); + private: // The constructor should not be called directly. It may leave the object in an erronuous state, // and the result needs to be checked. JNIEnvExt(Thread* self, JavaVMExt* vm); + + // All locked objects, with the (Java caller) stack frame that locked them. Used in CheckJNI + // to ensure that only monitors locked in this native frame are being unlocked, and that at + // the end all are unlocked. + std::vector<std::pair<uintptr_t, jobject>> locked_objects_; }; // Used to save and restore the JNIEnvExt state when not going through code created by the JNI diff --git a/runtime/jni_internal_test.cc b/runtime/jni_internal_test.cc index 2a0cb28f0c..41b368ec32 100644 --- a/runtime/jni_internal_test.cc +++ b/runtime/jni_internal_test.cc @@ -607,11 +607,64 @@ class JniInternalTest : public CommonCompilerTest { EXPECT_EQ(check_jni, vm_->SetCheckJniEnabled(old_check_jni)); } + void SetUpForTest(bool direct, const char* method_name, const char* method_sig, + void* native_fnptr) { + // Initialize class loader and set generic JNI entrypoint. + // Note: this code is adapted from the jni_compiler_test, and taken with minimal modifications. + if (!runtime_->IsStarted()) { + { + ScopedObjectAccess soa(Thread::Current()); + class_loader_ = LoadDex("MyClassNatives"); + StackHandleScope<1> hs(soa.Self()); + Handle<mirror::ClassLoader> loader( + hs.NewHandle(soa.Decode<mirror::ClassLoader*>(class_loader_))); + mirror::Class* c = class_linker_->FindClass(soa.Self(), "LMyClassNatives;", loader); + const auto pointer_size = class_linker_->GetImagePointerSize(); + ArtMethod* method = direct ? c->FindDirectMethod(method_name, method_sig, pointer_size) : + c->FindVirtualMethod(method_name, method_sig, pointer_size); + ASSERT_TRUE(method != nullptr) << method_name << " " << method_sig; + method->SetEntryPointFromQuickCompiledCode(class_linker_->GetRuntimeQuickGenericJniStub()); + } + // Start runtime. + Thread::Current()->TransitionFromSuspendedToRunnable(); + bool started = runtime_->Start(); + CHECK(started); + } + // JNI operations after runtime start. 
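The jni_env_ext.cc changes above record every MonitorEnter together with the Java call frame that performed it, then verify on MonitorExit and on native-method return that locking stayed structured; the tests further below exercise both abort paths. A toy version of that bookkeeping (plain ints stand in for jobjects and the quick/shadow frame addresses, and the "locked in a different session" diagnostics are folded into a simple false return):

#include <cstdint>
#include <utility>
#include <vector>

class MonitorLedger {
 public:
  void RecordEnter(uintptr_t frame, int obj) { locked_.push_back({frame, obj}); }

  // False if 'obj' was not locked by 'frame' (the "Unlocking monitor that wasn't locked
  // here" abort in CheckJNI).
  bool RecordExit(uintptr_t frame, int obj) {
    for (auto it = locked_.begin(); it != locked_.end(); ++it) {
      if (it->first == frame && it->second == obj) {
        locked_.erase(it);
        return true;
      }
    }
    return false;
  }

  // False if the returning frame still holds a monitor (the "Still holding a locked
  // object on JNI end" abort). Entries are grouped per frame like a stack, so only the
  // last one needs checking.
  bool CheckFrameClean(uintptr_t frame) const {
    return locked_.empty() || locked_.back().first != frame;
  }

 private:
  std::vector<std::pair<uintptr_t, int>> locked_;
};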
+ env_ = Thread::Current()->GetJniEnv(); + jklass_ = env_->FindClass("MyClassNatives"); + ASSERT_TRUE(jklass_ != nullptr) << method_name << " " << method_sig; + + if (direct) { + jmethod_ = env_->GetStaticMethodID(jklass_, method_name, method_sig); + } else { + jmethod_ = env_->GetMethodID(jklass_, method_name, method_sig); + } + ASSERT_TRUE(jmethod_ != nullptr) << method_name << " " << method_sig; + + if (native_fnptr != nullptr) { + JNINativeMethod methods[] = { { method_name, method_sig, native_fnptr } }; + ASSERT_EQ(JNI_OK, env_->RegisterNatives(jklass_, methods, 1)) + << method_name << " " << method_sig; + } else { + env_->UnregisterNatives(jklass_); + } + + jmethodID constructor = env_->GetMethodID(jklass_, "<init>", "()V"); + jobj_ = env_->NewObject(jklass_, constructor); + ASSERT_TRUE(jobj_ != nullptr) << method_name << " " << method_sig; + } + JavaVMExt* vm_; JNIEnv* env_; jclass aioobe_; jclass ase_; jclass sioobe_; + + jclass jklass_; + jobject jobj_; + jobject class_loader_; + jmethodID jmethod_; }; TEST_F(JniInternalTest, AllocObject) { @@ -2111,4 +2164,38 @@ TEST_F(JniInternalTest, MonitorEnterExit) { } } +void Java_MyClassNatives_foo_exit(JNIEnv* env, jobject thisObj) { + // Release the monitor on self. This should trigger an abort. + env->MonitorExit(thisObj); +} + +TEST_F(JniInternalTest, MonitorExitLockedInDifferentCall) { + SetUpForTest(false, "foo", "()V", reinterpret_cast<void*>(&Java_MyClassNatives_foo_exit)); + ASSERT_NE(jobj_, nullptr); + + env_->MonitorEnter(jobj_); + EXPECT_FALSE(env_->ExceptionCheck()); + + CheckJniAbortCatcher check_jni_abort_catcher; + env_->CallNonvirtualVoidMethod(jobj_, jklass_, jmethod_); + check_jni_abort_catcher.Check("Unlocking monitor that wasn't locked here"); +} + +void Java_MyClassNatives_foo_enter_no_exit(JNIEnv* env, jobject thisObj) { + // Acquire but don't release the monitor on self. This should trigger an abort on return. + env->MonitorEnter(thisObj); +} + +TEST_F(JniInternalTest, MonitorExitNotAllUnlocked) { + SetUpForTest(false, + "foo", + "()V", + reinterpret_cast<void*>(&Java_MyClassNatives_foo_enter_no_exit)); + ASSERT_NE(jobj_, nullptr); + + CheckJniAbortCatcher check_jni_abort_catcher; + env_->CallNonvirtualVoidMethod(jobj_, jklass_, jmethod_); + check_jni_abort_catcher.Check("Still holding a locked object on JNI end"); +} + } // namespace art diff --git a/runtime/lambda/art_lambda_method.h b/runtime/lambda/art_lambda_method.h index 892d8c6f6b..ea13eb7af6 100644 --- a/runtime/lambda/art_lambda_method.h +++ b/runtime/lambda/art_lambda_method.h @@ -35,7 +35,7 @@ class ArtLambdaMethod { // (Ownership of strings is retained by the caller and the lifetime should exceed this class). ArtLambdaMethod(ArtMethod* target_method, const char* captured_variables_type_descriptor, - const char* captured_variables_shorty_, + const char* captured_variables_shorty, bool innate_lambda = true); // Get the target method for this lambda that would be used by the invoke-lambda dex instruction. diff --git a/runtime/lambda/box_table.cc b/runtime/lambda/box_table.cc index 26575fd995..8eef10bbad 100644 --- a/runtime/lambda/box_table.cc +++ b/runtime/lambda/box_table.cc @@ -18,6 +18,8 @@ #include "base/mutex.h" #include "common_throws.h" #include "gc_root-inl.h" +#include "lambda/closure.h" +#include "lambda/leaking_allocator.h" #include "mirror/method.h" #include "mirror/object-inl.h" #include "thread.h" @@ -26,11 +28,53 @@ namespace art { namespace lambda { +// Temporarily represent the lambda Closure as its raw bytes in an array. 
+// TODO: Generate a proxy class for the closure when boxing the first time. +using BoxedClosurePointerType = mirror::ByteArray*; + +static mirror::Class* GetBoxedClosureClass() SHARED_REQUIRES(Locks::mutator_lock_) { + return mirror::ByteArray::GetArrayClass(); +} + +namespace { + // Convenience functions to allocating/deleting box table copies of the closures. + struct ClosureAllocator { + // Deletes a Closure that was allocated through ::Allocate. + static void Delete(Closure* ptr) { + delete[] reinterpret_cast<char*>(ptr); + } + + // Returns a well-aligned pointer to a newly allocated Closure on the 'new' heap. + static Closure* Allocate(size_t size) { + DCHECK_GE(size, sizeof(Closure)); + + // TODO: Maybe point to the interior of the boxed closure object after we add proxy support? + Closure* closure = reinterpret_cast<Closure*>(new char[size]); + DCHECK_ALIGNED(closure, alignof(Closure)); + return closure; + } + }; +} // namespace BoxTable::BoxTable() : allow_new_weaks_(true), new_weaks_condition_("lambda box table allowed weaks", *Locks::lambda_table_lock_) {} +BoxTable::~BoxTable() { + // Free all the copies of our closures. + for (auto map_iterator = map_.begin(); map_iterator != map_.end(); ++map_iterator) { + std::pair<UnorderedMapKeyType, ValueType>& key_value_pair = *map_iterator; + + Closure* closure = key_value_pair.first; + + // Remove from the map first, so that it doesn't try to access dangling pointer. + map_iterator = map_.Erase(map_iterator); + + // Safe to delete, no dangling pointers. + ClosureAllocator::Delete(closure); + } +} + mirror::Object* BoxTable::BoxLambda(const ClosureType& closure) { Thread* self = Thread::Current(); @@ -58,22 +102,29 @@ mirror::Object* BoxTable::BoxLambda(const ClosureType& closure) { // Release the lambda table lock here, so that thread suspension is allowed. - // Convert the ArtMethod into a java.lang.reflect.Method which will serve + // Convert the Closure into a managed byte[] which will serve // as the temporary 'boxed' version of the lambda. This is good enough // to check all the basic object identities that a boxed lambda must retain. + // It's also good enough to contain all the captured primitive variables. // TODO: Boxing an innate lambda (i.e. made with create-lambda) should make a proxy class // TODO: Boxing a learned lambda (i.e. made with unbox-lambda) should return the original object - mirror::Method* method_as_object = - mirror::Method::CreateFromArtMethod(self, closure); + BoxedClosurePointerType closure_as_array_object = + mirror::ByteArray::Alloc(self, closure->GetSize()); + // There are no thread suspension points after this, so we don't need to put it into a handle. - if (UNLIKELY(method_as_object == nullptr)) { + if (UNLIKELY(closure_as_array_object == nullptr)) { // Most likely an OOM has occurred. CHECK(self->IsExceptionPending()); return nullptr; } + // Write the raw closure data into the byte[]. + closure->CopyTo(closure_as_array_object->GetRawData(sizeof(uint8_t), // component size + 0 /*index*/), // index + closure_as_array_object->GetLength()); + // The method has been successfully boxed into an object, now insert it into the hash map. { MutexLock mu(self, *Locks::lambda_table_lock_); @@ -87,38 +138,56 @@ mirror::Object* BoxTable::BoxLambda(const ClosureType& closure) { return value.Read(); } - // Otherwise we should insert it into the hash map in this thread. - map_.Insert(std::make_pair(closure, ValueType(method_as_object))); + // Otherwise we need to insert it into the hash map in this thread. 
+ + // Make a copy for the box table to keep, in case the closure gets collected from the stack. + // TODO: GC may need to sweep for roots in the box table's copy of the closure. + Closure* closure_table_copy = ClosureAllocator::Allocate(closure->GetSize()); + closure->CopyTo(closure_table_copy, closure->GetSize()); + + // The closure_table_copy needs to be deleted by us manually when we erase it from the map. + + // Actually insert into the table. + map_.Insert({closure_table_copy, ValueType(closure_as_array_object)}); } - return method_as_object; + return closure_as_array_object; } bool BoxTable::UnboxLambda(mirror::Object* object, ClosureType* out_closure) { DCHECK(object != nullptr); *out_closure = nullptr; + Thread* self = Thread::Current(); + // Note that we do not need to access lambda_table_lock_ here // since we don't need to look at the map. mirror::Object* boxed_closure_object = object; - // Raise ClassCastException if object is not instanceof java.lang.reflect.Method - if (UNLIKELY(!boxed_closure_object->InstanceOf(mirror::Method::StaticClass()))) { - ThrowClassCastException(mirror::Method::StaticClass(), boxed_closure_object->GetClass()); + // Raise ClassCastException if object is not instanceof byte[] + if (UNLIKELY(!boxed_closure_object->InstanceOf(GetBoxedClosureClass()))) { + ThrowClassCastException(GetBoxedClosureClass(), boxed_closure_object->GetClass()); return false; } // TODO(iam): We must check that the closure object extends/implements the type - // specified in [type id]. This is not currently implemented since it's always a Method. + // specified in [type id]. This is not currently implemented since it's always a byte[]. // If we got this far, the inputs are valid. - // Write out the java.lang.reflect.Method's embedded ArtMethod* into the vreg target. - mirror::AbstractMethod* boxed_closure_as_method = - down_cast<mirror::AbstractMethod*>(boxed_closure_object); + // Shuffle the byte[] back into a raw closure, then allocate it, copy, and return it. + BoxedClosurePointerType boxed_closure_as_array = + down_cast<BoxedClosurePointerType>(boxed_closure_object); + + const int8_t* unaligned_interior_closure = boxed_closure_as_array->GetData(); - ArtMethod* unboxed_closure = boxed_closure_as_method->GetArtMethod(); - DCHECK(unboxed_closure != nullptr); + // Allocate a copy that can "escape" and copy the closure data into that. + Closure* unboxed_closure = + LeakingAllocator::MakeFlexibleInstance<Closure>(self, boxed_closure_as_array->GetLength()); + // TODO: don't just memcpy the closure, it's unsafe when we add references to the mix. + memcpy(unboxed_closure, unaligned_interior_closure, boxed_closure_as_array->GetLength()); + + DCHECK_EQ(unboxed_closure->GetSize(), static_cast<size_t>(boxed_closure_as_array->GetLength())); *out_closure = unboxed_closure; return true; @@ -127,7 +196,7 @@ bool BoxTable::UnboxLambda(mirror::Object* object, ClosureType* out_closure) { BoxTable::ValueType BoxTable::FindBoxedLambda(const ClosureType& closure) const { auto map_iterator = map_.Find(closure); if (map_iterator != map_.end()) { - const std::pair<ClosureType, ValueType>& key_value_pair = *map_iterator; + const std::pair<UnorderedMapKeyType, ValueType>& key_value_pair = *map_iterator; const ValueType& value = key_value_pair.second; DCHECK(!value.IsNull()); // Never store null boxes. 
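The box table is essentially a value-interning map: closures with equal contents must box to the same object so that reference equality on the boxed form is meaningful. A toy version of that idea, keyed by the raw closure bytes with an int standing in for the mirror::ByteArray box (the real table hashes Closure::GetHashCode, compares with ReferenceEquals, stores weak GC roots, and keeps its own heap copy of each closure):

#include <cstdint>
#include <map>
#include <vector>

using ToyClosureBytes = std::vector<uint8_t>;
using ToyBoxedObject = int;

class ToyBoxTable {
 public:
  ToyBoxedObject Box(const ToyClosureBytes& closure) {
    auto it = map_.find(closure);
    if (it != map_.end()) {
      return it->second;         // same contents -> same box
    }
    ToyBoxedObject box = next_box_id_++;
    map_.emplace(closure, box);  // the table keeps its own copy of the bytes
    return box;
  }

 private:
  std::map<ToyClosureBytes, ToyBoxedObject> map_;
  ToyBoxedObject next_box_id_ = 1;
};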
@@ -157,7 +226,7 @@ void BoxTable::SweepWeakBoxedLambdas(IsMarkedVisitor* visitor) { */ std::vector<ClosureType> remove_list; for (auto map_iterator = map_.begin(); map_iterator != map_.end(); ) { - std::pair<ClosureType, ValueType>& key_value_pair = *map_iterator; + std::pair<UnorderedMapKeyType, ValueType>& key_value_pair = *map_iterator; const ValueType& old_value = key_value_pair.second; @@ -166,10 +235,15 @@ void BoxTable::SweepWeakBoxedLambdas(IsMarkedVisitor* visitor) { mirror::Object* new_value = visitor->IsMarked(old_value_raw); if (new_value == nullptr) { - const ClosureType& closure = key_value_pair.first; // The object has been swept away. + const ClosureType& closure = key_value_pair.first; + // Delete the entry from the map. - map_iterator = map_.Erase(map_.Find(closure)); + map_iterator = map_.Erase(map_iterator); + + // Clean up the memory by deleting the closure. + ClosureAllocator::Delete(closure); + } else { // The object has been moved. // Update the map. @@ -208,16 +282,33 @@ void BoxTable::BroadcastForNewWeakBoxedLambdas() { new_weaks_condition_.Broadcast(self); } -bool BoxTable::EqualsFn::operator()(const ClosureType& lhs, const ClosureType& rhs) const { +void BoxTable::EmptyFn::MakeEmpty(std::pair<UnorderedMapKeyType, ValueType>& item) const { + item.first = nullptr; + + Locks::mutator_lock_->AssertSharedHeld(Thread::Current()); + item.second = ValueType(); // Also clear the GC root. +} + +bool BoxTable::EmptyFn::IsEmpty(const std::pair<UnorderedMapKeyType, ValueType>& item) const { + return item.first == nullptr; +} + +bool BoxTable::EqualsFn::operator()(const UnorderedMapKeyType& lhs, + const UnorderedMapKeyType& rhs) const { // Nothing needs this right now, but leave this assertion for later when // we need to look at the references inside of the closure. - if (kIsDebugBuild) { - Locks::mutator_lock_->AssertSharedHeld(Thread::Current()); - } + Locks::mutator_lock_->AssertSharedHeld(Thread::Current()); + + return lhs->ReferenceEquals(rhs); +} + +size_t BoxTable::HashFn::operator()(const UnorderedMapKeyType& key) const { + const lambda::Closure* closure = key; + DCHECK_ALIGNED(closure, alignof(lambda::Closure)); - // TODO: Need rework to use read barriers once closures have references inside of them that can - // move. Until then, it's safe to just compare the data inside of it directly. - return lhs == rhs; + // Need to hold mutator_lock_ before calling into Closure::GetHashCode. + Locks::mutator_lock_->AssertSharedHeld(Thread::Current()); + return closure->GetHashCode(); } } // namespace lambda diff --git a/runtime/lambda/box_table.h b/runtime/lambda/box_table.h index 9ffda6658f..adb733271e 100644 --- a/runtime/lambda/box_table.h +++ b/runtime/lambda/box_table.h @@ -34,6 +34,7 @@ class Object; // forward declaration } // namespace mirror namespace lambda { +struct Closure; // forward declaration /* * Store a table of boxed lambdas. This is required to maintain object referential equality @@ -44,7 +45,7 @@ namespace lambda { */ class BoxTable FINAL { public: - using ClosureType = art::ArtMethod*; + using ClosureType = art::lambda::Closure*; // Boxes a closure into an object. Returns null and throws an exception on failure. mirror::Object* BoxLambda(const ClosureType& closure) @@ -72,10 +73,9 @@ class BoxTable FINAL { REQUIRES(!Locks::lambda_table_lock_); BoxTable(); - ~BoxTable() = default; + ~BoxTable(); private: - // FIXME: This needs to be a GcRoot. 
// Explanation: // - After all threads are suspended (exclusive mutator lock), // the concurrent-copying GC can move objects from the "from" space to the "to" space. @@ -97,30 +97,30 @@ class BoxTable FINAL { void BlockUntilWeaksAllowed() SHARED_REQUIRES(Locks::lambda_table_lock_); + // Wrap the Closure into a unique_ptr so that the HashMap can delete its memory automatically. + using UnorderedMapKeyType = ClosureType; + // EmptyFn implementation for art::HashMap struct EmptyFn { - void MakeEmpty(std::pair<ClosureType, ValueType>& item) const { - item.first = nullptr; - } - bool IsEmpty(const std::pair<ClosureType, ValueType>& item) const { - return item.first == nullptr; - } + void MakeEmpty(std::pair<UnorderedMapKeyType, ValueType>& item) const + NO_THREAD_SAFETY_ANALYSIS; // SHARED_REQUIRES(Locks::mutator_lock_) + + bool IsEmpty(const std::pair<UnorderedMapKeyType, ValueType>& item) const; }; // HashFn implementation for art::HashMap struct HashFn { - size_t operator()(const ClosureType& key) const { - // TODO(iam): Rewrite hash function when ClosureType is no longer an ArtMethod* - return static_cast<size_t>(reinterpret_cast<uintptr_t>(key)); - } + size_t operator()(const UnorderedMapKeyType& key) const + NO_THREAD_SAFETY_ANALYSIS; // SHARED_REQUIRES(Locks::mutator_lock_) }; // EqualsFn implementation for art::HashMap struct EqualsFn { - bool operator()(const ClosureType& lhs, const ClosureType& rhs) const; + bool operator()(const UnorderedMapKeyType& lhs, const UnorderedMapKeyType& rhs) const + NO_THREAD_SAFETY_ANALYSIS; // SHARED_REQUIRES(Locks::mutator_lock_) }; - using UnorderedMap = art::HashMap<ClosureType, + using UnorderedMap = art::HashMap<UnorderedMapKeyType, ValueType, EmptyFn, HashFn, diff --git a/runtime/lambda/closure.cc b/runtime/lambda/closure.cc index 95a17c660c..179e4ee7f2 100644 --- a/runtime/lambda/closure.cc +++ b/runtime/lambda/closure.cc @@ -124,6 +124,55 @@ void Closure::CopyTo(void* target, size_t target_size) const { memcpy(target, this, GetSize()); } +ArtMethod* Closure::GetTargetMethod() const { + return const_cast<ArtMethod*>(lambda_info_->GetArtMethod()); +} + +uint32_t Closure::GetHashCode() const { + // Start with a non-zero constant, a prime number. + uint32_t result = 17; + + // Include the hash with the ArtMethod. + { + uintptr_t method = reinterpret_cast<uintptr_t>(GetTargetMethod()); + result = 31 * result + Low32Bits(method); + if (sizeof(method) == sizeof(uint64_t)) { + result = 31 * result + High32Bits(method); + } + } + + // Include a hash for each captured variable. + for (size_t i = 0; i < GetCapturedVariablesSize(); ++i) { + // TODO: not safe for GC-able values since the address can move and the hash code would change. + uint8_t captured_variable_raw_value; + CopyUnsafeAtOffset<uint8_t>(i, /*out*/&captured_variable_raw_value); // NOLINT: [whitespace/comma] [3] + + result = 31 * result + captured_variable_raw_value; + } + + // TODO: Fix above loop to work for objects and lambdas. + static_assert(kClosureSupportsGarbageCollection == false, + "Need to update above loop to read the hash code from the " + "objects and lambdas recursively"); + + return result; +} + +bool Closure::ReferenceEquals(const Closure* other) const { + DCHECK(other != nullptr); + + // TODO: Need rework to use read barriers once closures have references inside of them that can + // move. Until then, it's safe to just compare the data inside of it directly. 
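Closure::GetHashCode above folds the target ArtMethod pointer and every captured byte into a 31-multiplier rolling hash, and ReferenceEquals compares the raw bytes. A standalone sketch of that hash together with a byte-wise equality helper (note that memcmp reports equality by returning zero; hypothetical helpers, not the ART Low32Bits/High32Bits utilities):

#include <cstddef>
#include <cstdint>
#include <cstring>

static uint32_t HashClosureBytes(const void* target_method,
                                 const uint8_t* captured,
                                 size_t captured_size) {
  uint32_t result = 17;  // non-zero prime seed
  uint64_t method = reinterpret_cast<uintptr_t>(target_method);
  result = 31 * result + static_cast<uint32_t>(method);        // low 32 bits
  result = 31 * result + static_cast<uint32_t>(method >> 32);  // high 32 bits (0 on 32-bit)
  for (size_t i = 0; i < captured_size; ++i) {
    result = 31 * result + captured[i];
  }
  return result;
}

static bool ClosureBytesEqual(const uint8_t* a, size_t a_size,
                              const uint8_t* b, size_t b_size) {
  return a_size == b_size && std::memcmp(a, b, a_size) == 0;
}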
+ static_assert(kClosureSupportsReferences == false, + "Unsafe to use memcmp in read barrier collector"); + + if (GetSize() != other->GetSize()) { + return false; + } + + return memcmp(this, other, GetSize()); +} + size_t Closure::GetNumberOfCapturedVariables() const { // TODO: refactor into art_lambda_method.h. Parsing should only be required here as a DCHECK. VariableInfo variable_info = diff --git a/runtime/lambda/closure.h b/runtime/lambda/closure.h index 60d117e9e2..31ff1944d2 100644 --- a/runtime/lambda/closure.h +++ b/runtime/lambda/closure.h @@ -49,6 +49,19 @@ struct PACKED(sizeof(ArtLambdaMethod*)) Closure { // The target_size must be at least as large as GetSize(). void CopyTo(void* target, size_t target_size) const; + // Get the target method, i.e. the method that will be dispatched into with invoke-lambda. + ArtMethod* GetTargetMethod() const; + + // Calculates the hash code. Value is recomputed each time. + uint32_t GetHashCode() const SHARED_REQUIRES(Locks::mutator_lock_); + + // Is this the same closure as other? e.g. same target method, same variables captured. + // + // Determines whether the two Closures are interchangeable instances. + // Does *not* call Object#equals recursively. If two Closures compare ReferenceEquals true that + // means that they are interchangeable values (usually for the purpose of boxing/unboxing). + bool ReferenceEquals(const Closure* other) const SHARED_REQUIRES(Locks::mutator_lock_); + // How many variables were captured? size_t GetNumberOfCapturedVariables() const; diff --git a/runtime/lambda/closure_builder-inl.h b/runtime/lambda/closure_builder-inl.h index 41a803baf2..3cec21f3ba 100644 --- a/runtime/lambda/closure_builder-inl.h +++ b/runtime/lambda/closure_builder-inl.h @@ -35,6 +35,8 @@ void ClosureBuilder::CaptureVariablePrimitive(T value) { values_.push_back(value_storage); size_ += sizeof(T); + + shorty_types_ += kShortyType; } } // namespace lambda diff --git a/runtime/lambda/closure_builder.cc b/runtime/lambda/closure_builder.cc index 9c37db8fcc..739e965238 100644 --- a/runtime/lambda/closure_builder.cc +++ b/runtime/lambda/closure_builder.cc @@ -64,6 +64,8 @@ void ClosureBuilder::CaptureVariableObject(mirror::Object* object) { UNIMPLEMENTED(FATAL) << "can't yet safely capture objects with read barrier"; } } + + shorty_types_ += ShortyFieldType::kObject; } void ClosureBuilder::CaptureVariableLambda(Closure* closure) { @@ -78,6 +80,8 @@ void ClosureBuilder::CaptureVariableLambda(Closure* closure) { // A closure may be sized dynamically, so always query it for the true size. 
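Because a closure's true size depends on its captured variables, both the interpreter hunks earlier and the box table materialize closures with MakeFlexibleInstance-style placement new over a buffer sized at runtime. A generic sketch of that flexible-size allocation pattern (ToyFlexClosure and malloc stand in for the runtime's LinearAlloc-backed allocation, which is intentionally never freed for now):

#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <new>

// The nominal struct is smaller than the allocation; the trailing bytes hold the
// variable-length payload (the captured variables, in the closure case).
struct ToyFlexClosure {
  size_t payload_size;
  uint8_t* payload() { return reinterpret_cast<uint8_t*>(this + 1); }
};

static ToyFlexClosure* MakeToyFlexClosure(size_t payload_size) {
  void* memory = std::malloc(sizeof(ToyFlexClosure) + payload_size);
  return new (memory) ToyFlexClosure{payload_size};
}
// The caller owns the memory; in this sketch it would be released with std::free after use.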
size_ += closure->GetSize(); + + shorty_types_ += ShortyFieldType::kLambda; } size_t ClosureBuilder::GetSize() const { @@ -85,9 +89,15 @@ size_t ClosureBuilder::GetSize() const { } size_t ClosureBuilder::GetCaptureCount() const { + DCHECK_EQ(values_.size(), shorty_types_.size()); return values_.size(); } +const std::string& ClosureBuilder::GetCapturedVariableShortyTypes() const { + DCHECK_EQ(values_.size(), shorty_types_.size()); + return shorty_types_; +} + Closure* ClosureBuilder::CreateInPlace(void* memory, ArtLambdaMethod* target_method) const { DCHECK(memory != nullptr); DCHECK(target_method != nullptr); @@ -138,11 +148,14 @@ size_t ClosureBuilder::WriteValues(ArtLambdaMethod* target_method, size_t variables_size) const { size_t total_size = header_size; const char* shorty_types = target_method->GetCapturedVariablesShortyTypeDescriptor(); + DCHECK_STREQ(shorty_types, shorty_types_.c_str()); size_t variables_offset = 0; size_t remaining_size = variables_size; const size_t shorty_count = target_method->GetNumberOfCapturedVariables(); + DCHECK_EQ(shorty_count, GetCaptureCount()); + for (size_t i = 0; i < shorty_count; ++i) { ShortyFieldType shorty{shorty_types[i]}; // NOLINT [readability/braces] [4] diff --git a/runtime/lambda/closure_builder.h b/runtime/lambda/closure_builder.h index 542e12afaa..23eb484529 100644 --- a/runtime/lambda/closure_builder.h +++ b/runtime/lambda/closure_builder.h @@ -40,13 +40,12 @@ class ArtLambdaMethod; // forward declaration // // The mutator lock must be held for the duration of the lifetime of this object, // since it needs to temporarily store heap references into an internal list. -class ClosureBuilder : ValueObject { +class ClosureBuilder { public: using ShortyTypeEnum = decltype(ShortyFieldType::kByte); - // Mark this primitive value to be captured as the specified type. - template <typename T, ShortyTypeEnum kShortyType> + template <typename T, ShortyTypeEnum kShortyType = ShortyFieldTypeSelectEnum<T>::value> void CaptureVariablePrimitive(T value); // Mark this object reference to be captured. @@ -63,6 +62,9 @@ class ClosureBuilder : ValueObject { // Returns how many variables have been captured so far. size_t GetCaptureCount() const; + // Get the list of captured variables' shorty field types. + const std::string& GetCapturedVariableShortyTypes() const; + // Creates a closure in-place and writes out the data into 'memory'. // Memory must be at least 'GetSize' bytes large. // All previously marked data to be captured is now written out. @@ -93,6 +95,7 @@ class ClosureBuilder : ValueObject { size_t size_ = kInitialSize; bool is_dynamic_size_ = false; std::vector<ShortyFieldTypeTraits::MaxType> values_; + std::string shorty_types_; }; } // namespace lambda diff --git a/runtime/lambda/leaking_allocator.cc b/runtime/lambda/leaking_allocator.cc new file mode 100644 index 0000000000..4910732a6c --- /dev/null +++ b/runtime/lambda/leaking_allocator.cc @@ -0,0 +1,30 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "lambda/leaking_allocator.h" +#include "linear_alloc.h" +#include "runtime.h" + +namespace art { +namespace lambda { + +void* LeakingAllocator::AllocateMemory(Thread* self, size_t byte_size) { + // TODO: use GetAllocatorForClassLoader to allocate lambda ArtMethod data. + return Runtime::Current()->GetLinearAlloc()->Alloc(self, byte_size); +} + +} // namespace lambda +} // namespace art diff --git a/runtime/lambda/leaking_allocator.h b/runtime/lambda/leaking_allocator.h new file mode 100644 index 0000000000..c3222d0485 --- /dev/null +++ b/runtime/lambda/leaking_allocator.h @@ -0,0 +1,55 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef ART_RUNTIME_LAMBDA_LEAKING_ALLOCATOR_H_ +#define ART_RUNTIME_LAMBDA_LEAKING_ALLOCATOR_H_ + +#include <utility> // std::forward + +namespace art { +class Thread; // forward declaration + +namespace lambda { + +// Temporary class to centralize all the leaking allocations. +// Allocations made through this class are never freed, but it is a placeholder +// that means that the calling code needs to be rewritten to properly: +// +// (a) Have a lifetime scoped to some other entity. +// (b) Not be allocated over and over again if it was already allocated once (immutable data). +// +// TODO: do all of the above a/b for each callsite, and delete this class. +class LeakingAllocator { + public: + // Allocate byte_size bytes worth of memory. Never freed. + static void* AllocateMemory(Thread* self, size_t byte_size); + + // Make a new instance of T, flexibly sized, in-place at newly allocated memory. Never freed. + template <typename T, typename... Args> + static T* MakeFlexibleInstance(Thread* self, size_t byte_size, Args&&... args) { + return new (AllocateMemory(self, byte_size)) T(std::forward<Args>(args)...); + } + + // Make a new instance of T in-place at newly allocated memory. Never freed. + template <typename T, typename... Args> + static T* MakeInstance(Thread* self, Args&&... 
args) { + return new (AllocateMemory(self, sizeof(T))) T(std::forward<Args>(args)...); + } +}; + +} // namespace lambda +} // namespace art + +#endif // ART_RUNTIME_LAMBDA_LEAKING_ALLOCATOR_H_ diff --git a/runtime/mirror/throwable.cc b/runtime/mirror/throwable.cc index e8633def48..e215994fb7 100644 --- a/runtime/mirror/throwable.cc +++ b/runtime/mirror/throwable.cc @@ -71,18 +71,14 @@ bool Throwable::IsCheckedException() { int32_t Throwable::GetStackDepth() { Object* stack_state = GetStackState(); - if (stack_state == nullptr) { + if (stack_state == nullptr || !stack_state->IsObjectArray()) { return -1; } - if (!stack_state->IsIntArray() && !stack_state->IsLongArray()) { - return -1; - } - mirror::PointerArray* method_trace = down_cast<mirror::PointerArray*>(stack_state->AsArray()); - int32_t array_len = method_trace->GetLength(); - // The format is [method pointers][pcs] so the depth is half the length (see method - // BuildInternalStackTraceVisitor::Init). - CHECK_EQ(array_len % 2, 0); - return array_len / 2; + mirror::ObjectArray<mirror::Object>* const trace = stack_state->AsObjectArray<mirror::Object>(); + const int32_t array_len = trace->GetLength(); + DCHECK_GT(array_len, 0); + // See method BuildInternalStackTraceVisitor::Init for the format. + return array_len - 1; } std::string Throwable::Dump() { @@ -95,18 +91,22 @@ std::string Throwable::Dump() { result += "\n"; Object* stack_state = GetStackState(); // check stack state isn't missing or corrupt - if (stack_state != nullptr && - (stack_state->IsIntArray() || stack_state->IsLongArray())) { + if (stack_state != nullptr && stack_state->IsObjectArray()) { + mirror::ObjectArray<mirror::Object>* object_array = + stack_state->AsObjectArray<mirror::Object>(); // Decode the internal stack trace into the depth and method trace - // Format is [method pointers][pcs] - auto* method_trace = down_cast<mirror::PointerArray*>(stack_state->AsArray()); - auto array_len = method_trace->GetLength(); + // See method BuildInternalStackTraceVisitor::Init for the format. 
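+    // That format: element 0 of the object array is a PointerArray laid out as
+    // [method pointers][dex pcs]; the remaining elements are the declaring classes of those
+    // methods, kept so the classes cannot be unloaded while the trace is live.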
+ DCHECK_GT(object_array->GetLength(), 0); + mirror::Object* methods_and_dex_pcs = object_array->Get(0); + DCHECK(methods_and_dex_pcs->IsIntArray() || methods_and_dex_pcs->IsLongArray()); + mirror::PointerArray* method_trace = down_cast<mirror::PointerArray*>(methods_and_dex_pcs); + const int32_t array_len = method_trace->GetLength(); CHECK_EQ(array_len % 2, 0); const auto depth = array_len / 2; if (depth == 0) { result += "(Throwable with empty stack trace)"; } else { - auto ptr_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize(); + const size_t ptr_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize(); for (int32_t i = 0; i < depth; ++i) { ArtMethod* method = method_trace->GetElementPtrSize<ArtMethod*>(i, ptr_size); uintptr_t dex_pc = method_trace->GetElementPtrSize<uintptr_t>(i + depth, ptr_size); diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc index 3b84bfa026..4aebc2c35f 100644 --- a/runtime/native/dalvik_system_DexFile.cc +++ b/runtime/native/dalvik_system_DexFile.cc @@ -192,28 +192,38 @@ static jobject DexFile_openDexFileNative( } } -static void DexFile_closeDexFile(JNIEnv* env, jclass, jobject cookie) { - std::unique_ptr<std::vector<const DexFile*>> dex_files = ConvertJavaArrayToNative(env, cookie); - if (dex_files.get() == nullptr) { - DCHECK(env->ExceptionCheck()); - return; - } - +static jboolean DexFile_closeDexFile(JNIEnv* env, jclass, jobject cookie) { ScopedObjectAccess soa(env); + mirror::Object* dex_files_object = soa.Decode<mirror::Object*>(cookie); + if (dex_files_object == nullptr) { + ThrowNullPointerException("cookie == null"); + return JNI_FALSE; + } + mirror::LongArray* dex_files = dex_files_object->AsLongArray(); - // The Runtime currently never unloads classes, which means any registered - // dex files must be kept around forever in case they are used. We - // accomplish this here by explicitly leaking those dex files that are - // registered. - // - // TODO: The Runtime should support unloading of classes and freeing of the - // dex files for those unloaded classes rather than leaking dex files here. + // Delete dex files associated with this dalvik.system.DexFile since there should not be running + // code using it. dex_files is a vector due to multidex. ClassLinker* const class_linker = Runtime::Current()->GetClassLinker(); - for (const DexFile* dex_file : *dex_files) { + bool all_deleted = true; + for (int32_t i = 0, count = dex_files->GetLength(); i < count; ++i) { + auto* dex_file = reinterpret_cast<DexFile*>(dex_files->Get(i)); + if (dex_file == nullptr) { + continue; + } + // Only delete the dex file if the dex cache is not found to prevent runtime crashes if there + // are calls to DexFile.close while the ART DexFile is still in use. if (class_linker->FindDexCache(soa.Self(), *dex_file, true) == nullptr) { + // Clear the element in the array so that we can call close again. + dex_files->Set(i, 0); delete dex_file; + } else { + all_deleted = false; } } + + // TODO: Also unmap the OatFile for this dalvik.system.DexFile. + + return all_deleted ? 
JNI_TRUE : JNI_FALSE; } static jclass DexFile_defineClassNative(JNIEnv* env, jclass, jstring javaName, jobject javaLoader, @@ -379,7 +389,7 @@ static jboolean DexFile_isDexOptNeeded(JNIEnv* env, jclass, jstring javaFilename } static JNINativeMethod gMethods[] = { - NATIVE_METHOD(DexFile, closeDexFile, "(Ljava/lang/Object;)V"), + NATIVE_METHOD(DexFile, closeDexFile, "(Ljava/lang/Object;)Z"), NATIVE_METHOD(DexFile, defineClassNative, "(Ljava/lang/String;Ljava/lang/ClassLoader;Ljava/lang/Object;)Ljava/lang/Class;"), NATIVE_METHOD(DexFile, getClassNameList, "(Ljava/lang/Object;)[Ljava/lang/String;"), diff --git a/runtime/native/java_lang_Class.cc b/runtime/native/java_lang_Class.cc index 8fd6849363..5da15df25b 100644 --- a/runtime/native/java_lang_Class.cc +++ b/runtime/native/java_lang_Class.cc @@ -229,6 +229,65 @@ ALWAYS_INLINE static inline mirror::Field* GetDeclaredField( return nullptr; } +static mirror::Field* GetPublicFieldRecursive( + Thread* self, mirror::Class* clazz, mirror::String* name) + SHARED_REQUIRES(Locks::mutator_lock_) { + DCHECK(clazz != nullptr); + DCHECK(name != nullptr); + DCHECK(self != nullptr); + + StackHandleScope<1> hs(self); + MutableHandle<mirror::Class> h_clazz(hs.NewHandle(clazz)); + + // We search the current class, its direct interfaces then its superclass. + while (h_clazz.Get() != nullptr) { + mirror::Field* result = GetDeclaredField(self, h_clazz.Get(), name); + if ((result != nullptr) && (result->GetAccessFlags() & kAccPublic)) { + return result; + } else if (UNLIKELY(self->IsExceptionPending())) { + // Something went wrong. Bail out. + return nullptr; + } + + uint32_t num_direct_interfaces = h_clazz->NumDirectInterfaces(); + for (uint32_t i = 0; i < num_direct_interfaces; i++) { + mirror::Class *iface = mirror::Class::GetDirectInterface(self, h_clazz, i); + if (UNLIKELY(iface == nullptr)) { + self->AssertPendingException(); + return nullptr; + } + result = GetPublicFieldRecursive(self, iface, name); + if (result != nullptr) { + DCHECK(result->GetAccessFlags() & kAccPublic); + return result; + } else if (UNLIKELY(self->IsExceptionPending())) { + // Something went wrong. Bail out. + return nullptr; + } + } + + // We don't try the superclass if we are an interface. + if (h_clazz->IsInterface()) { + break; + } + + // Get the next class. 
+ h_clazz.Assign(h_clazz->GetSuperClass()); + } + return nullptr; +} + +static jobject Class_getPublicFieldRecursive(JNIEnv* env, jobject javaThis, jstring name) { + ScopedFastNativeObjectAccess soa(env); + auto* name_string = soa.Decode<mirror::String*>(name); + if (UNLIKELY(name_string == nullptr)) { + ThrowNullPointerException("name == null"); + return nullptr; + } + return soa.AddLocalReference<jobject>( + GetPublicFieldRecursive(soa.Self(), DecodeClass(soa, javaThis), name_string)); +} + static jobject Class_getDeclaredFieldInternal(JNIEnv* env, jobject javaThis, jstring name) { ScopedFastNativeObjectAccess soa(env); auto* name_string = soa.Decode<mirror::String*>(name); @@ -678,6 +737,7 @@ static JNINativeMethod gMethods[] = { "!([Ljava/lang/Class;)Ljava/lang/reflect/Constructor;"), NATIVE_METHOD(Class, getDeclaredConstructorsInternal, "!(Z)[Ljava/lang/reflect/Constructor;"), NATIVE_METHOD(Class, getDeclaredField, "!(Ljava/lang/String;)Ljava/lang/reflect/Field;"), + NATIVE_METHOD(Class, getPublicFieldRecursive, "!(Ljava/lang/String;)Ljava/lang/reflect/Field;"), NATIVE_METHOD(Class, getDeclaredFieldInternal, "!(Ljava/lang/String;)Ljava/lang/reflect/Field;"), NATIVE_METHOD(Class, getDeclaredFields, "!()[Ljava/lang/reflect/Field;"), NATIVE_METHOD(Class, getDeclaredFieldsUnchecked, "!(Z)[Ljava/lang/reflect/Field;"), diff --git a/runtime/quick_exception_handler.cc b/runtime/quick_exception_handler.cc index 5c13e13f90..63f43cf3b2 100644 --- a/runtime/quick_exception_handler.cc +++ b/runtime/quick_exception_handler.cc @@ -20,6 +20,7 @@ #include "art_method-inl.h" #include "dex_instruction.h" #include "entrypoints/entrypoint_utils.h" +#include "entrypoints/quick/quick_entrypoints_enum.h" #include "entrypoints/runtime_asm_entrypoints.h" #include "handle_scope-inl.h" #include "mirror/class-inl.h" @@ -36,8 +37,9 @@ QuickExceptionHandler::QuickExceptionHandler(Thread* self, bool is_deoptimizatio : self_(self), context_(self->GetLongJumpContext()), is_deoptimization_(is_deoptimization), method_tracing_active_(is_deoptimization || Runtime::Current()->GetInstrumentation()->AreExitStubsInstalled()), - handler_quick_frame_(nullptr), handler_quick_frame_pc_(0), handler_method_(nullptr), - handler_dex_pc_(0), clear_exception_(false), handler_frame_depth_(kInvalidFrameDepth) { + handler_quick_frame_(nullptr), handler_quick_frame_pc_(0), handler_quick_arg0_(0), + handler_method_(nullptr), handler_dex_pc_(0), clear_exception_(false), + handler_frame_depth_(kInvalidFrameDepth) { } // Finds catch handler. @@ -260,19 +262,25 @@ void QuickExceptionHandler::SetCatchEnvironmentForOptimizedHandler(StackVisitor* // Prepares deoptimization. 
class DeoptimizeStackVisitor FINAL : public StackVisitor { public: - DeoptimizeStackVisitor(Thread* self, Context* context, QuickExceptionHandler* exception_handler) + DeoptimizeStackVisitor(Thread* self, + Context* context, + QuickExceptionHandler* exception_handler, + bool single_frame) SHARED_REQUIRES(Locks::mutator_lock_) : StackVisitor(self, context, StackVisitor::StackWalkKind::kIncludeInlinedFrames), exception_handler_(exception_handler), prev_shadow_frame_(nullptr), - stacked_shadow_frame_pushed_(false) { + stacked_shadow_frame_pushed_(false), + single_frame_deopt_(single_frame), + single_frame_done_(false) { } bool VisitFrame() OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) { exception_handler_->SetHandlerFrameDepth(GetFrameDepth()); ArtMethod* method = GetMethod(); - if (method == nullptr) { - // This is the upcall, we remember the frame and last pc so that we may long jump to them. + if (method == nullptr || single_frame_done_) { + // This is the upcall (or the next full frame in single-frame deopt), we remember the frame + // and last pc so that we may long jump to them. exception_handler_->SetHandlerQuickFramePc(GetCurrentQuickFramePc()); exception_handler_->SetHandlerQuickFrame(GetCurrentQuickFrame()); if (!stacked_shadow_frame_pushed_) { @@ -295,7 +303,13 @@ class DeoptimizeStackVisitor FINAL : public StackVisitor { CHECK_EQ(GetFrameDepth(), 1U); return true; } else { - return HandleDeoptimization(method); + HandleDeoptimization(method); + if (single_frame_deopt_ && !IsInInlinedFrame()) { + // Single-frame deopt ends at the first non-inlined frame and needs to store that method. + exception_handler_->SetHandlerQuickArg0(reinterpret_cast<uintptr_t>(method)); + single_frame_done_ = true; + } + return true; } } @@ -304,7 +318,7 @@ class DeoptimizeStackVisitor FINAL : public StackVisitor { return static_cast<VRegKind>(kinds.at(reg * 2)); } - bool HandleDeoptimization(ArtMethod* m) SHARED_REQUIRES(Locks::mutator_lock_) { + void HandleDeoptimization(ArtMethod* m) SHARED_REQUIRES(Locks::mutator_lock_) { const DexFile::CodeItem* code_item = m->GetCodeItem(); CHECK(code_item != nullptr) << "No code item for " << PrettyMethod(m); uint16_t num_regs = code_item->registers_size_; @@ -448,16 +462,20 @@ class DeoptimizeStackVisitor FINAL : public StackVisitor { // Will be popped after the long jump after DeoptimizeStack(), // right before interpreter::EnterInterpreterFromDeoptimize(). stacked_shadow_frame_pushed_ = true; - GetThread()->PushStackedShadowFrame(new_frame, - StackedShadowFrameType::kDeoptimizationShadowFrame); + GetThread()->PushStackedShadowFrame( + new_frame, + single_frame_deopt_ + ? 
StackedShadowFrameType::kSingleFrameDeoptimizationShadowFrame + : StackedShadowFrameType::kDeoptimizationShadowFrame); } prev_shadow_frame_ = new_frame; - return true; } QuickExceptionHandler* const exception_handler_; ShadowFrame* prev_shadow_frame_; bool stacked_shadow_frame_pushed_; + const bool single_frame_deopt_; + bool single_frame_done_; DISALLOW_COPY_AND_ASSIGN(DeoptimizeStackVisitor); }; @@ -468,13 +486,46 @@ void QuickExceptionHandler::DeoptimizeStack() { self_->DumpStack(LOG(INFO) << "Deoptimizing: "); } - DeoptimizeStackVisitor visitor(self_, context_, this); + DeoptimizeStackVisitor visitor(self_, context_, this, false); visitor.WalkStack(true); // Restore deoptimization exception self_->SetException(Thread::GetDeoptimizationException()); } +void QuickExceptionHandler::DeoptimizeSingleFrame() { + DCHECK(is_deoptimization_); + + if (VLOG_IS_ON(deopt) || kDebugExceptionDelivery) { + LOG(INFO) << "Single-frame deopting:"; + DumpFramesWithType(self_, true); + } + + DeoptimizeStackVisitor visitor(self_, context_, this, true); + visitor.WalkStack(true); + + // PC needs to be of the quick-to-interpreter bridge. + int32_t offset; + #ifdef __LP64__ + offset = GetThreadOffset<8>(kQuickQuickToInterpreterBridge).Int32Value(); + #else + offset = GetThreadOffset<4>(kQuickQuickToInterpreterBridge).Int32Value(); + #endif + handler_quick_frame_pc_ = *reinterpret_cast<uintptr_t*>( + reinterpret_cast<uint8_t*>(self_) + offset); +} + +void QuickExceptionHandler::DeoptimizeSingleFrameArchDependentFixup() { + // Architecture-dependent work. This is to get the LR right for x86 and x86-64. + + if (kRuntimeISA == InstructionSet::kX86 || kRuntimeISA == InstructionSet::kX86_64) { + // On x86, the return address is on the stack, so just reuse it. Otherwise we would have to + // change how longjump works. + handler_quick_frame_ = reinterpret_cast<ArtMethod**>( + reinterpret_cast<uintptr_t>(handler_quick_frame_) - sizeof(void*)); + } +} + // Unwinds all instrumentation stack frame prior to catch handler or upcall. class InstrumentationStackVisitor : public StackVisitor { public: @@ -529,15 +580,67 @@ void QuickExceptionHandler::UpdateInstrumentationStack() { } } -void QuickExceptionHandler::DoLongJump() { +void QuickExceptionHandler::DoLongJump(bool smash_caller_saves) { // Place context back on thread so it will be available when we continue. self_->ReleaseLongJumpContext(context_); context_->SetSP(reinterpret_cast<uintptr_t>(handler_quick_frame_)); CHECK_NE(handler_quick_frame_pc_, 0u); context_->SetPC(handler_quick_frame_pc_); - context_->SmashCallerSaves(); + context_->SetArg0(handler_quick_arg0_); + if (smash_caller_saves) { + context_->SmashCallerSaves(); + } context_->DoLongJump(); UNREACHABLE(); } +// Prints out methods with their type of frame. 
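+// Output legend: S = shadow (interpreted) frame, Q = quick compiled frame, Qi = frame inlined
+// into a quick frame; with details enabled, R marks runtime methods and N marks transitions.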
+class DumpFramesWithTypeStackVisitor FINAL : public StackVisitor { + public: + DumpFramesWithTypeStackVisitor(Thread* self, bool show_details = false) + SHARED_REQUIRES(Locks::mutator_lock_) + : StackVisitor(self, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames), + show_details_(show_details) {} + + bool VisitFrame() OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) { + ArtMethod* method = GetMethod(); + if (show_details_) { + LOG(INFO) << "|> pc = " << std::hex << GetCurrentQuickFramePc(); + LOG(INFO) << "|> addr = " << std::hex << reinterpret_cast<uintptr_t>(GetCurrentQuickFrame()); + if (GetCurrentQuickFrame() != nullptr && method != nullptr) { + LOG(INFO) << "|> ret = " << std::hex << GetReturnPc(); + } + } + if (method == nullptr) { + // Transition, do go on, we want to unwind over bridges, all the way. + if (show_details_) { + LOG(INFO) << "N <transition>"; + } + return true; + } else if (method->IsRuntimeMethod()) { + if (show_details_) { + LOG(INFO) << "R " << PrettyMethod(method, true); + } + return true; + } else { + bool is_shadow = GetCurrentShadowFrame() != nullptr; + LOG(INFO) << (is_shadow ? "S" : "Q") + << ((!is_shadow && IsInInlinedFrame()) ? "i" : " ") + << " " + << PrettyMethod(method, true); + return true; // Go on. + } + } + + private: + bool show_details_; + + DISALLOW_COPY_AND_ASSIGN(DumpFramesWithTypeStackVisitor); +}; + +void QuickExceptionHandler::DumpFramesWithType(Thread* self, bool details) { + DumpFramesWithTypeStackVisitor visitor(self, details); + visitor.WalkStack(true); +} + } // namespace art diff --git a/runtime/quick_exception_handler.h b/runtime/quick_exception_handler.h index 2e05c7e1e5..89d6a25128 100644 --- a/runtime/quick_exception_handler.h +++ b/runtime/quick_exception_handler.h @@ -49,6 +49,9 @@ class QuickExceptionHandler { // Deoptimize the stack to the upcall. For every compiled frame, we create a "copy" // shadow frame that will be executed with the interpreter. void DeoptimizeStack() SHARED_REQUIRES(Locks::mutator_lock_); + void DeoptimizeSingleFrame() SHARED_REQUIRES(Locks::mutator_lock_); + void DeoptimizeSingleFrameArchDependentFixup() SHARED_REQUIRES(Locks::mutator_lock_); + // Update the instrumentation stack by removing all methods that will be unwound // by the exception being thrown. void UpdateInstrumentationStack() SHARED_REQUIRES(Locks::mutator_lock_); @@ -58,7 +61,7 @@ class QuickExceptionHandler { SHARED_REQUIRES(Locks::mutator_lock_); // Long jump either to a catch handler or to the upcall. - NO_RETURN void DoLongJump() SHARED_REQUIRES(Locks::mutator_lock_); + NO_RETURN void DoLongJump(bool smash_caller_saves = true) SHARED_REQUIRES(Locks::mutator_lock_); void SetHandlerQuickFrame(ArtMethod** handler_quick_frame) { handler_quick_frame_ = handler_quick_frame; @@ -68,6 +71,10 @@ class QuickExceptionHandler { handler_quick_frame_pc_ = handler_quick_frame_pc; } + void SetHandlerQuickArg0(uintptr_t handler_quick_arg0) { + handler_quick_arg0_ = handler_quick_arg0; + } + ArtMethod* GetHandlerMethod() const { return handler_method_; } @@ -92,6 +99,11 @@ class QuickExceptionHandler { handler_frame_depth_ = frame_depth; } + // Walk the stack frames of the given thread, printing out non-runtime methods with their types + // of frames. Helps to verify that single-frame deopt really only deopted one frame. 
+ static void DumpFramesWithType(Thread* self, bool details = false) + SHARED_REQUIRES(Locks::mutator_lock_); + private: Thread* const self_; Context* const context_; @@ -103,6 +115,8 @@ class QuickExceptionHandler { ArtMethod** handler_quick_frame_; // PC to branch to for the handler. uintptr_t handler_quick_frame_pc_; + // The value for argument 0. + uintptr_t handler_quick_arg0_; // The handler method to report to the debugger. ArtMethod* handler_method_; // The handler's dex PC, zero implies an uncaught exception. diff --git a/runtime/read_barrier-inl.h b/runtime/read_barrier-inl.h index daae401b46..85ac4aab96 100644 --- a/runtime/read_barrier-inl.h +++ b/runtime/read_barrier-inl.h @@ -62,8 +62,10 @@ inline MirrorType* ReadBarrier::Barrier( if (heap != nullptr && heap->GetReadBarrierTable()->IsSet(old_ref)) { ref = reinterpret_cast<MirrorType*>(Mark(old_ref)); // Update the field atomically. This may fail if mutator updates before us, but it's ok. - obj->CasFieldStrongSequentiallyConsistentObjectWithoutWriteBarrier<false, false>( - offset, old_ref, ref); + if (ref != old_ref) { + obj->CasFieldStrongSequentiallyConsistentObjectWithoutWriteBarrier<false, false>( + offset, old_ref, ref); + } } AssertToSpaceInvariant(obj, offset, ref); return ref; @@ -90,17 +92,17 @@ inline MirrorType* ReadBarrier::BarrierForRoot(MirrorType** root, // To be implemented. return ref; } else if (with_read_barrier && kUseTableLookupReadBarrier) { - if (kMaybeDuringStartup && IsDuringStartup()) { - // During startup, the heap may not be initialized yet. Just - // return the given ref. - return ref; - } - if (Runtime::Current()->GetHeap()->GetReadBarrierTable()->IsSet(ref)) { + Thread* self = Thread::Current(); + if (self != nullptr && + self->GetIsGcMarking() && + Runtime::Current()->GetHeap()->GetReadBarrierTable()->IsSet(ref)) { MirrorType* old_ref = ref; ref = reinterpret_cast<MirrorType*>(Mark(old_ref)); // Update the field atomically. This may fail if mutator updates before us, but it's ok. - Atomic<mirror::Object*>* atomic_root = reinterpret_cast<Atomic<mirror::Object*>*>(root); - atomic_root->CompareExchangeStrongSequentiallyConsistent(old_ref, ref); + if (ref != old_ref) { + Atomic<mirror::Object*>* atomic_root = reinterpret_cast<Atomic<mirror::Object*>*>(root); + atomic_root->CompareExchangeStrongSequentiallyConsistent(old_ref, ref); + } } AssertToSpaceInvariant(gc_root_source, ref); return ref; @@ -127,19 +129,19 @@ inline MirrorType* ReadBarrier::BarrierForRoot(mirror::CompressedReference<Mirro // To be implemented. return ref; } else if (with_read_barrier && kUseTableLookupReadBarrier) { - if (kMaybeDuringStartup && IsDuringStartup()) { - // During startup, the heap may not be initialized yet. Just - // return the given ref. - return ref; - } - if (Runtime::Current()->GetHeap()->GetReadBarrierTable()->IsSet(ref)) { + Thread* self = Thread::Current(); + if (self != nullptr && + self->GetIsGcMarking() && + Runtime::Current()->GetHeap()->GetReadBarrierTable()->IsSet(ref)) { auto old_ref = mirror::CompressedReference<MirrorType>::FromMirrorPtr(ref); ref = reinterpret_cast<MirrorType*>(Mark(ref)); auto new_ref = mirror::CompressedReference<MirrorType>::FromMirrorPtr(ref); // Update the field atomically. This may fail if mutator updates before us, but it's ok. 
- auto* atomic_root = - reinterpret_cast<Atomic<mirror::CompressedReference<MirrorType>>*>(root); - atomic_root->CompareExchangeStrongSequentiallyConsistent(old_ref, new_ref); + if (new_ref.AsMirrorPtr() != old_ref.AsMirrorPtr()) { + auto* atomic_root = + reinterpret_cast<Atomic<mirror::CompressedReference<MirrorType>>*>(root); + atomic_root->CompareExchangeStrongSequentiallyConsistent(old_ref, new_ref); + } } AssertToSpaceInvariant(gc_root_source, ref); return ref; diff --git a/runtime/read_barrier_c.h b/runtime/read_barrier_c.h index 710c21f03e..8e5b1872f2 100644 --- a/runtime/read_barrier_c.h +++ b/runtime/read_barrier_c.h @@ -26,10 +26,16 @@ // table-lookup read barriers. #ifdef ART_USE_READ_BARRIER +#if ART_READ_BARRIER_TYPE_IS_BAKER #define USE_BAKER_READ_BARRIER -// #define USE_BROOKS_READ_BARRIER -// #define USE_TABLE_LOOKUP_READ_BARRIER +#elif ART_READ_BARRIER_TYPE_IS_BROOKS +#define USE_BROOKS_READ_BARRIER +#elif ART_READ_BARRIER_TYPE_IS_TABLELOOKUP +#define USE_TABLE_LOOKUP_READ_BARRIER +#else +#error "ART read barrier type must be set" #endif +#endif // ART_USE_READ_BARRIER #ifdef ART_HEAP_POISONING #define USE_HEAP_POISONING diff --git a/runtime/runtime.cc b/runtime/runtime.cc index 8cba1a91d7..1f447d076b 100644 --- a/runtime/runtime.cc +++ b/runtime/runtime.cc @@ -1790,6 +1790,9 @@ bool Runtime::IsVerificationSoftFail() const { } LinearAlloc* Runtime::CreateLinearAlloc() { + // For 64 bit compilers, it needs to be in low 4GB in the case where we are cross compiling for a + // 32 bit target. In this case, we have 32 bit pointers in the dex cache arrays which can't hold + // when we have 64 bit ArtMethod pointers. return (IsAotCompiler() && Is64BitInstructionSet(kRuntimeISA)) ? new LinearAlloc(low_4gb_arena_pool_.get()) : new LinearAlloc(arena_pool_.get()); diff --git a/runtime/stack.cc b/runtime/stack.cc index 7f72f8ab61..1d21a6494a 100644 --- a/runtime/stack.cc +++ b/runtime/stack.cc @@ -110,7 +110,7 @@ StackVisitor::StackVisitor(Thread* thread, } InlineInfo StackVisitor::GetCurrentInlineInfo() const { - ArtMethod* outer_method = *GetCurrentQuickFrame(); + ArtMethod* outer_method = GetOuterMethod(); uint32_t native_pc_offset = outer_method->NativeQuickPcOffset(cur_quick_frame_pc_); CodeInfo code_info = outer_method->GetOptimizedCodeInfo(); StackMapEncoding encoding = code_info.ExtractEncoding(); @@ -194,11 +194,12 @@ size_t StackVisitor::GetNativePcOffset() const { } bool StackVisitor::IsReferenceVReg(ArtMethod* m, uint16_t vreg) { + DCHECK_EQ(m, GetMethod()); // Process register map (which native and runtime methods don't have) if (m->IsNative() || m->IsRuntimeMethod() || m->IsProxyMethod()) { return false; } - if (m->IsOptimized(sizeof(void*))) { + if (GetOuterMethod()->IsOptimized(sizeof(void*))) { return true; // TODO: Implement. 
} const uint8_t* native_gc_map = m->GetNativeGcMap(sizeof(void*)); @@ -251,7 +252,7 @@ bool StackVisitor::GetVReg(ArtMethod* m, uint16_t vreg, VRegKind kind, uint32_t* if (GetVRegFromDebuggerShadowFrame(vreg, kind, val)) { return true; } - if (m->IsOptimized(sizeof(void*))) { + if (GetOuterMethod()->IsOptimized(sizeof(void*))) { return GetVRegFromOptimizedCode(m, vreg, kind, val); } else { return GetVRegFromQuickCode(m, vreg, kind, val); @@ -288,15 +289,15 @@ bool StackVisitor::GetVRegFromQuickCode(ArtMethod* m, uint16_t vreg, VRegKind ki bool StackVisitor::GetVRegFromOptimizedCode(ArtMethod* m, uint16_t vreg, VRegKind kind, uint32_t* val) const { + ArtMethod* outer_method = GetOuterMethod(); + const void* code_pointer = outer_method->GetQuickOatCodePointer(sizeof(void*)); + DCHECK(code_pointer != nullptr); DCHECK_EQ(m, GetMethod()); const DexFile::CodeItem* code_item = m->GetCodeItem(); DCHECK(code_item != nullptr) << PrettyMethod(m); // Can't be null or how would we compile // its instructions? uint16_t number_of_dex_registers = code_item->registers_size_; DCHECK_LT(vreg, code_item->registers_size_); - ArtMethod* outer_method = *GetCurrentQuickFrame(); - const void* code_pointer = outer_method->GetQuickOatCodePointer(sizeof(void*)); - DCHECK(code_pointer != nullptr); CodeInfo code_info = outer_method->GetOptimizedCodeInfo(); StackMapEncoding encoding = code_info.ExtractEncoding(); @@ -405,7 +406,7 @@ bool StackVisitor::GetVRegPair(ArtMethod* m, uint16_t vreg, VRegKind kind_lo, if (cur_quick_frame_ != nullptr) { DCHECK(context_ != nullptr); // You can't reliably read registers without a context. DCHECK(m == GetMethod()); - if (m->IsOptimized(sizeof(void*))) { + if (GetOuterMethod()->IsOptimized(sizeof(void*))) { return GetVRegPairFromOptimizedCode(m, vreg, kind_lo, kind_hi, val); } else { return GetVRegPairFromQuickCode(m, vreg, kind_lo, kind_hi, val); @@ -481,7 +482,7 @@ bool StackVisitor::SetVReg(ArtMethod* m, uint16_t vreg, uint32_t new_value, if (cur_quick_frame_ != nullptr) { DCHECK(context_ != nullptr); // You can't reliably write registers without a context. DCHECK(m == GetMethod()); - if (m->IsOptimized(sizeof(void*))) { + if (GetOuterMethod()->IsOptimized(sizeof(void*))) { return false; } else { return SetVRegFromQuickCode(m, vreg, new_value, kind); @@ -590,7 +591,7 @@ bool StackVisitor::SetVRegPair(ArtMethod* m, uint16_t vreg, uint64_t new_value, if (cur_quick_frame_ != nullptr) { DCHECK(context_ != nullptr); // You can't reliably write registers without a context. 
DCHECK(m == GetMethod()); - if (m->IsOptimized(sizeof(void*))) { + if (GetOuterMethod()->IsOptimized(sizeof(void*))) { return false; } else { return SetVRegPairFromQuickCode(m, vreg, new_value, kind_lo, kind_hi); @@ -724,14 +725,14 @@ void StackVisitor::SetFPR(uint32_t reg, uintptr_t value) { uintptr_t StackVisitor::GetReturnPc() const { uint8_t* sp = reinterpret_cast<uint8_t*>(GetCurrentQuickFrame()); DCHECK(sp != nullptr); - uint8_t* pc_addr = sp + GetMethod()->GetReturnPcOffset().SizeValue(); + uint8_t* pc_addr = sp + GetOuterMethod()->GetReturnPcOffset().SizeValue(); return *reinterpret_cast<uintptr_t*>(pc_addr); } void StackVisitor::SetReturnPc(uintptr_t new_ret_pc) { uint8_t* sp = reinterpret_cast<uint8_t*>(GetCurrentQuickFrame()); CHECK(sp != nullptr); - uint8_t* pc_addr = sp + GetMethod()->GetReturnPcOffset().SizeValue(); + uint8_t* pc_addr = sp + GetOuterMethod()->GetReturnPcOffset().SizeValue(); *reinterpret_cast<uintptr_t*>(pc_addr) = new_ret_pc; } diff --git a/runtime/stack.h b/runtime/stack.h index b805239836..31acf0eb64 100644 --- a/runtime/stack.h +++ b/runtime/stack.h @@ -62,6 +62,10 @@ template<class MirrorType> class MANAGED StackReference : public mirror::CompressedReference<MirrorType> { }; +// Forward declaration. Just calls the destructor. +struct ShadowFrameDeleter; +using ShadowFrameAllocaUniquePtr = std::unique_ptr<ShadowFrame, ShadowFrameDeleter>; + // ShadowFrame has 2 possible layouts: // - interpreter - separate VRegs and reference arrays. References are in the reference array. // - JNI - just VRegs, but where every VReg holds a reference. @@ -77,21 +81,26 @@ class ShadowFrame { static ShadowFrame* CreateDeoptimizedFrame(uint32_t num_vregs, ShadowFrame* link, ArtMethod* method, uint32_t dex_pc) { uint8_t* memory = new uint8_t[ComputeSize(num_vregs)]; - return Create(num_vregs, link, method, dex_pc, memory); + return CreateShadowFrameImpl(num_vregs, link, method, dex_pc, memory); } // Delete a ShadowFrame allocated on the heap for deoptimization. static void DeleteDeoptimizedFrame(ShadowFrame* sf) { + sf->~ShadowFrame(); // Explicitly destruct. uint8_t* memory = reinterpret_cast<uint8_t*>(sf); delete[] memory; } - // Create ShadowFrame for interpreter using provided memory. - static ShadowFrame* Create(uint32_t num_vregs, ShadowFrame* link, - ArtMethod* method, uint32_t dex_pc, void* memory) { - ShadowFrame* sf = new (memory) ShadowFrame(num_vregs, link, method, dex_pc, true); - return sf; - } + // Create a shadow frame in a fresh alloca. This needs to be in the context of the caller. + // Inlining doesn't work, the compiler will still undo the alloca. So this needs to be a macro. +#define CREATE_SHADOW_FRAME(num_vregs, link, method, dex_pc) ({ \ + size_t frame_size = ShadowFrame::ComputeSize(num_vregs); \ + void* alloca_mem = alloca(frame_size); \ + ShadowFrameAllocaUniquePtr( \ + ShadowFrame::CreateShadowFrameImpl((num_vregs), (link), (method), (dex_pc), \ + (alloca_mem))); \ + }) + ~ShadowFrame() {} // TODO(iam): Clean references array up since they're always there, @@ -283,6 +292,15 @@ class ShadowFrame { return OFFSETOF_MEMBER(ShadowFrame, vregs_); } + // Create ShadowFrame for interpreter using provided memory. 
+ static ShadowFrame* CreateShadowFrameImpl(uint32_t num_vregs, + ShadowFrame* link, + ArtMethod* method, + uint32_t dex_pc, + void* memory) { + return new (memory) ShadowFrame(num_vregs, link, method, dex_pc, true); + } + private: ShadowFrame(uint32_t num_vregs, ShadowFrame* link, ArtMethod* method, uint32_t dex_pc, bool has_reference_array) @@ -326,6 +344,14 @@ class ShadowFrame { DISALLOW_IMPLICIT_CONSTRUCTORS(ShadowFrame); }; +struct ShadowFrameDeleter { + inline void operator()(ShadowFrame* frame) { + if (frame != nullptr) { + frame->~ShadowFrame(); + } + } +}; + class JavaFrameRootInfo : public RootInfo { public: JavaFrameRootInfo(uint32_t thread_id, const StackVisitor* stack_visitor, size_t vreg) @@ -447,6 +473,10 @@ class StackVisitor { ArtMethod* GetMethod() const SHARED_REQUIRES(Locks::mutator_lock_); + ArtMethod* GetOuterMethod() const { + return *GetCurrentQuickFrame(); + } + bool IsShadowFrame() const { return cur_shadow_frame_ != nullptr; } diff --git a/runtime/thread-inl.h b/runtime/thread-inl.h index 8bf241b66d..f5d20bd608 100644 --- a/runtime/thread-inl.h +++ b/runtime/thread-inl.h @@ -118,11 +118,8 @@ inline void Thread::AssertThreadSuspensionIsAllowable(bool check_locks) const { } } -inline void Thread::TransitionFromRunnableToSuspended(ThreadState new_state) { - AssertThreadSuspensionIsAllowable(); +inline void Thread::TransitionToSuspendedAndRunCheckpoints(ThreadState new_state) { DCHECK_NE(new_state, kRunnable); - DCHECK_EQ(this, Thread::Current()); - // Change to non-runnable state, thereby appearing suspended to the system. DCHECK_EQ(GetState(), kRunnable); union StateAndFlags old_state_and_flags; union StateAndFlags new_state_and_flags; @@ -145,12 +142,9 @@ inline void Thread::TransitionFromRunnableToSuspended(ThreadState new_state) { break; } } +} - // Change to non-runnable state, thereby appearing suspended to the system. - // Mark the release of the share of the mutator_lock_. - Locks::mutator_lock_->TransitionFromRunnableToSuspended(this); - - // Once suspended - check the active suspend barrier flag +inline void Thread::PassActiveSuspendBarriers() { while (true) { uint16_t current_flags = tls32_.state_and_flags.as_struct.flags; if (LIKELY((current_flags & (kCheckpointRequest | kActiveSuspendBarrier)) == 0)) { @@ -159,11 +153,22 @@ inline void Thread::TransitionFromRunnableToSuspended(ThreadState new_state) { PassActiveSuspendBarriers(this); } else { // Impossible - LOG(FATAL) << "Fatal, thread transited into suspended without running the checkpoint"; + LOG(FATAL) << "Fatal, thread transitioned into suspended without running the checkpoint"; } } } +inline void Thread::TransitionFromRunnableToSuspended(ThreadState new_state) { + AssertThreadSuspensionIsAllowable(); + DCHECK_EQ(this, Thread::Current()); + // Change to non-runnable state, thereby appearing suspended to the system. + TransitionToSuspendedAndRunCheckpoints(new_state); + // Mark the release of the share of the mutator_lock_. 
+ Locks::mutator_lock_->TransitionFromRunnableToSuspended(this); + // Once suspended - check the active suspend barrier flag + PassActiveSuspendBarriers(); +} + inline ThreadState Thread::TransitionFromSuspendedToRunnable() { union StateAndFlags old_state_and_flags; old_state_and_flags.as_int = tls32_.state_and_flags.as_int; @@ -191,7 +196,9 @@ inline ThreadState Thread::TransitionFromSuspendedToRunnable() { PassActiveSuspendBarriers(this); } else if ((old_state_and_flags.as_struct.flags & kCheckpointRequest) != 0) { // Impossible - LOG(FATAL) << "Fatal, wrong checkpoint flag"; + LOG(FATAL) << "Transitioning to runnable with checkpoint flag, " + << " flags=" << old_state_and_flags.as_struct.flags + << " state=" << old_state_and_flags.as_struct.state; } else if ((old_state_and_flags.as_struct.flags & kSuspendRequest) != 0) { // Wait while our suspend count is non-zero. MutexLock mu(this, *Locks::thread_suspend_count_lock_); diff --git a/runtime/thread.cc b/runtime/thread.cc index 5bf895ef80..65f71efc06 100644 --- a/runtime/thread.cc +++ b/runtime/thread.cc @@ -250,10 +250,16 @@ void Thread::PushStackedShadowFrame(ShadowFrame* sf, StackedShadowFrameType type tlsPtr_.stacked_shadow_frame_record = record; } -ShadowFrame* Thread::PopStackedShadowFrame(StackedShadowFrameType type) { +ShadowFrame* Thread::PopStackedShadowFrame(StackedShadowFrameType type, bool must_be_present) { StackedShadowFrameRecord* record = tlsPtr_.stacked_shadow_frame_record; - DCHECK(record != nullptr); - DCHECK_EQ(record->GetType(), type); + if (must_be_present) { + DCHECK(record != nullptr); + DCHECK_EQ(record->GetType(), type); + } else { + if (record == nullptr || record->GetType() != type) { + return nullptr; + } + } tlsPtr_.stacked_shadow_frame_record = record->GetLink(); ShadowFrame* shadow_frame = record->GetShadowFrame(); delete record; @@ -1960,15 +1966,32 @@ class BuildInternalStackTraceVisitor : public StackVisitor { pointer_size_(Runtime::Current()->GetClassLinker()->GetImagePointerSize()) {} bool Init(int depth) SHARED_REQUIRES(Locks::mutator_lock_) ACQUIRE(Roles::uninterruptible_) { - // Allocate method trace with format [method pointers][pcs]. - auto* cl = Runtime::Current()->GetClassLinker(); - trace_ = cl->AllocPointerArray(self_, depth * 2); + // Allocate method trace as an object array where the first element is a pointer array that + // contains the ArtMethod pointers and dex PCs. The rest of the elements are the declaring + // class of the ArtMethod pointers. + ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); + StackHandleScope<1> hs(self_); + mirror::Class* array_class = class_linker->GetClassRoot(ClassLinker::kObjectArrayClass); + // The first element is the methods and dex pc array, the other elements are declaring classes + // for the methods to ensure classes in the stack trace don't get unloaded. + Handle<mirror::ObjectArray<mirror::Object>> trace( + hs.NewHandle( + mirror::ObjectArray<mirror::Object>::Alloc(hs.Self(), array_class, depth + 1))); + if (trace.Get() == nullptr) { + // Acquire uninterruptible_ in all paths. 
+ self_->StartAssertNoThreadSuspension("Building internal stack trace"); + self_->AssertPendingOOMException(); + return false; + } + mirror::PointerArray* methods_and_pcs = class_linker->AllocPointerArray(self_, depth * 2); const char* last_no_suspend_cause = self_->StartAssertNoThreadSuspension("Building internal stack trace"); - if (trace_ == nullptr) { + if (methods_and_pcs == nullptr) { self_->AssertPendingOOMException(); return false; } + trace->Set(0, methods_and_pcs); + trace_ = trace.Get(); // If We are called from native, use non-transactional mode. CHECK(last_no_suspend_cause == nullptr) << last_no_suspend_cause; return true; @@ -1990,16 +2013,24 @@ class BuildInternalStackTraceVisitor : public StackVisitor { if (m->IsRuntimeMethod()) { return true; // Ignore runtime frames (in particular callee save). } - trace_->SetElementPtrSize<kTransactionActive>( - count_, m, pointer_size_); - trace_->SetElementPtrSize<kTransactionActive>( - trace_->GetLength() / 2 + count_, m->IsProxyMethod() ? DexFile::kDexNoIndex : GetDexPc(), - pointer_size_); + mirror::PointerArray* trace_methods_and_pcs = GetTraceMethodsAndPCs(); + trace_methods_and_pcs->SetElementPtrSize<kTransactionActive>(count_, m, pointer_size_); + trace_methods_and_pcs->SetElementPtrSize<kTransactionActive>( + trace_methods_and_pcs->GetLength() / 2 + count_, + m->IsProxyMethod() ? DexFile::kDexNoIndex : GetDexPc(), + pointer_size_); + // Save the declaring class of the method to ensure that the declaring classes of the methods + // do not get unloaded while the stack trace is live. + trace_->Set(count_ + 1, m->GetDeclaringClass()); ++count_; return true; } - mirror::PointerArray* GetInternalStackTrace() const { + mirror::PointerArray* GetTraceMethodsAndPCs() const SHARED_REQUIRES(Locks::mutator_lock_) { + return down_cast<mirror::PointerArray*>(trace_->Get(0)); + } + + mirror::ObjectArray<mirror::Object>* GetInternalStackTrace() const { return trace_; } @@ -2009,8 +2040,11 @@ class BuildInternalStackTraceVisitor : public StackVisitor { int32_t skip_depth_; // Current position down stack trace. uint32_t count_; - // An array of the methods on the stack, the last entries are the dex PCs. - mirror::PointerArray* trace_; + // An object array where the first element is a pointer array that contains the ArtMethod + // pointers on the stack and dex PCs. The rest of the elements are the declaring + // class of the ArtMethod pointers. trace_[i+1] contains the declaring class of the ArtMethod of + // the i'th frame. + mirror::ObjectArray<mirror::Object>* trace_; // For cross compilation. const size_t pointer_size_; @@ -2033,11 +2067,12 @@ jobject Thread::CreateInternalStackTrace(const ScopedObjectAccessAlreadyRunnable return nullptr; // Allocation failed. } build_trace_visitor.WalkStack(); - mirror::PointerArray* trace = build_trace_visitor.GetInternalStackTrace(); + mirror::ObjectArray<mirror::Object>* trace = build_trace_visitor.GetInternalStackTrace(); if (kIsDebugBuild) { - // Second half is dex PCs. - for (uint32_t i = 0; i < static_cast<uint32_t>(trace->GetLength() / 2); ++i) { - auto* method = trace->GetElementPtrSize<ArtMethod*>( + mirror::PointerArray* trace_methods = build_trace_visitor.GetTraceMethodsAndPCs(); + // Second half of trace_methods is dex PCs. 
+ for (uint32_t i = 0; i < static_cast<uint32_t>(trace_methods->GetLength() / 2); ++i) { + auto* method = trace_methods->GetElementPtrSize<ArtMethod*>( i, Runtime::Current()->GetClassLinker()->GetImagePointerSize()); CHECK(method != nullptr); } @@ -2056,12 +2091,16 @@ bool Thread::IsExceptionThrownByCurrentMethod(mirror::Throwable* exception) cons } jobjectArray Thread::InternalStackTraceToStackTraceElementArray( - const ScopedObjectAccessAlreadyRunnable& soa, jobject internal, jobjectArray output_array, + const ScopedObjectAccessAlreadyRunnable& soa, + jobject internal, + jobjectArray output_array, int* stack_depth) { - // Decode the internal stack trace into the depth, method trace and PC trace - int32_t depth = soa.Decode<mirror::PointerArray*>(internal)->GetLength() / 2; + // Decode the internal stack trace into the depth, method trace and PC trace. + // Subtract one for the methods and PC trace. + int32_t depth = soa.Decode<mirror::Array*>(internal)->GetLength() - 1; + DCHECK_GE(depth, 0); - auto* cl = Runtime::Current()->GetClassLinker(); + ClassLinker* const class_linker = Runtime::Current()->GetClassLinker(); jobjectArray result; @@ -2075,7 +2114,7 @@ jobjectArray Thread::InternalStackTraceToStackTraceElementArray( } else { // Create java_trace array and place in local reference table mirror::ObjectArray<mirror::StackTraceElement>* java_traces = - cl->AllocStackTraceElementArray(soa.Self(), depth); + class_linker->AllocStackTraceElementArray(soa.Self(), depth); if (java_traces == nullptr) { return nullptr; } @@ -2087,7 +2126,12 @@ jobjectArray Thread::InternalStackTraceToStackTraceElementArray( } for (int32_t i = 0; i < depth; ++i) { - auto* method_trace = soa.Decode<mirror::PointerArray*>(internal); + mirror::ObjectArray<mirror::Object>* decoded_traces = + soa.Decode<mirror::Object*>(internal)->AsObjectArray<mirror::Object>(); + // Methods and dex PC trace is element 0. + DCHECK(decoded_traces->Get(0)->IsIntArray() || decoded_traces->Get(0)->IsLongArray()); + mirror::PointerArray* const method_trace = + down_cast<mirror::PointerArray*>(decoded_traces->Get(0)); // Prepare parameters for StackTraceElement(String cls, String method, String file, int line) ArtMethod* method = method_trace->GetElementPtrSize<ArtMethod*>(i, sizeof(void*)); uint32_t dex_pc = method_trace->GetElementPtrSize<uint32_t>( diff --git a/runtime/thread.h b/runtime/thread.h index 11f2e285a1..d262c62224 100644 --- a/runtime/thread.h +++ b/runtime/thread.h @@ -108,7 +108,8 @@ enum ThreadFlag { enum class StackedShadowFrameType { kShadowFrameUnderConstruction, - kDeoptimizationShadowFrame + kDeoptimizationShadowFrame, + kSingleFrameDeoptimizationShadowFrame }; static constexpr size_t kNumRosAllocThreadLocalSizeBrackets = 34; @@ -246,17 +247,15 @@ class Thread { SHARED_REQUIRES(Locks::mutator_lock_); // Transition from non-runnable to runnable state acquiring share on mutator_lock_. - ThreadState TransitionFromSuspendedToRunnable() + ALWAYS_INLINE ThreadState TransitionFromSuspendedToRunnable() REQUIRES(!Locks::thread_suspend_count_lock_) - SHARED_LOCK_FUNCTION(Locks::mutator_lock_) - ALWAYS_INLINE; + SHARED_LOCK_FUNCTION(Locks::mutator_lock_); // Transition from runnable into a state where mutator privileges are denied. Releases share of // mutator lock. 
- void TransitionFromRunnableToSuspended(ThreadState new_state) + ALWAYS_INLINE void TransitionFromRunnableToSuspended(ThreadState new_state) REQUIRES(!Locks::thread_suspend_count_lock_, !Roles::uninterruptible_) - UNLOCK_FUNCTION(Locks::mutator_lock_) - ALWAYS_INLINE; + UNLOCK_FUNCTION(Locks::mutator_lock_); // Once called thread suspension will cause an assertion failure. const char* StartAssertNoThreadSuspension(const char* cause) ACQUIRE(Roles::uninterruptible_) { @@ -843,7 +842,7 @@ class Thread { void AssertHasDeoptimizationContext() SHARED_REQUIRES(Locks::mutator_lock_); void PushStackedShadowFrame(ShadowFrame* sf, StackedShadowFrameType type); - ShadowFrame* PopStackedShadowFrame(StackedShadowFrameType type); + ShadowFrame* PopStackedShadowFrame(StackedShadowFrameType type, bool must_be_present = true); // For debugger, find the shadow frame that corresponds to a frame id. // Or return null if there is none. @@ -1016,11 +1015,15 @@ class Thread { // Dbg::Disconnected. ThreadState SetStateUnsafe(ThreadState new_state) { ThreadState old_state = GetState(); - tls32_.state_and_flags.as_struct.state = new_state; - // if transit to a suspended state, check the pass barrier request. - if (UNLIKELY((new_state != kRunnable) && - (tls32_.state_and_flags.as_struct.flags & kActiveSuspendBarrier))) { - PassActiveSuspendBarriers(this); + if (old_state == kRunnable && new_state != kRunnable) { + // Need to run pending checkpoint and suspend barriers. Run checkpoints in runnable state in + // case they need to use a ScopedObjectAccess. If we are holding the mutator lock and a SOA + // attempts to TransitionFromSuspendedToRunnable, it results in a deadlock. + TransitionToSuspendedAndRunCheckpoints(new_state); + // Since we transitioned to a suspended state, check the pass barrier requests. + PassActiveSuspendBarriers(); + } else { + tls32_.state_and_flags.as_struct.state = new_state; } return old_state; } @@ -1063,6 +1066,12 @@ class Thread { void SetUpAlternateSignalStack(); void TearDownAlternateSignalStack(); + ALWAYS_INLINE void TransitionToSuspendedAndRunCheckpoints(ThreadState new_state) + REQUIRES(!Locks::thread_suspend_count_lock_, !Roles::uninterruptible_); + + ALWAYS_INLINE void PassActiveSuspendBarriers() + REQUIRES(!Locks::thread_suspend_count_lock_, !Roles::uninterruptible_); + // 32 bits of atomically changed state and flags. Keeping as 32 bits allows and atomic CAS to // change from being Suspended to Runnable without a suspend request occurring. union PACKED(4) StateAndFlags { diff --git a/runtime/thread_pool.cc b/runtime/thread_pool.cc index d8f80fa690..0527d3ae14 100644 --- a/runtime/thread_pool.cc +++ b/runtime/thread_pool.cc @@ -16,7 +16,9 @@ #include "thread_pool.h" +#include "base/bit_utils.h" #include "base/casts.h" +#include "base/logging.h" #include "base/stl_util.h" #include "base/time_utils.h" #include "runtime.h" @@ -30,10 +32,15 @@ ThreadPoolWorker::ThreadPoolWorker(ThreadPool* thread_pool, const std::string& n size_t stack_size) : thread_pool_(thread_pool), name_(name) { + // Add an inaccessible page to catch stack overflow. 
+ stack_size += kPageSize; std::string error_msg; stack_.reset(MemMap::MapAnonymous(name.c_str(), nullptr, stack_size, PROT_READ | PROT_WRITE, false, false, &error_msg)); CHECK(stack_.get() != nullptr) << error_msg; + CHECK_ALIGNED(stack_->Begin(), kPageSize); + int mprotect_result = mprotect(stack_->Begin(), kPageSize, PROT_NONE); + CHECK_EQ(mprotect_result, 0) << "Failed to mprotect() bottom page of thread pool worker stack."; const char* reason = "new thread pool worker thread"; pthread_attr_t attr; CHECK_PTHREAD_CALL(pthread_attr_init, (&attr), reason); @@ -92,7 +99,8 @@ ThreadPool::ThreadPool(const char* name, size_t num_threads) while (GetThreadCount() < num_threads) { const std::string worker_name = StringPrintf("%s worker thread %zu", name_.c_str(), GetThreadCount()); - threads_.push_back(new ThreadPoolWorker(this, worker_name, ThreadPoolWorker::kDefaultStackSize)); + threads_.push_back( + new ThreadPoolWorker(this, worker_name, ThreadPoolWorker::kDefaultStackSize)); } // Wait for all of the threads to attach. creation_barier_.Wait(self); diff --git a/runtime/trace.cc b/runtime/trace.cc index e2743ceb13..745aa6386e 100644 --- a/runtime/trace.cc +++ b/runtime/trace.cc @@ -31,6 +31,7 @@ #include "common_throws.h" #include "debugger.h" #include "dex_file-inl.h" +#include "gc/scoped_gc_critical_section.h" #include "instrumentation.h" #include "mirror/class-inl.h" #include "mirror/dex_cache-inl.h" @@ -350,6 +351,10 @@ void Trace::Start(const char* trace_filename, int trace_fd, size_t buffer_size, // Create Trace object. { + // Required since EnableMethodTracing calls ConfigureStubs which visits class linker classes. + gc::ScopedGCCriticalSection gcs(self, + gc::kGcCauseInstrumentation, + gc::kCollectorTypeInstrumentation); ScopedSuspendAll ssa(__FUNCTION__); MutexLock mu(self, *Locks::trace_lock_); if (the_trace_ != nullptr) { @@ -464,9 +469,10 @@ void Trace::Pause() { Runtime* runtime = Runtime::Current(); Trace* the_trace = nullptr; + Thread* const self = Thread::Current(); pthread_t sampling_pthread = 0U; { - MutexLock mu(Thread::Current(), *Locks::trace_lock_); + MutexLock mu(self, *Locks::trace_lock_); if (the_trace_ == nullptr) { LOG(ERROR) << "Trace pause requested, but no trace currently running"; return; @@ -478,23 +484,26 @@ void Trace::Pause() { if (sampling_pthread != 0U) { { - MutexLock mu(Thread::Current(), *Locks::trace_lock_); + MutexLock mu(self, *Locks::trace_lock_); the_trace_ = nullptr; } CHECK_PTHREAD_CALL(pthread_join, (sampling_pthread, nullptr), "sampling thread shutdown"); sampling_pthread_ = 0U; { - MutexLock mu(Thread::Current(), *Locks::trace_lock_); + MutexLock mu(self, *Locks::trace_lock_); the_trace_ = the_trace; } } if (the_trace != nullptr) { + gc::ScopedGCCriticalSection gcs(self, + gc::kGcCauseInstrumentation, + gc::kCollectorTypeInstrumentation); ScopedSuspendAll ssa(__FUNCTION__); stop_alloc_counting = (the_trace->flags_ & Trace::kTraceCountAllocs) != 0; if (the_trace->trace_mode_ == TraceMode::kSampling) { - MutexLock mu(Thread::Current(), *Locks::thread_list_lock_); + MutexLock mu(self, *Locks::thread_list_lock_); runtime->GetThreadList()->ForEach(ClearThreadStackTraceAndClockBase, nullptr); } else { runtime->GetInstrumentation()->DisableMethodTracing(kTracerInstrumentationKey); @@ -530,6 +539,9 @@ void Trace::Resume() { bool enable_stats = (the_trace->flags_ && kTraceCountAllocs) != 0; { + gc::ScopedGCCriticalSection gcs(self, + gc::kGcCauseInstrumentation, + gc::kCollectorTypeInstrumentation); ScopedSuspendAll ssa(__FUNCTION__); // Reenable. 
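The instrumentation changes in Trace::Start, Pause and Resume above all follow the same locking pattern; a minimal illustrative sketch (not part of the patch, using only names that appear in these hunks):

  Thread* self = Thread::Current();
  {
    // Keep the GC (and therefore class unloading) from running while stubs are reconfigured.
    gc::ScopedGCCriticalSection gcs(self,
                                    gc::kGcCauseInstrumentation,
                                    gc::kCollectorTypeInstrumentation);
    // Only then stop all mutator threads before touching the instrumentation.
    ScopedSuspendAll ssa(__FUNCTION__);
    // ... enable or disable method tracing / ConfigureStubs here ...
  }  // ~ssa resumes the mutators first, then ~gcs leaves the GC critical section.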
@@ -1046,4 +1058,9 @@ size_t Trace::GetBufferSize() { return the_trace_->buffer_size_; } +bool Trace::IsTracingEnabled() { + MutexLock mu(Thread::Current(), *Locks::trace_lock_); + return the_trace_ != nullptr; +} + } // namespace art diff --git a/runtime/trace.h b/runtime/trace.h index 87a691d553..356a81f282 100644 --- a/runtime/trace.h +++ b/runtime/trace.h @@ -183,6 +183,9 @@ class Trace FINAL : public instrumentation::InstrumentationListener { static TraceMode GetMode() REQUIRES(!Locks::trace_lock_); static size_t GetBufferSize() REQUIRES(!Locks::trace_lock_); + // Used by class linker to prevent class unloading. + static bool IsTracingEnabled() REQUIRES(!Locks::trace_lock_); + private: Trace(File* trace_file, const char* trace_name, size_t buffer_size, int flags, TraceOutputMode output_mode, TraceMode trace_mode); diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc index 9938e907e9..eed3e22a72 100644 --- a/runtime/verifier/method_verifier.cc +++ b/runtime/verifier/method_verifier.cc @@ -1008,6 +1008,9 @@ bool MethodVerifier::VerifyInstruction(const Instruction* inst, uint32_t code_of case Instruction::kVerifyRegCWide: result = result && CheckWideRegisterIndex(inst->VRegC()); break; + case Instruction::kVerifyRegCString: + result = result && CheckStringIndex(inst->VRegC()); + break; } switch (inst->GetVerifyExtraFlags()) { case Instruction::kVerifyArrayData: @@ -1300,17 +1303,17 @@ bool MethodVerifier::CheckSwitchTargets(uint32_t cur_offset) { return false; } + bool is_packed_switch = (*insns & 0xff) == Instruction::PACKED_SWITCH; + uint32_t switch_count = switch_insns[1]; - int32_t keys_offset, targets_offset; + int32_t targets_offset; uint16_t expected_signature; - if ((*insns & 0xff) == Instruction::PACKED_SWITCH) { + if (is_packed_switch) { /* 0=sig, 1=count, 2/3=firstKey */ targets_offset = 4; - keys_offset = -1; expected_signature = Instruction::kPackedSwitchSignature; } else { /* 0=sig, 1=count, 2..count*2 = keys */ - keys_offset = 2; targets_offset = 2 + 2 * switch_count; expected_signature = Instruction::kSparseSwitchSignature; } @@ -1329,19 +1332,33 @@ bool MethodVerifier::CheckSwitchTargets(uint32_t cur_offset) { << ", count " << insn_count; return false; } - /* for a sparse switch, verify the keys are in ascending order */ - if (keys_offset > 0 && switch_count > 1) { - int32_t last_key = switch_insns[keys_offset] | (switch_insns[keys_offset + 1] << 16); - for (uint32_t targ = 1; targ < switch_count; targ++) { - int32_t key = - static_cast<int32_t>(switch_insns[keys_offset + targ * 2]) | - static_cast<int32_t>(switch_insns[keys_offset + targ * 2 + 1] << 16); - if (key <= last_key) { - Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invalid packed switch: last key=" << last_key - << ", this=" << key; + + constexpr int32_t keys_offset = 2; + if (switch_count > 1) { + if (is_packed_switch) { + /* for a packed switch, verify that keys do not overflow int32 */ + int32_t first_key = switch_insns[keys_offset] | (switch_insns[keys_offset + 1] << 16); + int32_t max_first_key = + std::numeric_limits<int32_t>::max() - (static_cast<int32_t>(switch_count) - 1); + if (first_key > max_first_key) { + Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invalid packed switch: first_key=" << first_key + << ", switch_count=" << switch_count; return false; } - last_key = key; + } else { + /* for a sparse switch, verify the keys are in ascending order */ + int32_t last_key = switch_insns[keys_offset] | (switch_insns[keys_offset + 1] << 16); + for (uint32_t targ = 1; targ < 
switch_count; targ++) { + int32_t key = + static_cast<int32_t>(switch_insns[keys_offset + targ * 2]) | + static_cast<int32_t>(switch_insns[keys_offset + targ * 2 + 1] << 16); + if (key <= last_key) { + Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invalid sparse switch: last key=" << last_key + << ", this=" << key; + return false; + } + last_key = key; + } } } /* verify each switch target */ @@ -3149,6 +3166,13 @@ bool MethodVerifier::CodeFlowVerifyInstruction(uint32_t* start_guess) { Fail(VERIFY_ERROR_FORCE_INTERPRETER); // TODO(iam): implement invoke-lambda verification break; } + case Instruction::CAPTURE_VARIABLE: { + // Don't bother verifying, instead the interpreter will take the slow path with access checks. + // If the code would've normally hard-failed, then the interpreter will throw the + // appropriate verification errors at runtime. + Fail(VERIFY_ERROR_FORCE_INTERPRETER); // TODO(iam): implement capture-variable verification + break; + } case Instruction::CREATE_LAMBDA: { // Don't bother verifying, instead the interpreter will take the slow path with access checks. // If the code would've normally hard-failed, then the interpreter will throw the @@ -3156,10 +3180,15 @@ bool MethodVerifier::CodeFlowVerifyInstruction(uint32_t* start_guess) { Fail(VERIFY_ERROR_FORCE_INTERPRETER); // TODO(iam): implement create-lambda verification break; } + case Instruction::LIBERATE_VARIABLE: { + // Don't bother verifying, instead the interpreter will take the slow path with access checks. + // If the code would've normally hard-failed, then the interpreter will throw the + // appropriate verification errors at runtime. + Fail(VERIFY_ERROR_FORCE_INTERPRETER); // TODO(iam): implement liberate-variable verification + break; + } - case Instruction::UNUSED_F4: - case Instruction::UNUSED_F5: - case Instruction::UNUSED_F7: { + case Instruction::UNUSED_F4: { DCHECK(false); // TODO(iam): Implement opcodes for lambdas // Conservatively fail verification on release builds. Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Unexpected opcode " << inst->DumpString(dex_file_); diff --git a/test/004-JniTest/expected.txt b/test/004-JniTest/expected.txt index 49d9cc0d5a..86ab37e1e5 100644 --- a/test/004-JniTest/expected.txt +++ b/test/004-JniTest/expected.txt @@ -1,3 +1,4 @@ +JNI_OnLoad called Super.<init> Super.<init> Subclass.<init> diff --git a/test/004-JniTest/jni_test.cc b/test/004-JniTest/jni_test.cc index db0dd32771..be7888b04a 100644 --- a/test/004-JniTest/jni_test.cc +++ b/test/004-JniTest/jni_test.cc @@ -15,8 +15,9 @@ */ #include <assert.h> -#include <stdio.h> +#include <iostream> #include <pthread.h> +#include <stdio.h> #include <vector> #include "jni.h" @@ -27,13 +28,21 @@ static JavaVM* jvm = nullptr; -extern "C" JNIEXPORT jint JNI_OnLoad(JavaVM *vm, void *) { +extern "C" JNIEXPORT jint JNI_OnLoad(JavaVM *vm, void*) { assert(vm != nullptr); assert(jvm == nullptr); jvm = vm; + std::cout << "JNI_OnLoad called" << std::endl; return JNI_VERSION_1_6; } +extern "C" JNIEXPORT void JNI_OnUnload(JavaVM*, void*) { + // std::cout since LOG(INFO) adds extra stuff like pid. + std::cout << "JNI_OnUnload called" << std::endl; + // Clear jvm for assert in test 004-JniTest. 
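The packed-switch change to CheckSwitchTargets above bounds the first key so that the last key, first_key + switch_count - 1, still fits in int32_t. A standalone sketch of that bound with a concrete rejected case (the helper name is made up for illustration):

#include <cstdint>
#include <limits>

// True if every key of a packed switch with this first key and entry count is
// representable as int32_t, i.e. first_key + switch_count - 1 does not overflow.
static bool PackedSwitchKeysFit(int32_t first_key, uint32_t switch_count) {
  if (switch_count <= 1) {
    return true;
  }
  const int32_t max_first_key =
      std::numeric_limits<int32_t>::max() - (static_cast<int32_t>(switch_count) - 1);
  return first_key <= max_first_key;
}

// Example: PackedSwitchKeysFit(0x7ffffffe, 3) is false; the keys would be
// 0x7ffffffe, 0x7fffffff and then wrap past INT32_MAX, which the check above rejects.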
+ jvm = nullptr; +} + static void* AttachHelper(void* arg) { assert(jvm != nullptr); diff --git a/test/004-ReferenceMap/expected.txt b/test/004-ReferenceMap/expected.txt index e69de29bb2..6a5618ebc6 100644 --- a/test/004-ReferenceMap/expected.txt +++ b/test/004-ReferenceMap/expected.txt @@ -0,0 +1 @@ +JNI_OnLoad called diff --git a/test/004-ReferenceMap/stack_walk_refmap_jni.cc b/test/004-ReferenceMap/stack_walk_refmap_jni.cc index 55a77ac2eb..285df18c72 100644 --- a/test/004-ReferenceMap/stack_walk_refmap_jni.cc +++ b/test/004-ReferenceMap/stack_walk_refmap_jni.cc @@ -49,7 +49,9 @@ struct ReferenceMap2Visitor : public CheckReferenceMapVisitor { CHECK_REGS_CONTAIN_REFS(0x06U, true, 8, 1); // v8: this, v1: x CHECK_REGS_CONTAIN_REFS(0x08U, true, 8, 3, 1); // v8: this, v3: y, v1: x CHECK_REGS_CONTAIN_REFS(0x0cU, true, 8, 3, 1); // v8: this, v3: y, v1: x - CHECK_REGS_CONTAIN_REFS(0x0eU, true, 8, 3, 1); // v8: this, v3: y, v1: x + if (!m->IsOptimized(sizeof(void*))) { + CHECK_REGS_CONTAIN_REFS(0x0eU, true, 8, 3, 1); // v8: this, v3: y, v1: x + } CHECK_REGS_CONTAIN_REFS(0x10U, true, 8, 3, 1); // v8: this, v3: y, v1: x // v2 is added because of the instruction at DexPC 0024. Object merges with 0 is Object. See: // 0024: move-object v3, v2 @@ -63,12 +65,18 @@ struct ReferenceMap2Visitor : public CheckReferenceMapVisitor { // Note that v0: ex can be eliminated because it's a dead merge of two different exceptions. CHECK_REGS_CONTAIN_REFS(0x18U, true, 8, 2, 1); // v8: this, v2: y, v1: x (dead v0: ex) CHECK_REGS_CONTAIN_REFS(0x1aU, true, 8, 5, 2, 1); // v8: this, v5: x[1], v2: y, v1: x (dead v0: ex) - CHECK_REGS_CONTAIN_REFS(0x1dU, true, 8, 5, 2, 1); // v8: this, v5: x[1], v2: y, v1: x (dead v0: ex) - // v5 is removed from the root set because there is a "merge" operation. - // See 0015: if-nez v2, 001f. - CHECK_REGS_CONTAIN_REFS(0x1fU, true, 8, 2, 1); // v8: this, v2: y, v1: x (dead v0: ex) + if (!m->IsOptimized(sizeof(void*))) { + // v8: this, v5: x[1], v2: y, v1: x (dead v0: ex) + CHECK_REGS_CONTAIN_REFS(0x1dU, true, 8, 5, 2, 1); + // v5 is removed from the root set because there is a "merge" operation. + // See 0015: if-nez v2, 001f. 
+ CHECK_REGS_CONTAIN_REFS(0x1fU, true, 8, 2, 1); // v8: this, v2: y, v1: x (dead v0: ex) + } CHECK_REGS_CONTAIN_REFS(0x21U, true, 8, 2, 1); // v8: this, v2: y, v1: x (dead v0: ex) - CHECK_REGS_CONTAIN_REFS(0x27U, true, 8, 4, 2, 1); // v8: this, v4: ex, v2: y, v1: x + + if (!m->IsOptimized(sizeof(void*))) { + CHECK_REGS_CONTAIN_REFS(0x27U, true, 8, 4, 2, 1); // v8: this, v4: ex, v2: y, v1: x + } CHECK_REGS_CONTAIN_REFS(0x29U, true, 8, 4, 2, 1); // v8: this, v4: ex, v2: y, v1: x CHECK_REGS_CONTAIN_REFS(0x2cU, true, 8, 4, 2, 1); // v8: this, v4: ex, v2: y, v1: x // Note that it is OK for a compiler to not have a dex map at these two dex PCs because diff --git a/test/004-SignalTest/expected.txt b/test/004-SignalTest/expected.txt index fd5ec00067..b3a0e1cbe0 100644 --- a/test/004-SignalTest/expected.txt +++ b/test/004-SignalTest/expected.txt @@ -1,3 +1,4 @@ +JNI_OnLoad called init signal test Caught NullPointerException Caught StackOverflowError diff --git a/test/004-StackWalk/expected.txt b/test/004-StackWalk/expected.txt index bde00246a3..5af68cd85d 100644 --- a/test/004-StackWalk/expected.txt +++ b/test/004-StackWalk/expected.txt @@ -1,3 +1,4 @@ +JNI_OnLoad called 1st call 172001234567891011121314151617181920652310201919 2nd call diff --git a/test/004-UnsafeTest/expected.txt b/test/004-UnsafeTest/expected.txt index e69de29bb2..6a5618ebc6 100644 --- a/test/004-UnsafeTest/expected.txt +++ b/test/004-UnsafeTest/expected.txt @@ -0,0 +1 @@ +JNI_OnLoad called diff --git a/test/024-illegal-access/expected.txt b/test/024-illegal-access/expected.txt index 5f951f4939..0ae4a774f3 100644 --- a/test/024-illegal-access/expected.txt +++ b/test/024-illegal-access/expected.txt @@ -1,2 +1,5 @@ Got expected failure 1 Got expected failure 2 +Got expected failure 3 +Got expected failure 4 +Got expected failure 5 diff --git a/test/024-illegal-access/src/Main.java b/test/024-illegal-access/src/Main.java index bde73e9452..84c7114cb4 100644 --- a/test/024-illegal-access/src/Main.java +++ b/test/024-illegal-access/src/Main.java @@ -17,7 +17,7 @@ public class Main { static public void main(String[] args) { try { - PublicAccess.main(); + PublicAccess.accessStaticField(); System.err.println("ERROR: call 1 not expected to succeed"); } catch (VerifyError ve) { // dalvik @@ -28,14 +28,41 @@ public class Main { } try { - CheckInstanceof.main(new Object()); + PublicAccess.accessStaticMethod(); System.err.println("ERROR: call 2 not expected to succeed"); + } catch (IllegalAccessError iae) { + // reference + System.out.println("Got expected failure 2"); + } + + try { + PublicAccess.accessInstanceField(); + System.err.println("ERROR: call 3 not expected to succeed"); } catch (VerifyError ve) { // dalvik - System.out.println("Got expected failure 2"); + System.out.println("Got expected failure 3"); } catch (IllegalAccessError iae) { // reference - System.out.println("Got expected failure 2"); + System.out.println("Got expected failure 3"); + } + + try { + PublicAccess.accessInstanceMethod(); + System.err.println("ERROR: call 4 not expected to succeed"); + } catch (IllegalAccessError iae) { + // reference + System.out.println("Got expected failure 4"); + } + + try { + CheckInstanceof.main(new Object()); + System.err.println("ERROR: call 5 not expected to succeed"); + } catch (VerifyError ve) { + // dalvik + System.out.println("Got expected failure 5"); + } catch (IllegalAccessError iae) { + // reference + System.out.println("Got expected failure 5"); } } } diff --git a/test/024-illegal-access/src/PublicAccess.java 
b/test/024-illegal-access/src/PublicAccess.java index 4e72cd4dce..e3fef855e2 100644 --- a/test/024-illegal-access/src/PublicAccess.java +++ b/test/024-illegal-access/src/PublicAccess.java @@ -18,8 +18,20 @@ * Some stuff for access checks. */ public class PublicAccess { - public static void main() { - String shouldFail = SemiPrivate.mPrivvy; + public static void accessStaticField() { + String shouldFail = SemiPrivate.mStaticPrivvy; + System.out.println("Got " + shouldFail); + } + public static void accessStaticMethod() { + String shouldFail = SemiPrivate.privvyStaticMethod(); + System.out.println("Got " + shouldFail); + } + public static void accessInstanceField() { + String shouldFail = new SemiPrivate().mInstancePrivvy; + System.out.println("Got " + shouldFail); + } + public static void accessInstanceMethod() { + String shouldFail = new SemiPrivate().privvyInstanceMethod(); System.out.println("Got " + shouldFail); } } diff --git a/test/024-illegal-access/src/SemiPrivate.java b/test/024-illegal-access/src/SemiPrivate.java index 06b16c40b9..62e0d05213 100644 --- a/test/024-illegal-access/src/SemiPrivate.java +++ b/test/024-illegal-access/src/SemiPrivate.java @@ -18,5 +18,15 @@ * Version with package scope access. */ public class SemiPrivate { - /* not private */ static String mPrivvy = "stuff"; + /* not private */ static String mStaticPrivvy = "stuff"; + + /* not private */ static String privvyStaticMethod() { + return "stuff"; + } + + /* not private */ String mInstancePrivvy = "stuff"; + + /* not private */ String privvyInstanceMethod() { + return "stuff"; + } } diff --git a/test/024-illegal-access/src2/SemiPrivate.java b/test/024-illegal-access/src2/SemiPrivate.java index 064265ab37..4f36a07418 100644 --- a/test/024-illegal-access/src2/SemiPrivate.java +++ b/test/024-illegal-access/src2/SemiPrivate.java @@ -18,5 +18,15 @@ * Version with private access. */ public class SemiPrivate { - private static String mPrivvy = "stuff"; + private static String mStaticPrivvy = "stuff"; + + private static String privvyStaticMethod() { + return "stuff"; + } + + private String mInstancePrivvy = "stuff"; + + private String privvyInstanceMethod() { + return "stuff"; + } } diff --git a/test/044-proxy/expected.txt b/test/044-proxy/expected.txt index f86948ad6c..052c8faf1b 100644 --- a/test/044-proxy/expected.txt +++ b/test/044-proxy/expected.txt @@ -93,4 +93,5 @@ Invocation of public abstract java.lang.String NarrowingTest$I2.foo() Got expected exception Proxy narrowed invocation return type passed 5.8 +JNI_OnLoad called callback diff --git a/test/051-thread/expected.txt b/test/051-thread/expected.txt index 54e34af3aa..c6cd4f8bea 100644 --- a/test/051-thread/expected.txt +++ b/test/051-thread/expected.txt @@ -1,3 +1,4 @@ +JNI_OnLoad called thread test starting testThreadCapacity thread count: 512 testThreadDaemons starting thread 'TestDaemonThread' diff --git a/test/087-gc-after-link/src/Main.java b/test/087-gc-after-link/src/Main.java index e0a187a3a0..2f6d496f44 100644 --- a/test/087-gc-after-link/src/Main.java +++ b/test/087-gc-after-link/src/Main.java @@ -155,6 +155,12 @@ public class Main { * See if we can GC after a failed load. 
*/ static void testFailLoadAndGc() throws TestFailed { + processFailLoadAndGc(); + Runtime.getRuntime().gc(); + System.out.println("GC complete."); + } + + private static void processFailLoadAndGc() throws TestFailed { try { BrokenDexLoader loader; @@ -170,7 +176,5 @@ public class Main { ite.printStackTrace(); } } - Runtime.getRuntime().gc(); - System.out.println("GC complete."); } } diff --git a/test/088-monitor-verification/expected.txt b/test/088-monitor-verification/expected.txt index 13b8c73970..2cb8f2cdf2 100644 --- a/test/088-monitor-verification/expected.txt +++ b/test/088-monitor-verification/expected.txt @@ -1,12 +1,7 @@ +JNI_OnLoad called recursiveSync ok nestedMayThrow ok constantLock ok notNested ok twoPath ok triplet ok -OK -TooDeep -NotStructuredOverUnlock -NotStructuredUnderUnlock -UnbalancedJoin -UnbalancedStraight diff --git a/test/088-monitor-verification/smali/NotStructuredOverUnlock.smali b/test/088-monitor-verification/smali/NotStructuredOverUnlock.smali index aa0c2d5a13..0dc492f2b3 100644 --- a/test/088-monitor-verification/smali/NotStructuredOverUnlock.smali +++ b/test/088-monitor-verification/smali/NotStructuredOverUnlock.smali @@ -5,7 +5,7 @@ .method public static run(Ljava/lang/Object;)V .registers 3 - invoke-static {}, LMain;->assertCallerIsInterpreted()V + invoke-static {}, LMain;->assertIsInterpreted()V # Lock twice, but unlock thrice. diff --git a/test/088-monitor-verification/smali/NotStructuredUnderUnlock.smali b/test/088-monitor-verification/smali/NotStructuredUnderUnlock.smali index 2c31fdaa85..df6e168685 100644 --- a/test/088-monitor-verification/smali/NotStructuredUnderUnlock.smali +++ b/test/088-monitor-verification/smali/NotStructuredUnderUnlock.smali @@ -5,7 +5,7 @@ .method public static run(Ljava/lang/Object;)V .registers 3 - invoke-static {}, LMain;->assertCallerIsInterpreted()V + invoke-static {}, LMain;->assertIsInterpreted()V # Lock thrice, but only unlock twice. diff --git a/test/088-monitor-verification/smali/OK.smali b/test/088-monitor-verification/smali/OK.smali index 596798d80c..a43ecb0704 100644 --- a/test/088-monitor-verification/smali/OK.smali +++ b/test/088-monitor-verification/smali/OK.smali @@ -20,7 +20,7 @@ .method public static runNoMonitors(Ljava/lang/Object;Ljava/lang/Object;)V .registers 3 - invoke-static {}, LMain;->assertCallerIsManaged()V + invoke-static {}, LMain;->assertIsManaged()V return-void @@ -29,7 +29,7 @@ .method public static runStraightLine(Ljava/lang/Object;Ljava/lang/Object;)V .registers 3 - invoke-static {}, LMain;->assertCallerIsManaged()V + invoke-static {}, LMain;->assertIsManaged()V monitor-enter v1 # 1 monitor-enter v2 # 2 @@ -44,7 +44,7 @@ .method public static runBalancedJoin(Ljava/lang/Object;Ljava/lang/Object;)V .registers 3 - invoke-static {}, LMain;->assertCallerIsManaged()V + invoke-static {}, LMain;->assertIsManaged()V monitor-enter v1 # 1 diff --git a/test/088-monitor-verification/smali/TooDeep.smali b/test/088-monitor-verification/smali/TooDeep.smali index 1a8f2f06e8..a1e328148d 100644 --- a/test/088-monitor-verification/smali/TooDeep.smali +++ b/test/088-monitor-verification/smali/TooDeep.smali @@ -7,7 +7,7 @@ # Lock depth is 33, which is more than the verifier supports. This should have been punted to # the interpreter. 
- invoke-static {}, LMain;->assertCallerIsInterpreted()V + invoke-static {}, LMain;->assertIsInterpreted()V monitor-enter v2 # 1 monitor-enter v2 # 2 diff --git a/test/088-monitor-verification/smali/UnbalancedJoin.smali b/test/088-monitor-verification/smali/UnbalancedJoin.smali index da8f7732af..993f32c022 100644 --- a/test/088-monitor-verification/smali/UnbalancedJoin.smali +++ b/test/088-monitor-verification/smali/UnbalancedJoin.smali @@ -5,7 +5,7 @@ .method public static run(Ljava/lang/Object;Ljava/lang/Object;)V .registers 3 - invoke-static {}, LMain;->assertCallerIsInterpreted()V + invoke-static {}, LMain;->assertIsInterpreted()V if-eqz v2, :Lnull diff --git a/test/088-monitor-verification/smali/UnbalancedStraight.smali b/test/088-monitor-verification/smali/UnbalancedStraight.smali index 68edb6c783..cbb8bcc488 100644 --- a/test/088-monitor-verification/smali/UnbalancedStraight.smali +++ b/test/088-monitor-verification/smali/UnbalancedStraight.smali @@ -5,7 +5,7 @@ .method public static run(Ljava/lang/Object;Ljava/lang/Object;)V .registers 3 - invoke-static {}, LMain;->assertCallerIsInterpreted()V + invoke-static {}, LMain;->assertIsInterpreted()V monitor-enter v1 # 1 monitor-enter v2 # 2 diff --git a/test/088-monitor-verification/src/Main.java b/test/088-monitor-verification/src/Main.java index 53b72e9f5c..218805543e 100644 --- a/test/088-monitor-verification/src/Main.java +++ b/test/088-monitor-verification/src/Main.java @@ -220,6 +220,11 @@ public class Main { // Smali testing code. private static void runSmaliTests() { + if (!hasOatFile() || runtimeIsSoftFail() || isInterpreted()) { + // Skip test, this seems to be a non-compiled code test configuration. + return; + } + runTest("OK", new Object[] { new Object(), new Object() }, null); runTest("TooDeep", new Object[] { new Object() }, null); runTest("NotStructuredOverUnlock", new Object[] { new Object() }, @@ -231,7 +236,6 @@ public class Main { } private static void runTest(String className, Object[] parameters, Class<?> excType) { - System.out.println(className); try { Class<?> c = Class.forName(className); @@ -273,6 +277,9 @@ public class Main { } // Helpers for the smali code. - public static native void assertCallerIsInterpreted(); - public static native void assertCallerIsManaged(); + public static native void assertIsInterpreted(); + public static native void assertIsManaged(); + public static native boolean hasOatFile(); + public static native boolean runtimeIsSoftFail(); + public static native boolean isInterpreted(); } diff --git a/test/115-native-bridge/expected.txt b/test/115-native-bridge/expected.txt index 372ecd0484..b003307ab7 100644 --- a/test/115-native-bridge/expected.txt +++ b/test/115-native-bridge/expected.txt @@ -17,6 +17,7 @@ Test ART callbacks: all JNI function number is 11. name:testSignal, signature:()I, shorty:I. name:testZeroLengthByteBuffers, signature:()V, shorty:V. trampoline_JNI_OnLoad called! +JNI_OnLoad called Getting trampoline for Java_Main_testFindClassOnAttachedNativeThread with shorty V. trampoline_Java_Main_testFindClassOnAttachedNativeThread called! Getting trampoline for Java_Main_testFindFieldOnAttachedNativeThreadNative with shorty V. diff --git a/test/116-nodex2oat/expected.txt b/test/116-nodex2oat/expected.txt index 05b1c2f387..157dfc4ea4 100644 --- a/test/116-nodex2oat/expected.txt +++ b/test/116-nodex2oat/expected.txt @@ -1,6 +1,9 @@ Run -Xnodex2oat +JNI_OnLoad called Has oat is false, is dex2oat enabled is false. 
Run -Xdex2oat +JNI_OnLoad called Has oat is true, is dex2oat enabled is true. Run default +JNI_OnLoad called Has oat is true, is dex2oat enabled is true. diff --git a/test/116-nodex2oat/nodex2oat.cc b/test/116-nodex2oat/nodex2oat.cc deleted file mode 100644 index 131af312be..0000000000 --- a/test/116-nodex2oat/nodex2oat.cc +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (C) 2014 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "class_linker.h" -#include "dex_file-inl.h" -#include "mirror/class-inl.h" -#include "scoped_thread_state_change.h" -#include "thread.h" - -namespace art { - -class NoDex2OatTest { - public: - static bool hasOat(jclass cls) { - ScopedObjectAccess soa(Thread::Current()); - mirror::Class* klass = soa.Decode<mirror::Class*>(cls); - const DexFile& dex_file = klass->GetDexFile(); - const OatFile::OatDexFile* oat_dex_file = dex_file.GetOatDexFile(); - return oat_dex_file != nullptr; - } -}; - -extern "C" JNIEXPORT jboolean JNICALL Java_Main_hasOat(JNIEnv*, jclass cls) { - return NoDex2OatTest::hasOat(cls); -} - -extern "C" JNIEXPORT jboolean JNICALL Java_Main_isDex2OatEnabled(JNIEnv*, jclass) { - return Runtime::Current()->IsDex2OatEnabled(); -} - -} // namespace art diff --git a/test/116-nodex2oat/src/Main.java b/test/116-nodex2oat/src/Main.java index 086ffb9295..229735f4b8 100644 --- a/test/116-nodex2oat/src/Main.java +++ b/test/116-nodex2oat/src/Main.java @@ -18,16 +18,16 @@ public class Main { public static void main(String[] args) { System.loadLibrary(args[0]); System.out.println( - "Has oat is " + hasOat() + ", is dex2oat enabled is " + isDex2OatEnabled() + "."); + "Has oat is " + hasOatFile() + ", is dex2oat enabled is " + isDex2OatEnabled() + "."); - if (hasOat() && !isDex2OatEnabled()) { + if (hasOatFile() && !isDex2OatEnabled()) { throw new Error("Application with dex2oat disabled runs with an oat file"); - } else if (!hasOat() && isDex2OatEnabled()) { + } else if (!hasOatFile() && isDex2OatEnabled()) { throw new Error("Application with dex2oat enabled runs without an oat file"); } } - private native static boolean hasOat(); + private native static boolean hasOatFile(); private native static boolean isDex2OatEnabled(); } diff --git a/test/117-nopatchoat/expected.txt b/test/117-nopatchoat/expected.txt index 5cc02d1662..0cd4715d09 100644 --- a/test/117-nopatchoat/expected.txt +++ b/test/117-nopatchoat/expected.txt @@ -1,9 +1,12 @@ Run without dex2oat/patchoat +JNI_OnLoad called dex2oat & patchoat are disabled, has oat is true, has executable oat is expected. This is a function call Run with dexoat/patchoat +JNI_OnLoad called dex2oat & patchoat are enabled, has oat is true, has executable oat is expected. This is a function call Run default +JNI_OnLoad called dex2oat & patchoat are enabled, has oat is true, has executable oat is expected. 
This is a function call diff --git a/test/117-nopatchoat/nopatchoat.cc b/test/117-nopatchoat/nopatchoat.cc index 7eac412681..3e533ad62e 100644 --- a/test/117-nopatchoat/nopatchoat.cc +++ b/test/117-nopatchoat/nopatchoat.cc @@ -16,7 +16,10 @@ #include "class_linker.h" #include "dex_file-inl.h" +#include "gc/heap.h" +#include "gc/space/image_space.h" #include "mirror/class-inl.h" +#include "runtime.h" #include "scoped_thread_state_change.h" #include "thread.h" @@ -31,6 +34,11 @@ class NoPatchoatTest { return dex_file.GetOatDexFile(); } + static bool isRelocationDeltaZero() { + gc::space::ImageSpace* space = Runtime::Current()->GetHeap()->GetImageSpace(); + return space != nullptr && space->GetImageHeader().GetPatchDelta() == 0; + } + static bool hasExecutableOat(jclass cls) { const OatFile::OatDexFile* oat_dex_file = getOatDexFile(cls); @@ -49,6 +57,10 @@ class NoPatchoatTest { } }; +extern "C" JNIEXPORT jboolean JNICALL Java_Main_isRelocationDeltaZero(JNIEnv*, jclass) { + return NoPatchoatTest::isRelocationDeltaZero(); +} + extern "C" JNIEXPORT jboolean JNICALL Java_Main_hasExecutableOat(JNIEnv*, jclass cls) { return NoPatchoatTest::hasExecutableOat(cls); } diff --git a/test/117-nopatchoat/run b/test/117-nopatchoat/run index c749c74345..c634900218 100755 --- a/test/117-nopatchoat/run +++ b/test/117-nopatchoat/run @@ -36,8 +36,6 @@ fi # Make sure we can run without relocation echo "Run without dex2oat/patchoat" -# /bin/false is actually not even there for either, so the exec will fail. -# Unfortunately there is no equivalent to /bin/false in android. ${RUN} ${flags} --runtime-option -Xnodex2oat # Make sure we can run with the oat file. diff --git a/test/117-nopatchoat/src/Main.java b/test/117-nopatchoat/src/Main.java index 223e12084d..425cf4863f 100644 --- a/test/117-nopatchoat/src/Main.java +++ b/test/117-nopatchoat/src/Main.java @@ -18,16 +18,20 @@ public class Main { public static void main(String[] args) { System.loadLibrary(args[0]); + // With a relocationDelta of 0, the runtime has no way to determine if the oat file in + // ANDROID_DATA has been relocated, since a non-relocated oat file always has a 0 delta. + // Hitting this condition should be rare and ideally we would prevent it from happening but + // there is no way to do so without major changes to the run-test framework. boolean executable_correct = (isPic() ? - hasExecutableOat() == true : - hasExecutableOat() == isDex2OatEnabled()); + hasExecutableOat() == true : + hasExecutableOat() == (isDex2OatEnabled() || isRelocationDeltaZero())); System.out.println( "dex2oat & patchoat are " + ((isDex2OatEnabled()) ? "enabled" : "disabled") + - ", has oat is " + hasOat() + ", has executable oat is " + ( + ", has oat is " + hasOatFile() + ", has executable oat is " + ( executable_correct ? 
"expected" : "not expected") + "."); - if (!hasOat() && isDex2OatEnabled()) { + if (!hasOatFile() && isDex2OatEnabled()) { throw new Error("Application with dex2oat enabled runs without an oat file"); } @@ -47,7 +51,9 @@ public class Main { private native static boolean isPic(); - private native static boolean hasOat(); + private native static boolean hasOatFile(); private native static boolean hasExecutableOat(); + + private native static boolean isRelocationDeltaZero(); } diff --git a/test/118-noimage-dex2oat/expected.txt b/test/118-noimage-dex2oat/expected.txt index 0103e899f6..166481e96a 100644 --- a/test/118-noimage-dex2oat/expected.txt +++ b/test/118-noimage-dex2oat/expected.txt @@ -1,11 +1,14 @@ Run -Xnoimage-dex2oat +JNI_OnLoad called Has image is false, is image dex2oat enabled is false, is BOOTCLASSPATH on disk is false. testB18485243 PASS Run -Xnoimage-dex2oat -Xno-dex-file-fallback Failed to initialize runtime (check log for details) Run -Ximage-dex2oat +JNI_OnLoad called Has image is true, is image dex2oat enabled is true, is BOOTCLASSPATH on disk is true. testB18485243 PASS Run default +JNI_OnLoad called Has image is true, is image dex2oat enabled is true, is BOOTCLASSPATH on disk is true. testB18485243 PASS diff --git a/test/118-noimage-dex2oat/noimage-dex2oat.cc b/test/118-noimage-dex2oat/noimage-dex2oat.cc deleted file mode 100644 index aacf00f300..0000000000 --- a/test/118-noimage-dex2oat/noimage-dex2oat.cc +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (C) 2014 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "class_linker.h" -#include "dex_file-inl.h" -#include "mirror/class-inl.h" -#include "scoped_thread_state_change.h" -#include "thread.h" - -namespace art { - -class NoDex2OatTest { - public: - static bool hasOat(jclass cls) { - ScopedObjectAccess soa(Thread::Current()); - mirror::Class* klass = soa.Decode<mirror::Class*>(cls); - const DexFile& dex_file = klass->GetDexFile(); - const OatFile::OatDexFile* oat_dex_file = dex_file.GetOatDexFile(); - return oat_dex_file != nullptr; - } -}; - -extern "C" JNIEXPORT jboolean JNICALL Java_Main_hasImage(JNIEnv*, jclass) { - return Runtime::Current()->GetHeap()->HasImageSpace(); -} - -extern "C" JNIEXPORT jboolean JNICALL Java_Main_isImageDex2OatEnabled(JNIEnv*, jclass) { - return Runtime::Current()->IsImageDex2OatEnabled(); -} - -} // namespace art diff --git a/test/119-noimage-patchoat/expected.txt b/test/119-noimage-patchoat/expected.txt index ed136621c3..9b9db58fcd 100644 --- a/test/119-noimage-patchoat/expected.txt +++ b/test/119-noimage-patchoat/expected.txt @@ -1,8 +1,11 @@ Run -Xnoimage-dex2oat -Xpatchoat:/system/bin/false +JNI_OnLoad called Has image is false, is image dex2oat enabled is false. Run -Xnoimage-dex2oat -Xpatchoat:/system/bin/false -Xno-dex-file-fallback Failed to initialize runtime (check log for details) Run -Ximage-dex2oat +JNI_OnLoad called Has image is true, is image dex2oat enabled is true. 
Run default +JNI_OnLoad called Has image is true, is image dex2oat enabled is true. diff --git a/test/137-cfi/expected.txt b/test/137-cfi/expected.txt index e69de29bb2..6a5618ebc6 100644 --- a/test/137-cfi/expected.txt +++ b/test/137-cfi/expected.txt @@ -0,0 +1 @@ +JNI_OnLoad called diff --git a/test/139-register-natives/expected.txt b/test/139-register-natives/expected.txt index e69de29bb2..6a5618ebc6 100644 --- a/test/139-register-natives/expected.txt +++ b/test/139-register-natives/expected.txt @@ -0,0 +1 @@ +JNI_OnLoad called diff --git a/test/141-class-unload/expected.txt b/test/141-class-unload/expected.txt new file mode 100644 index 0000000000..53d7abecaf --- /dev/null +++ b/test/141-class-unload/expected.txt @@ -0,0 +1,23 @@ +1 +2 +JNI_OnLoad called +JNI_OnUnload called +1 +2 +JNI_OnLoad called +JNI_OnUnload called +null +null +JNI_OnLoad called +JNI_OnUnload called +null +loader null false +loader null false +JNI_OnLoad called +JNI_OnUnload called +null +1 +2 +JNI_OnLoad called +class null false test +JNI_OnUnload called diff --git a/test/141-class-unload/info.txt b/test/141-class-unload/info.txt new file mode 100644 index 0000000000..d8dd381dc7 --- /dev/null +++ b/test/141-class-unload/info.txt @@ -0,0 +1 @@ +Test that classes get freed after they are no longer reachable. diff --git a/test/141-class-unload/jni_unload.cc b/test/141-class-unload/jni_unload.cc new file mode 100644 index 0000000000..d913efe53e --- /dev/null +++ b/test/141-class-unload/jni_unload.cc @@ -0,0 +1,37 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "jni.h" + +#include <iostream> + +#include "jit/jit.h" +#include "jit/jit_instrumentation.h" +#include "runtime.h" +#include "thread-inl.h" + +namespace art { +namespace { + +extern "C" JNIEXPORT void JNICALL Java_IntHolder_waitForCompilation(JNIEnv*, jclass) { + jit::Jit* jit = Runtime::Current()->GetJit(); + if (jit != nullptr) { + jit->GetInstrumentationCache()->WaitForCompilationToFinish(Thread::Current()); + } +} + +} // namespace +} // namespace art diff --git a/test/141-class-unload/src-ex/IntHolder.java b/test/141-class-unload/src-ex/IntHolder.java new file mode 100644 index 0000000000..feff0d2ba1 --- /dev/null +++ b/test/141-class-unload/src-ex/IntHolder.java @@ -0,0 +1,43 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// Simple class that holds a static int for testing that class unloading works +// and re-runs the class initializer. +public class IntHolder { + private static int value = 1; + + public static void setValue(int newValue) { + value = newValue; + } + + public static int getValue() { + return value; + } + + public static void runGC() { + Runtime.getRuntime().gc(); + } + + public static void loadLibrary(String name) { + System.loadLibrary(name); + } + + public static native void waitForCompilation(); + + public static Throwable generateStackTrace() { + return new Exception("test"); + } +} diff --git a/test/141-class-unload/src/Main.java b/test/141-class-unload/src/Main.java new file mode 100644 index 0000000000..3cc43accbe --- /dev/null +++ b/test/141-class-unload/src/Main.java @@ -0,0 +1,166 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.lang.ref.WeakReference; +import java.lang.reflect.Constructor; +import java.lang.reflect.Method; + +public class Main { + static final String DEX_FILE = System.getenv("DEX_LOCATION") + "/141-class-unload-ex.jar"; + static String nativeLibraryName; + + public static void main(String[] args) throws Exception { + nativeLibraryName = args[0]; + Class pathClassLoader = Class.forName("dalvik.system.PathClassLoader"); + if (pathClassLoader == null) { + throw new AssertionError("Couldn't find path class loader class"); + } + Constructor constructor = + pathClassLoader.getDeclaredConstructor(String.class, ClassLoader.class); + try { + testUnloadClass(constructor); + testUnloadLoader(constructor); + // Test that we don't unload if we have a Method keeping the class live. + testNoUnloadInvoke(constructor); + // Test that we don't unload if we have an instance. + testNoUnloadInstance(constructor); + // Test JNI_OnLoad and JNI_OnUnload. + testLoadAndUnloadLibrary(constructor); + // Test that stack traces keep the classes live. + testStackTrace(constructor); + // Stress test to make sure we don't leak memory. + stressTest(constructor); + } catch (Exception e) { + System.out.println(e); + } + } + + private static void stressTest(Constructor constructor) throws Exception { + for (int i = 0; i <= 100; ++i) { + setUpUnloadLoader(constructor, false); + if (i % 10 == 0) { + Runtime.getRuntime().gc(); + } + } + } + + private static void testUnloadClass(Constructor constructor) throws Exception { + WeakReference<Class> klass = setUpUnloadClass(constructor); + // No strong references to class loader, should get unloaded. + Runtime.getRuntime().gc(); + WeakReference<Class> klass2 = setUpUnloadClass(constructor); + Runtime.getRuntime().gc(); + // If the weak reference is cleared, then it was unloaded.
+ System.out.println(klass.get()); + System.out.println(klass2.get()); + } + + private static void testUnloadLoader(Constructor constructor) + throws Exception { + WeakReference<ClassLoader> loader = setUpUnloadLoader(constructor, true); + // No strong references to class loader, should get unloaded. + Runtime.getRuntime().gc(); + // If the weak reference is cleared, then it was unloaded. + System.out.println(loader.get()); + } + + private static void testStackTrace(Constructor constructor) throws Exception { + WeakReference<Class> klass = setUpUnloadClass(constructor); + Method stackTraceMethod = klass.get().getDeclaredMethod("generateStackTrace"); + Throwable throwable = (Throwable) stackTraceMethod.invoke(klass.get()); + stackTraceMethod = null; + Runtime.getRuntime().gc(); + boolean isNull = klass.get() == null; + System.out.println("class null " + isNull + " " + throwable.getMessage()); + } + + private static void testLoadAndUnloadLibrary(Constructor constructor) throws Exception { + WeakReference<ClassLoader> loader = setUpLoadLibrary(constructor); + // No strong references to class loader, should get unloaded. + Runtime.getRuntime().gc(); + // If the weak reference is cleared, then it was unloaded. + System.out.println(loader.get()); + } + + private static void testNoUnloadInvoke(Constructor constructor) throws Exception { + WeakReference<ClassLoader> loader = + new WeakReference((ClassLoader) constructor.newInstance( + DEX_FILE, ClassLoader.getSystemClassLoader())); + WeakReference<Class> intHolder = new WeakReference(loader.get().loadClass("IntHolder")); + intHolder.get().getDeclaredMethod("runGC").invoke(intHolder.get()); + boolean isNull = loader.get() == null; + System.out.println("loader null " + isNull); + } + + private static void testNoUnloadInstance(Constructor constructor) throws Exception { + WeakReference<ClassLoader> loader = + new WeakReference((ClassLoader) constructor.newInstance( + DEX_FILE, ClassLoader.getSystemClassLoader())); + WeakReference<Class> intHolder = new WeakReference(loader.get().loadClass("IntHolder")); + Object o = intHolder.get().newInstance(); + Runtime.getRuntime().gc(); + boolean isNull = loader.get() == null; + System.out.println("loader null " + isNull); + } + + private static WeakReference<Class> setUpUnloadClass(Constructor constructor) throws Exception { + ClassLoader loader = (ClassLoader) constructor.newInstance( + DEX_FILE, ClassLoader.getSystemClassLoader()); + Class intHolder = loader.loadClass("IntHolder"); + Method getValue = intHolder.getDeclaredMethod("getValue"); + Method setValue = intHolder.getDeclaredMethod("setValue", Integer.TYPE); + // Make sure we don't accidentally preserve the value in the int holder, the class + // initializer should be re-run.
+ System.out.println((int) getValue.invoke(intHolder)); + setValue.invoke(intHolder, 2); + System.out.println((int) getValue.invoke(intHolder)); + waitForCompilation(intHolder); + return new WeakReference(intHolder); + } + + private static WeakReference<ClassLoader> setUpUnloadLoader(Constructor constructor, + boolean waitForCompilation) + throws Exception { + ClassLoader loader = (ClassLoader) constructor.newInstance( + DEX_FILE, ClassLoader.getSystemClassLoader()); + Class intHolder = loader.loadClass("IntHolder"); + Method setValue = intHolder.getDeclaredMethod("setValue", Integer.TYPE); + setValue.invoke(intHolder, 2); + if (waitForCompilation) { + waitForCompilation(intHolder); + } + return new WeakReference(loader); + } + + private static void waitForCompilation(Class intHolder) throws Exception { + // Load the native library so that we can call waitForCompilation. + Method loadLibrary = intHolder.getDeclaredMethod("loadLibrary", String.class); + loadLibrary.invoke(intHolder, nativeLibraryName); + // Wait for JIT compilation to finish since the async threads may prevent unloading. + Method waitForCompilation = intHolder.getDeclaredMethod("waitForCompilation"); + waitForCompilation.invoke(intHolder); + } + + private static WeakReference<ClassLoader> setUpLoadLibrary(Constructor constructor) + throws Exception { + ClassLoader loader = (ClassLoader) constructor.newInstance( + DEX_FILE, ClassLoader.getSystemClassLoader()); + Class intHolder = loader.loadClass("IntHolder"); + Method loadLibrary = intHolder.getDeclaredMethod("loadLibrary", String.class); + loadLibrary.invoke(intHolder, nativeLibraryName); + return new WeakReference(loader); + } +} diff --git a/test/449-checker-bce/expected.txt b/test/449-checker-bce/expected.txt index e114c50371..4665d7af8b 100644 --- a/test/449-checker-bce/expected.txt +++ b/test/449-checker-bce/expected.txt @@ -1 +1,2 @@ +JNI_OnLoad called java.lang.ArrayIndexOutOfBoundsException: length=5; index=82 diff --git a/test/449-checker-bce/src/Main.java b/test/449-checker-bce/src/Main.java index a746664160..22829cddc8 100644 --- a/test/449-checker-bce/src/Main.java +++ b/test/449-checker-bce/src/Main.java @@ -249,6 +249,26 @@ public class Main { array[Integer.MAX_VALUE - 998] = 1; } + /// CHECK-START: void Main.constantIndexing6(int[]) BCE (before) + /// CHECK: BoundsCheck + /// CHECK: ArraySet + /// CHECK: BoundsCheck + /// CHECK: ArraySet + + /// CHECK-START: void Main.constantIndexing6(int[]) BCE (after) + /// CHECK: Deoptimize + + static void constantIndexing6(int[] array) { + array[3] = 1; + array[4] = 1; + } + + // A helper into which the actual throwing function should be inlined. + static void constantIndexingForward6(int[] array) { + assertIsManaged(); + constantIndexing6(array); + } + /// CHECK-START: void Main.loopPattern1(int[]) BCE (before) /// CHECK: BoundsCheck /// CHECK: ArraySet @@ -599,10 +619,19 @@ public class Main { static int foo() { try { + assertIsManaged(); // This will cause AIOOBE. constantIndexing2(new int[3]); } catch (ArrayIndexOutOfBoundsException e) { - return 99; + assertIsManaged(); // This is to ensure that single-frame deoptimization works. + // Will need to be updated if constantIndexing2 is inlined. + try { + // This will cause AIOOBE. 
+ constantIndexingForward6(new int[3]); + } catch (ArrayIndexOutOfBoundsException e2) { + assertIsManaged(); + return 99; + } } return 0; } @@ -610,13 +639,13 @@ public class Main { int sum; - /// CHECK-START: void Main.foo1(int[], int, int) BCE (before) + /// CHECK-START: void Main.foo1(int[], int, int, boolean) BCE (before) /// CHECK: BoundsCheck /// CHECK: ArraySet /// CHECK-NOT: BoundsCheck /// CHECK: ArrayGet - /// CHECK-START: void Main.foo1(int[], int, int) BCE (after) + /// CHECK-START: void Main.foo1(int[], int, int, boolean) BCE (after) /// CHECK: Phi /// CHECK-NOT: BoundsCheck /// CHECK: ArraySet @@ -633,25 +662,30 @@ public class Main { /// CHECK: Phi /// CHECK: Goto - void foo1(int[] array, int start, int end) { + void foo1(int[] array, int start, int end, boolean expectInterpreter) { // Three HDeoptimize will be added. One for // start >= 0, one for end <= array.length, // and one for null check on array (to hoist null // check and array.length out of loop). for (int i = start ; i < end; i++) { + if (expectInterpreter) { + assertIsInterpreted(); + } else { + assertIsManaged(); + } array[i] = 1; sum += array[i]; } } - /// CHECK-START: void Main.foo2(int[], int, int) BCE (before) + /// CHECK-START: void Main.foo2(int[], int, int, boolean) BCE (before) /// CHECK: BoundsCheck /// CHECK: ArraySet /// CHECK-NOT: BoundsCheck /// CHECK: ArrayGet - /// CHECK-START: void Main.foo2(int[], int, int) BCE (after) + /// CHECK-START: void Main.foo2(int[], int, int, boolean) BCE (after) /// CHECK: Phi /// CHECK-NOT: BoundsCheck /// CHECK: ArraySet @@ -668,25 +702,30 @@ public class Main { /// CHECK: Phi /// CHECK: Goto - void foo2(int[] array, int start, int end) { + void foo2(int[] array, int start, int end, boolean expectInterpreter) { // Three HDeoptimize will be added. One for // start >= 0, one for end <= array.length, // and one for null check on array (to hoist null // check and array.length out of loop). for (int i = start ; i <= end; i++) { + if (expectInterpreter) { + assertIsInterpreted(); + } else { + assertIsManaged(); + } array[i] = 1; sum += array[i]; } } - /// CHECK-START: void Main.foo3(int[], int) BCE (before) + /// CHECK-START: void Main.foo3(int[], int, boolean) BCE (before) /// CHECK: BoundsCheck /// CHECK: ArraySet /// CHECK-NOT: BoundsCheck /// CHECK: ArrayGet - /// CHECK-START: void Main.foo3(int[], int) BCE (after) + /// CHECK-START: void Main.foo3(int[], int, boolean) BCE (after) /// CHECK: Phi /// CHECK-NOT: BoundsCheck /// CHECK: ArraySet @@ -702,24 +741,29 @@ public class Main { /// CHECK: Phi /// CHECK: Goto - void foo3(int[] array, int end) { + void foo3(int[] array, int end, boolean expectInterpreter) { // Two HDeoptimize will be added. One for end < array.length, // and one for null check on array (to hoist null check // and array.length out of loop). 
for (int i = 3 ; i <= end; i++) { + if (expectInterpreter) { + assertIsInterpreted(); + } else { + assertIsManaged(); + } array[i] = 1; sum += array[i]; } } - /// CHECK-START: void Main.foo4(int[], int) BCE (before) + /// CHECK-START: void Main.foo4(int[], int, boolean) BCE (before) /// CHECK: BoundsCheck /// CHECK: ArraySet /// CHECK-NOT: BoundsCheck /// CHECK: ArrayGet - /// CHECK-START: void Main.foo4(int[], int) BCE (after) + /// CHECK-START: void Main.foo4(int[], int, boolean) BCE (after) /// CHECK: Phi /// CHECK-NOT: BoundsCheck /// CHECK: ArraySet @@ -735,18 +779,23 @@ public class Main { /// CHECK: Phi /// CHECK: Goto - void foo4(int[] array, int end) { + void foo4(int[] array, int end, boolean expectInterpreter) { // Two HDeoptimize will be added. One for end <= array.length, // and one for null check on array (to hoist null check // and array.length out of loop). for (int i = end ; i > 0; i--) { + if (expectInterpreter) { + assertIsInterpreted(); + } else { + assertIsManaged(); + } array[i - 1] = 1; sum += array[i - 1]; } } - /// CHECK-START: void Main.foo5(int[], int) BCE (before) + /// CHECK-START: void Main.foo5(int[], int, boolean) BCE (before) /// CHECK: BoundsCheck /// CHECK: ArraySet /// CHECK: BoundsCheck @@ -756,7 +805,7 @@ public class Main { /// CHECK: BoundsCheck /// CHECK: ArrayGet - /// CHECK-START: void Main.foo5(int[], int) BCE (after) + /// CHECK-START: void Main.foo5(int[], int, boolean) BCE (after) /// CHECK-NOT: BoundsCheck /// CHECK: ArraySet /// CHECK: Phi @@ -776,7 +825,7 @@ public class Main { /// CHECK-NOT: Phi /// CHECK: Goto - void foo5(int[] array, int end) { + void foo5(int[] array, int end, boolean expectInterpreter) { // Bounds check in this loop can be eliminated without deoptimization. for (int i = array.length - 1 ; i >= 0; i--) { array[i] = 1; @@ -784,6 +833,11 @@ public class Main { // One HDeoptimize will be added. // It's for (end - 2 <= array.length - 2). for (int i = end - 2 ; i > 0; i--) { + if (expectInterpreter) { + assertIsInterpreted(); + } else { + assertIsManaged(); + } sum += array[i - 1]; sum += array[i]; sum += array[i + 1]; @@ -791,7 +845,7 @@ public class Main { } - /// CHECK-START: void Main.foo6(int[], int, int) BCE (before) + /// CHECK-START: void Main.foo6(int[], int, int, boolean) BCE (before) /// CHECK: BoundsCheck /// CHECK: ArrayGet /// CHECK: BoundsCheck @@ -805,7 +859,7 @@ public class Main { /// CHECK-NOT: BoundsCheck /// CHECK: ArraySet - /// CHECK-START: void Main.foo6(int[], int, int) BCE (after) + /// CHECK-START: void Main.foo6(int[], int, int, boolean) BCE (after) /// CHECK: Phi /// CHECK-NOT: BoundsCheck /// CHECK: ArrayGet @@ -831,12 +885,17 @@ public class Main { /// CHECK: Goto /// CHECK-NOT: Deoptimize - void foo6(int[] array, int start, int end) { + void foo6(int[] array, int start, int end, boolean expectInterpreter) { // Three HDeoptimize will be added. One for // start >= 2, one for end <= array.length - 3, // and one for null check on array (to hoist null // check and array.length out of loop). 
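The "Three HDeoptimize" comment above describes guards that BCE hoists in front of the loop; once they hold, every access in the body is provably in bounds, which is why the test asserts managed execution for in-range arguments and interpreted execution when a guard fails at runtime. A hand-written C++ analogue of the guarded fast path for the foo6 shape, illustrative only and not compiler output:

// Guards mirror the comment: array is non-null, start >= 2, end <= length - 3.
// In the real transform a failed guard deoptimizes to the interpreter; the sketch
// simply returns so it stays self-contained.
static void Foo6FastPathSketch(int* array, int length, int start, int end) {
  if (array == nullptr || start < 2 || end > length - 3) {
    return;  // would deoptimize here instead of running the unchecked loop
  }
  for (int i = end; i >= start; i--) {
    // Given the guards, i - 2 >= 0 and i + 2 <= length - 1 on every iteration.
    array[i] = (array[i - 2] + array[i - 1] + array[i] + array[i + 1] + array[i + 2]) / 5;
  }
}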
for (int i = end; i >= start; i--) { + if (expectInterpreter) { + assertIsInterpreted(); + } else { + assertIsManaged(); + } array[i] = (array[i-2] + array[i-1] + array[i] + array[i+1] + array[i+2]) / 5; } } @@ -924,12 +983,12 @@ public class Main { } - /// CHECK-START: void Main.foo9(int[]) BCE (before) + /// CHECK-START: void Main.foo9(int[], boolean) BCE (before) /// CHECK: NullCheck /// CHECK: BoundsCheck /// CHECK: ArrayGet - /// CHECK-START: void Main.foo9(int[]) BCE (after) + /// CHECK-START: void Main.foo9(int[], boolean) BCE (after) // The loop is guaranteed to be entered. No need to transform the // loop for loop body entry test. /// CHECK: Deoptimize @@ -940,10 +999,15 @@ public class Main { /// CHECK-NOT: BoundsCheck /// CHECK: ArrayGet - void foo9(int[] array) { + void foo9(int[] array, boolean expectInterpreter) { // Two HDeoptimize will be added. One for // 10 <= array.length, and one for null check on array. for (int i = 0 ; i < 10; i++) { + if (expectInterpreter) { + assertIsInterpreted(); + } else { + assertIsManaged(); + } sum += array[i]; } } @@ -975,7 +1039,7 @@ public class Main { static void testUnknownBounds() { boolean caught = false; Main main = new Main(); - main.foo1(new int[10], 0, 10); + main.foo1(new int[10], 0, 10, false); if (main.sum != 10) { System.out.println("foo1 failed!"); } @@ -983,7 +1047,7 @@ public class Main { caught = false; main = new Main(); try { - main.foo1(new int[10], 0, 11); + main.foo1(new int[10], 0, 11, true); } catch (ArrayIndexOutOfBoundsException e) { caught = true; } @@ -992,7 +1056,7 @@ public class Main { } main = new Main(); - main.foo2(new int[10], 0, 9); + main.foo2(new int[10], 0, 9, false); if (main.sum != 10) { System.out.println("foo2 failed!"); } @@ -1000,7 +1064,7 @@ public class Main { caught = false; main = new Main(); try { - main.foo2(new int[10], 0, 10); + main.foo2(new int[10], 0, 10, true); } catch (ArrayIndexOutOfBoundsException e) { caught = true; } @@ -1009,7 +1073,7 @@ public class Main { } main = new Main(); - main.foo3(new int[10], 9); + main.foo3(new int[10], 9, false); if (main.sum != 7) { System.out.println("foo3 failed!"); } @@ -1017,7 +1081,7 @@ public class Main { caught = false; main = new Main(); try { - main.foo3(new int[10], 10); + main.foo3(new int[10], 10, true); } catch (ArrayIndexOutOfBoundsException e) { caught = true; } @@ -1026,7 +1090,7 @@ public class Main { } main = new Main(); - main.foo4(new int[10], 10); + main.foo4(new int[10], 10, false); if (main.sum != 10) { System.out.println("foo4 failed!"); } @@ -1034,7 +1098,7 @@ public class Main { caught = false; main = new Main(); try { - main.foo4(new int[10], 11); + main.foo4(new int[10], 11, true); } catch (ArrayIndexOutOfBoundsException e) { caught = true; } @@ -1043,7 +1107,7 @@ public class Main { } main = new Main(); - main.foo5(new int[10], 10); + main.foo5(new int[10], 10, false); if (main.sum != 24) { System.out.println("foo5 failed!"); } @@ -1051,7 +1115,7 @@ public class Main { caught = false; main = new Main(); try { - main.foo5(new int[10], 11); + main.foo5(new int[10], 11, true); } catch (ArrayIndexOutOfBoundsException e) { caught = true; } @@ -1060,11 +1124,11 @@ public class Main { } main = new Main(); - main.foo6(new int[10], 2, 7); + main.foo6(new int[10], 2, 7, false); main = new Main(); int[] array9 = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; - main.foo9(array9); + main.foo9(array9, false); if (main.sum != 45) { System.out.println("foo9 failed!"); } @@ -1080,7 +1144,7 @@ public class Main { caught = false; main = new Main(); try { 
- main.foo6(new int[10], 2, 8); + main.foo6(new int[10], 2, 8, true); } catch (ArrayIndexOutOfBoundsException e) { caught = true; } @@ -1091,7 +1155,7 @@ public class Main { caught = false; main = new Main(); try { - main.foo6(new int[10], 1, 7); + main.foo6(new int[10], 1, 7, true); } catch (ArrayIndexOutOfBoundsException e) { caught = true; } @@ -1128,6 +1192,15 @@ public class Main { /// CHECK: ParallelMove public static void main(String[] args) { + System.loadLibrary(args[0]); + + if (!compiledWithOptimizing() || + !hasOatFile() || + runtimeIsSoftFail() || + isInterpreted()) { + disableStackFrameAsserts(); + } + sieve(20); int[] array = {5, 2, 3, 7, 0, 1, 6, 4}; @@ -1166,4 +1239,11 @@ public class Main { new Main().testExceptionMessage(); } + public static native boolean compiledWithOptimizing(); + public static native void disableStackFrameAsserts(); + public static native void assertIsManaged(); + public static native void assertIsInterpreted(); + public static native boolean hasOatFile(); + public static native boolean runtimeIsSoftFail(); + public static native boolean isInterpreted(); } diff --git a/test/454-get-vreg/expected.txt b/test/454-get-vreg/expected.txt index e69de29bb2..6a5618ebc6 100644 --- a/test/454-get-vreg/expected.txt +++ b/test/454-get-vreg/expected.txt @@ -0,0 +1 @@ +JNI_OnLoad called diff --git a/test/455-set-vreg/expected.txt b/test/455-set-vreg/expected.txt index e69de29bb2..6a5618ebc6 100644 --- a/test/455-set-vreg/expected.txt +++ b/test/455-set-vreg/expected.txt @@ -0,0 +1 @@ +JNI_OnLoad called diff --git a/test/457-regs/expected.txt b/test/457-regs/expected.txt index e69de29bb2..6a5618ebc6 100644 --- a/test/457-regs/expected.txt +++ b/test/457-regs/expected.txt @@ -0,0 +1 @@ +JNI_OnLoad called diff --git a/test/458-checker-instruction-simplification/src/Main.java b/test/458-checker-instruction-simplification/src/Main.java index a14200e7ce..c32d34aa6f 100644 --- a/test/458-checker-instruction-simplification/src/Main.java +++ b/test/458-checker-instruction-simplification/src/Main.java @@ -84,6 +84,172 @@ public class Main { return arg & -1; } + /// CHECK-START: int Main.UShr28And15(int) instruction_simplifier (before) + /// CHECK-DAG: <<Arg:i\d+>> ParameterValue + /// CHECK-DAG: <<Const28:i\d+>> IntConstant 28 + /// CHECK-DAG: <<Const15:i\d+>> IntConstant 15 + /// CHECK-DAG: <<UShr:i\d+>> UShr [<<Arg>>,<<Const28>>] + /// CHECK-DAG: <<And:i\d+>> And [<<UShr>>,<<Const15>>] + /// CHECK-DAG: Return [<<And>>] + + /// CHECK-START: int Main.UShr28And15(int) instruction_simplifier (after) + /// CHECK-DAG: <<Arg:i\d+>> ParameterValue + /// CHECK-DAG: <<Const28:i\d+>> IntConstant 28 + /// CHECK-DAG: <<UShr:i\d+>> UShr [<<Arg>>,<<Const28>>] + /// CHECK-DAG: Return [<<UShr>>] + + /// CHECK-START: int Main.UShr28And15(int) instruction_simplifier (after) + /// CHECK-NOT: And + + public static int UShr28And15(int arg) { + return (arg >>> 28) & 15; + } + + /// CHECK-START: long Main.UShr60And15(long) instruction_simplifier (before) + /// CHECK-DAG: <<Arg:j\d+>> ParameterValue + /// CHECK-DAG: <<Const60:i\d+>> IntConstant 60 + /// CHECK-DAG: <<Const15:j\d+>> LongConstant 15 + /// CHECK-DAG: <<UShr:j\d+>> UShr [<<Arg>>,<<Const60>>] + /// CHECK-DAG: <<And:j\d+>> And [<<UShr>>,<<Const15>>] + /// CHECK-DAG: Return [<<And>>] + + /// CHECK-START: long Main.UShr60And15(long) instruction_simplifier (after) + /// CHECK-DAG: <<Arg:j\d+>> ParameterValue + /// CHECK-DAG: <<Const60:i\d+>> IntConstant 60 + /// CHECK-DAG: <<UShr:j\d+>> UShr [<<Arg>>,<<Const60>>] + /// CHECK-DAG: Return 
[<<UShr>>] + + /// CHECK-START: long Main.UShr60And15(long) instruction_simplifier (after) + /// CHECK-NOT: And + + public static long UShr60And15(long arg) { + return (arg >>> 60) & 15; + } + + /// CHECK-START: int Main.UShr28And7(int) instruction_simplifier (before) + /// CHECK-DAG: <<Arg:i\d+>> ParameterValue + /// CHECK-DAG: <<Const28:i\d+>> IntConstant 28 + /// CHECK-DAG: <<Const7:i\d+>> IntConstant 7 + /// CHECK-DAG: <<UShr:i\d+>> UShr [<<Arg>>,<<Const28>>] + /// CHECK-DAG: <<And:i\d+>> And [<<UShr>>,<<Const7>>] + /// CHECK-DAG: Return [<<And>>] + + /// CHECK-START: int Main.UShr28And7(int) instruction_simplifier (after) + /// CHECK-DAG: <<Arg:i\d+>> ParameterValue + /// CHECK-DAG: <<Const28:i\d+>> IntConstant 28 + /// CHECK-DAG: <<Const7:i\d+>> IntConstant 7 + /// CHECK-DAG: <<UShr:i\d+>> UShr [<<Arg>>,<<Const28>>] + /// CHECK-DAG: <<And:i\d+>> And [<<UShr>>,<<Const7>>] + /// CHECK-DAG: Return [<<And>>] + + public static int UShr28And7(int arg) { + return (arg >>> 28) & 7; + } + + /// CHECK-START: long Main.UShr60And7(long) instruction_simplifier (before) + /// CHECK-DAG: <<Arg:j\d+>> ParameterValue + /// CHECK-DAG: <<Const60:i\d+>> IntConstant 60 + /// CHECK-DAG: <<Const7:j\d+>> LongConstant 7 + /// CHECK-DAG: <<UShr:j\d+>> UShr [<<Arg>>,<<Const60>>] + /// CHECK-DAG: <<And:j\d+>> And [<<UShr>>,<<Const7>>] + /// CHECK-DAG: Return [<<And>>] + + /// CHECK-START: long Main.UShr60And7(long) instruction_simplifier (after) + /// CHECK-DAG: <<Arg:j\d+>> ParameterValue + /// CHECK-DAG: <<Const60:i\d+>> IntConstant 60 + /// CHECK-DAG: <<Const7:j\d+>> LongConstant 7 + /// CHECK-DAG: <<UShr:j\d+>> UShr [<<Arg>>,<<Const60>>] + /// CHECK-DAG: <<And:j\d+>> And [<<UShr>>,<<Const7>>] + /// CHECK-DAG: Return [<<And>>] + + public static long UShr60And7(long arg) { + return (arg >>> 60) & 7; + } + + /// CHECK-START: int Main.Shr24And255(int) instruction_simplifier (before) + /// CHECK-DAG: <<Arg:i\d+>> ParameterValue + /// CHECK-DAG: <<Const24:i\d+>> IntConstant 24 + /// CHECK-DAG: <<Const255:i\d+>> IntConstant 255 + /// CHECK-DAG: <<Shr:i\d+>> Shr [<<Arg>>,<<Const24>>] + /// CHECK-DAG: <<And:i\d+>> And [<<Shr>>,<<Const255>>] + /// CHECK-DAG: Return [<<And>>] + + /// CHECK-START: int Main.Shr24And255(int) instruction_simplifier (after) + /// CHECK-DAG: <<Arg:i\d+>> ParameterValue + /// CHECK-DAG: <<Const24:i\d+>> IntConstant 24 + /// CHECK-DAG: <<UShr:i\d+>> UShr [<<Arg>>,<<Const24>>] + /// CHECK-DAG: Return [<<UShr>>] + + /// CHECK-START: int Main.Shr24And255(int) instruction_simplifier (after) + /// CHECK-NOT: Shr + /// CHECK-NOT: And + + public static int Shr24And255(int arg) { + return (arg >> 24) & 255; + } + + /// CHECK-START: long Main.Shr56And255(long) instruction_simplifier (before) + /// CHECK-DAG: <<Arg:j\d+>> ParameterValue + /// CHECK-DAG: <<Const56:i\d+>> IntConstant 56 + /// CHECK-DAG: <<Const255:j\d+>> LongConstant 255 + /// CHECK-DAG: <<Shr:j\d+>> Shr [<<Arg>>,<<Const56>>] + /// CHECK-DAG: <<And:j\d+>> And [<<Shr>>,<<Const255>>] + /// CHECK-DAG: Return [<<And>>] + + /// CHECK-START: long Main.Shr56And255(long) instruction_simplifier (after) + /// CHECK-DAG: <<Arg:j\d+>> ParameterValue + /// CHECK-DAG: <<Const56:i\d+>> IntConstant 56 + /// CHECK-DAG: <<UShr:j\d+>> UShr [<<Arg>>,<<Const56>>] + /// CHECK-DAG: Return [<<UShr>>] + + /// CHECK-START: long Main.Shr56And255(long) instruction_simplifier (after) + /// CHECK-NOT: Shr + /// CHECK-NOT: And + + public static long Shr56And255(long arg) { + return (arg >> 56) & 255; + } + + /// CHECK-START: int Main.Shr24And127(int) 
instruction_simplifier (before) + /// CHECK-DAG: <<Arg:i\d+>> ParameterValue + /// CHECK-DAG: <<Const24:i\d+>> IntConstant 24 + /// CHECK-DAG: <<Const127:i\d+>> IntConstant 127 + /// CHECK-DAG: <<Shr:i\d+>> Shr [<<Arg>>,<<Const24>>] + /// CHECK-DAG: <<And:i\d+>> And [<<Shr>>,<<Const127>>] + /// CHECK-DAG: Return [<<And>>] + + /// CHECK-START: int Main.Shr24And127(int) instruction_simplifier (after) + /// CHECK-DAG: <<Arg:i\d+>> ParameterValue + /// CHECK-DAG: <<Const24:i\d+>> IntConstant 24 + /// CHECK-DAG: <<Const127:i\d+>> IntConstant 127 + /// CHECK-DAG: <<Shr:i\d+>> Shr [<<Arg>>,<<Const24>>] + /// CHECK-DAG: <<And:i\d+>> And [<<Shr>>,<<Const127>>] + /// CHECK-DAG: Return [<<And>>] + + public static int Shr24And127(int arg) { + return (arg >> 24) & 127; + } + + /// CHECK-START: long Main.Shr56And127(long) instruction_simplifier (before) + /// CHECK-DAG: <<Arg:j\d+>> ParameterValue + /// CHECK-DAG: <<Const56:i\d+>> IntConstant 56 + /// CHECK-DAG: <<Const127:j\d+>> LongConstant 127 + /// CHECK-DAG: <<Shr:j\d+>> Shr [<<Arg>>,<<Const56>>] + /// CHECK-DAG: <<And:j\d+>> And [<<Shr>>,<<Const127>>] + /// CHECK-DAG: Return [<<And>>] + + /// CHECK-START: long Main.Shr56And127(long) instruction_simplifier (after) + /// CHECK-DAG: <<Arg:j\d+>> ParameterValue + /// CHECK-DAG: <<Const56:i\d+>> IntConstant 56 + /// CHECK-DAG: <<Const127:j\d+>> LongConstant 127 + /// CHECK-DAG: <<Shr:j\d+>> Shr [<<Arg>>,<<Const56>>] + /// CHECK-DAG: <<And:j\d+>> And [<<Shr>>,<<Const127>>] + /// CHECK-DAG: Return [<<And>>] + + public static long Shr56And127(long arg) { + return (arg >> 56) & 127; + } + /// CHECK-START: long Main.Div1(long) instruction_simplifier (before) /// CHECK-DAG: <<Arg:j\d+>> ParameterValue /// CHECK-DAG: <<Const1:j\d+>> LongConstant 1 @@ -1109,5 +1275,13 @@ public class Main { assertFloatEquals(DivMP25(100.0f), -400.0f); assertDoubleEquals(DivMP25(150.0), -600.0); assertLongEquals(Shl1(100), 200); + assertIntEquals(UShr28And15(0xc1234567), 0xc); + assertLongEquals(UShr60And15(0xc123456787654321L), 0xcL); + assertIntEquals(UShr28And7(0xc1234567), 0x4); + assertLongEquals(UShr60And7(0xc123456787654321L), 0x4L); + assertIntEquals(Shr24And255(0xc1234567), 0xc1); + assertLongEquals(Shr56And255(0xc123456787654321L), 0xc1L); + assertIntEquals(Shr24And127(0xc1234567), 0x41); + assertLongEquals(Shr56And127(0xc123456787654321L), 0x41L); } } diff --git a/test/461-get-reference-vreg/expected.txt b/test/461-get-reference-vreg/expected.txt index e69de29bb2..6a5618ebc6 100644 --- a/test/461-get-reference-vreg/expected.txt +++ b/test/461-get-reference-vreg/expected.txt @@ -0,0 +1 @@ +JNI_OnLoad called diff --git a/test/466-get-live-vreg/expected.txt b/test/466-get-live-vreg/expected.txt index e69de29bb2..6a5618ebc6 100644 --- a/test/466-get-live-vreg/expected.txt +++ b/test/466-get-live-vreg/expected.txt @@ -0,0 +1 @@ +JNI_OnLoad called diff --git a/test/466-get-live-vreg/get_live_vreg_jni.cc b/test/466-get-live-vreg/get_live_vreg_jni.cc index e3e00918f8..7e9a583faf 100644 --- a/test/466-get-live-vreg/get_live_vreg_jni.cc +++ b/test/466-get-live-vreg/get_live_vreg_jni.cc @@ -42,7 +42,9 @@ class TestVisitor : public StackVisitor { } else if (m_name.compare("testIntervalHole") == 0) { found_method_ = true; uint32_t value = 0; - if (GetCurrentQuickFrame() != nullptr && m->IsOptimized(sizeof(void*))) { + if (GetCurrentQuickFrame() != nullptr && + m->IsOptimized(sizeof(void*)) && + !Runtime::Current()->IsDebuggable()) { CHECK_EQ(GetVReg(m, 0, kIntVReg, &value), false); } else { CHECK(GetVReg(m, 0, kIntVReg, &value)); 
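The new shift-plus-mask cases added to test/458-checker-instruction-simplification above rest on two bit-width facts: an unsigned right shift by 28 (60 for longs) leaves at most four significant bits, so a following & 15 is redundant and the simplifier may drop the And, while a signed right shift by 24 followed by & 255 extracts exactly the top byte, which is what an unsigned shift by 24 produces. A minimal standalone illustration of those identities, using the same constants the tests assert on (editor's sketch, not part of this change; the class name is invented):

public class ShiftMaskSketch {
  public static void main(String[] args) {
    int x = 0xc1234567;
    // The mask covers every bit that can survive the shift, so the And is redundant.
    check(((x >>> 28) & 15) == (x >>> 28) && (x >>> 28) == 0xc);
    // A signed shift followed by a full-byte mask behaves like an unsigned shift.
    check(((x >> 24) & 255) == (x >>> 24) && (x >>> 24) == 0xc1);
    // Narrower masks really drop bits, so the And (and the Shr) must be kept.
    check(((x >>> 28) & 7) == 0x4);
    check(((x >> 24) & 127) == 0x41);
    System.out.println("shift/mask identities hold");
  }

  private static void check(boolean ok) {
    if (!ok) throw new AssertionError();
  }
}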
diff --git a/test/482-checker-loop-back-edge-use/src/Main.java b/test/482-checker-loop-back-edge-use/src/Main.java index 2cfb04d652..6b4da9de27 100644 --- a/test/482-checker-loop-back-edge-use/src/Main.java +++ b/test/482-checker-loop-back-edge-use/src/Main.java @@ -18,16 +18,27 @@ public class Main { /// CHECK-START: void Main.loop1(boolean) liveness (after) - /// CHECK: ParameterValue liveness:2 ranges:{[2,22)} uses:[17,22] - /// CHECK: Goto liveness:20 + /// CHECK: <<Arg:z\d+>> ParameterValue liveness:<<ArgLiv:\d+>> ranges:{[<<ArgLiv>>,<<ArgLoopUse:\d+>>)} uses:[<<ArgUse:\d+>>,<<ArgLoopUse>>] + /// CHECK: If [<<Arg>>] liveness:<<IfLiv:\d+>> + /// CHECK: Goto liveness:<<GotoLiv:\d+>> + /// CHECK: Exit + /// CHECK-EVAL: <<IfLiv>> + 1 == <<ArgUse>> + /// CHECK-EVAL: <<GotoLiv>> + 2 == <<ArgLoopUse>> + public static void loop1(boolean incoming) { while (incoming) {} } /// CHECK-START: void Main.loop2(boolean) liveness (after) - /// CHECK: ParameterValue liveness:4 ranges:{[4,44)} uses:[35,40,44] - /// CHECK: Goto liveness:38 - /// CHECK: Goto liveness:42 + /// CHECK: <<Arg:z\d+>> ParameterValue liveness:<<ArgLiv:\d+>> ranges:{[<<ArgLiv>>,<<ArgLoopUse2:\d+>>)} uses:[<<ArgUse:\d+>>,<<ArgLoopUse1:\d+>>,<<ArgLoopUse2>>] + /// CHECK: If [<<Arg>>] liveness:<<IfLiv:\d+>> + /// CHECK: Goto liveness:<<GotoLiv1:\d+>> + /// CHECK: Goto liveness:<<GotoLiv2:\d+>> + /// CHECK-EVAL: <<IfLiv>> + 1 == <<ArgUse>> + /// CHECK-EVAL: <<GotoLiv1>> < <<GotoLiv2>> + /// CHECK-EVAL: <<GotoLiv1>> + 2 == <<ArgLoopUse1>> + /// CHECK-EVAL: <<GotoLiv2>> + 2 == <<ArgLoopUse2>> + public static void loop2(boolean incoming) { while (true) { System.out.println("foo"); @@ -36,11 +47,14 @@ public class Main { } /// CHECK-START: void Main.loop3(boolean) liveness (after) - /// CHECK: ParameterValue liveness:4 ranges:{[4,60)} uses:[56,60] - /// CHECK: Goto liveness:58 + /// CHECK: <<Arg:z\d+>> ParameterValue liveness:<<ArgLiv:\d+>> ranges:{[<<ArgLiv>>,<<ArgLoopUse:\d+>>)} uses:[<<ArgUse:\d+>>,<<ArgLoopUse>>] + /// CHECK: Goto liveness:<<GotoLiv1:\d+>> + /// CHECK: InvokeVirtual [{{l\d+}},<<Arg>>] method_name:java.io.PrintStream.println liveness:<<InvokeLiv:\d+>> + /// CHECK: Goto liveness:<<GotoLiv2:\d+>> + /// CHECK-EVAL: <<InvokeLiv>> == <<ArgUse>> + /// CHECK-EVAL: <<GotoLiv1>> < <<GotoLiv2>> + /// CHECK-EVAL: <<GotoLiv2>> + 2 == <<ArgLoopUse>> - // CHECK-START: void Main.loop3(boolean) liveness (after) - // CHECK-NOT: Goto liveness:50 public static void loop3(boolean incoming) { // 'incoming' only needs a use at the outer loop's back edge. while (System.currentTimeMillis() != 42) { @@ -49,11 +63,11 @@ public class Main { } } - // CHECK-START: void Main.loop4(boolean) liveness (after) - // CHECK: ParameterValue liveness:4 ranges:{[4,22)} uses:[22] + /// CHECK-START: void Main.loop4(boolean) liveness (after) + /// CHECK: <<Arg:z\d+>> ParameterValue liveness:<<ArgLiv:\d+>> ranges:{[<<ArgLiv>>,<<ArgUse:\d+>>)} uses:[<<ArgUse>>] + /// CHECK: InvokeVirtual [{{l\d+}},<<Arg>>] method_name:java.io.PrintStream.println liveness:<<InvokeLiv:\d+>> + /// CHECK-EVAL: <<InvokeLiv>> == <<ArgUse>> - // CHECK-START: void Main.loop4(boolean) liveness (after) - // CHECK-NOT: Goto liveness:18 public static void loop4(boolean incoming) { // 'incoming' has no loop use, so should not have back edge uses. 
System.out.println(incoming); @@ -63,59 +77,98 @@ public class Main { } /// CHECK-START: void Main.loop5(boolean) liveness (after) - /// CHECK: ParameterValue liveness:4 ranges:{[4,54)} uses:[37,46,50,54] - /// CHECK: Goto liveness:48 - /// CHECK: Goto liveness:52 + /// CHECK: <<Arg:z\d+>> ParameterValue liveness:<<ArgLiv:\d+>> ranges:{[<<ArgLiv>>,<<ArgLoopUse2:\d+>>)} uses:[<<ArgUse:\d+>>,<<ArgLoopUse1:\d+>>,<<ArgLoopUse2>>] + /// CHECK: InvokeVirtual [{{l\d+}},<<Arg>>] method_name:java.io.PrintStream.println liveness:<<InvokeLiv:\d+>> + /// CHECK: Goto liveness:<<GotoLiv1:\d+>> + /// CHECK: Goto liveness:<<GotoLiv2:\d+>> + /// CHECK: Exit + /// CHECK-EVAL: <<InvokeLiv>> == <<ArgUse>> + /// CHECK-EVAL: <<GotoLiv1>> < <<GotoLiv2>> + /// CHECK-EVAL: <<GotoLiv1>> + 2 == <<ArgLoopUse1>> + /// CHECK-EVAL: <<GotoLiv2>> + 2 == <<ArgLoopUse2>> + public static void loop5(boolean incoming) { // 'incoming' must have a use at both back edges. - while (Runtime.getRuntime() != null) { - while (incoming) { + for (long i = System.nanoTime(); i < 42; ++i) { + for (long j = System.currentTimeMillis(); j != 42; ++j) { System.out.println(incoming); } } } /// CHECK-START: void Main.loop6(boolean) liveness (after) - /// CHECK: ParameterValue liveness:4 ranges:{[4,50)} uses:[26,50] - /// CHECK: Goto liveness:48 + /// CHECK: <<Arg:z\d+>> ParameterValue liveness:<<ArgLiv:\d+>> ranges:{[<<ArgLiv>>,<<ArgLoopUse:\d+>>)} uses:[<<ArgUse:\d+>>,<<ArgLoopUse>>] + /// CHECK: InvokeVirtual [{{l\d+}},<<Arg>>] method_name:java.io.PrintStream.println liveness:<<InvokeLiv:\d+>> + /// CHECK: Add + /// CHECK: Goto liveness:<<GotoLiv1:\d+>> + /// CHECK: Add + /// CHECK: Goto liveness:<<GotoLiv2:\d+>> + /// CHECK: Exit + /// CHECK-EVAL: <<InvokeLiv>> == <<ArgUse>> + /// CHECK-EVAL: <<GotoLiv1>> < <<GotoLiv2>> + /// CHECK-EVAL: <<GotoLiv2>> + 2 == <<ArgLoopUse>> - /// CHECK-START: void Main.loop6(boolean) liveness (after) - /// CHECK-NOT: Goto liveness:24 public static void loop6(boolean incoming) { // 'incoming' must have a use only at the first loop's back edge. - while (true) { + for (long i = System.nanoTime(); i < 42; ++i) { System.out.println(incoming); - while (Runtime.getRuntime() != null) {} + for (long j = System.currentTimeMillis(); j != 42; ++j) {} } } /// CHECK-START: void Main.loop7(boolean) liveness (after) - /// CHECK: ParameterValue liveness:4 ranges:{[4,54)} uses:[36,45,50,54] - /// CHECK: Goto liveness:48 - /// CHECK: Goto liveness:52 + /// CHECK: <<Arg:z\d+>> ParameterValue liveness:<<ArgLiv:\d+>> ranges:{[<<ArgLiv>>,<<ArgLoopUse2:\d+>>)} uses:[<<ArgUse1:\d+>>,<<ArgUse2:\d+>>,<<ArgLoopUse1:\d+>>,<<ArgLoopUse2>>] + /// CHECK: InvokeVirtual [{{l\d+}},<<Arg>>] method_name:java.io.PrintStream.println liveness:<<InvokeLiv:\d+>> + /// CHECK: If [<<Arg>>] liveness:<<IfLiv:\d+>> + /// CHECK: Goto liveness:<<GotoLiv1:\d+>> + /// CHECK: Goto liveness:<<GotoLiv2:\d+>> + /// CHECK: Exit + /// CHECK-EVAL: <<InvokeLiv>> == <<ArgUse1>> + /// CHECK-EVAL: <<IfLiv>> + 1 == <<ArgUse2>> + /// CHECK-EVAL: <<GotoLiv1>> < <<GotoLiv2>> + /// CHECK-EVAL: <<GotoLiv1>> + 2 == <<ArgLoopUse1>> + /// CHECK-EVAL: <<GotoLiv2>> + 2 == <<ArgLoopUse2>> + public static void loop7(boolean incoming) { // 'incoming' must have a use at both back edges. 
while (Runtime.getRuntime() != null) { System.out.println(incoming); while (incoming) {} + System.nanoTime(); // beat back edge splitting } } /// CHECK-START: void Main.loop8() liveness (after) - /// CHECK: StaticFieldGet liveness:14 ranges:{[14,48)} uses:[39,44,48] - /// CHECK: Goto liveness:42 - /// CHECK: Goto liveness:46 + /// CHECK: <<Arg:z\d+>> StaticFieldGet liveness:<<ArgLiv:\d+>> ranges:{[<<ArgLiv>>,<<ArgLoopUse2:\d+>>)} uses:[<<ArgUse:\d+>>,<<ArgLoopUse1:\d+>>,<<ArgLoopUse2>>] + /// CHECK: If [<<Arg>>] liveness:<<IfLiv:\d+>> + /// CHECK: Goto liveness:<<GotoLiv1:\d+>> + /// CHECK: Goto liveness:<<GotoLiv2:\d+>> + /// CHECK: Exit + /// CHECK-EVAL: <<IfLiv>> + 1 == <<ArgUse>> + /// CHECK-EVAL: <<GotoLiv1>> < <<GotoLiv2>> + /// CHECK-EVAL: <<GotoLiv1>> + 2 == <<ArgLoopUse1>> + /// CHECK-EVAL: <<GotoLiv2>> + 2 == <<ArgLoopUse2>> + public static void loop8() { // 'incoming' must have a use at both back edges. boolean incoming = field; while (Runtime.getRuntime() != null) { + System.nanoTime(); // beat pre-header creation while (incoming) {} + System.nanoTime(); // beat back edge splitting } } /// CHECK-START: void Main.loop9() liveness (after) - /// CHECK: StaticFieldGet liveness:26 ranges:{[26,40)} uses:[35,40] - /// CHECK: Goto liveness:42 + /// CHECK: <<Arg:z\d+>> StaticFieldGet liveness:<<ArgLiv:\d+>> ranges:{[<<ArgLiv>>,<<ArgLoopUse:\d+>>)} uses:[<<ArgUse:\d+>>,<<ArgLoopUse>>] + /// CHECK: If [<<Arg>>] liveness:<<IfLiv:\d+>> + /// CHECK: Goto liveness:<<GotoLiv1:\d+>> + /// CHECK: Goto liveness:<<GotoLiv2:\d+>> + /// CHECK: Exit + /// CHECK-EVAL: <<IfLiv>> + 1 == <<ArgUse>> + /// CHECK-EVAL: <<GotoLiv1>> < <<GotoLiv2>> + /// CHECK-EVAL: <<GotoLiv1>> + 2 == <<ArgLoopUse>> + public static void loop9() { while (Runtime.getRuntime() != null) { // 'incoming' must only have a use in the inner loop. diff --git a/test/485-checker-dce-switch/expected.txt b/test/485-checker-dce-switch/expected.txt new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/test/485-checker-dce-switch/expected.txt diff --git a/test/485-checker-dce-switch/info.txt b/test/485-checker-dce-switch/info.txt new file mode 100644 index 0000000000..6653526827 --- /dev/null +++ b/test/485-checker-dce-switch/info.txt @@ -0,0 +1 @@ +Tests that DCE can remove a packed switch. diff --git a/test/485-checker-dce-switch/src/Main.java b/test/485-checker-dce-switch/src/Main.java new file mode 100644 index 0000000000..019d876ec8 --- /dev/null +++ b/test/485-checker-dce-switch/src/Main.java @@ -0,0 +1,192 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +public class Main { + + public static int $inline$method() { + return 5; + } + + /// CHECK-START: int Main.wholeSwitchDead(int) dead_code_elimination_final (before) + /// CHECK-DAG: PackedSwitch + + /// CHECK-START: int Main.wholeSwitchDead(int) dead_code_elimination_final (after) + /// CHECK-DAG: <<Const100:i\d+>> IntConstant 100 + /// CHECK-DAG: Return [<<Const100>>] + + /// CHECK-START: int Main.wholeSwitchDead(int) dead_code_elimination_final (after) + /// CHECK-NOT: PackedSwitch + + public static int wholeSwitchDead(int j) { + int i = $inline$method(); + int l = 100; + if (i > 100) { + switch(j) { + case 1: + i++; + break; + case 2: + i = 99; + break; + case 3: + i = 100; + break; + case 4: + i = -100; + break; + case 5: + i = 7; + break; + case 6: + i = -9; + break; + } + l += i; + } + + return l; + } + + /// CHECK-START: int Main.constantSwitch_InRange() dead_code_elimination_final (before) + /// CHECK-DAG: PackedSwitch + + /// CHECK-START: int Main.constantSwitch_InRange() dead_code_elimination_final (after) + /// CHECK-DAG: <<Const7:i\d+>> IntConstant 7 + /// CHECK-DAG: Return [<<Const7>>] + + /// CHECK-START: int Main.constantSwitch_InRange() dead_code_elimination_final (after) + /// CHECK-NOT: PackedSwitch + + public static int constantSwitch_InRange() { + int i = $inline$method(); + switch(i) { + case 1: + i++; + break; + case 2: + i = 99; + break; + case 3: + i = 100; + break; + case 4: + i = -100; + break; + case 5: + i = 7; + break; + case 6: + i = -9; + break; + } + + return i; + } + + /// CHECK-START: int Main.constantSwitch_AboveRange() dead_code_elimination_final (before) + /// CHECK-DAG: PackedSwitch + + /// CHECK-START: int Main.constantSwitch_AboveRange() dead_code_elimination_final (after) + /// CHECK-DAG: <<Const15:i\d+>> IntConstant 15 + /// CHECK-DAG: Return [<<Const15>>] + + /// CHECK-START: int Main.constantSwitch_AboveRange() dead_code_elimination_final (after) + /// CHECK-NOT: PackedSwitch + + public static int constantSwitch_AboveRange() { + int i = $inline$method() + 10; + switch(i) { + case 1: + i++; + break; + case 2: + i = 99; + break; + case 3: + i = 100; + break; + case 4: + i = -100; + break; + case 5: + i = 7; + break; + case 6: + i = -9; + break; + } + + return i; + } + + /// CHECK-START: int Main.constantSwitch_BelowRange() dead_code_elimination_final (before) + /// CHECK-DAG: PackedSwitch + + /// CHECK-START: int Main.constantSwitch_BelowRange() dead_code_elimination_final (after) + /// CHECK-DAG: <<ConstM5:i\d+>> IntConstant -5 + /// CHECK-DAG: Return [<<ConstM5>>] + + /// CHECK-START: int Main.constantSwitch_BelowRange() dead_code_elimination_final (after) + /// CHECK-NOT: PackedSwitch + + public static int constantSwitch_BelowRange() { + int i = $inline$method() - 10; + switch(i) { + case 1: + i++; + break; + case 2: + i = 99; + break; + case 3: + i = 100; + break; + case 4: + i = -100; + break; + case 5: + i = 7; + break; + case 6: + i = -9; + break; + } + + return i; + } + + public static void main(String[] args) throws Exception { + int ret_val = wholeSwitchDead(10); + if (ret_val != 100) { + throw new Error("Incorrect return value from wholeSwitchDead:" + ret_val); + } + + ret_val = constantSwitch_InRange(); + if (ret_val != 7) { + throw new Error("Incorrect return value from constantSwitch_InRange:" + ret_val); + } + + ret_val = constantSwitch_AboveRange(); + if (ret_val != 15) { + throw new Error("Incorrect return value from constantSwitch_AboveRange:" + ret_val); + } + + ret_val = constantSwitch_BelowRange(); + if (ret_val != -5) 
{ + throw new Error("Incorrect return value from constantSwitch_BelowRange:" + ret_val); + } + } +} diff --git a/test/497-inlining-and-class-loader/expected.txt b/test/497-inlining-and-class-loader/expected.txt index f5b9fe07de..905dbfd2cb 100644 --- a/test/497-inlining-and-class-loader/expected.txt +++ b/test/497-inlining-and-class-loader/expected.txt @@ -1,3 +1,4 @@ +JNI_OnLoad called java.lang.Exception at Main.$noinline$bar(Main.java:124) at Level2.$inline$bar(Level1.java:25) diff --git a/test/510-checker-try-catch/smali/Builder.smali b/test/510-checker-try-catch/smali/Builder.smali index 2274ba4d43..1fde5edc23 100644 --- a/test/510-checker-try-catch/smali/Builder.smali +++ b/test/510-checker-try-catch/smali/Builder.smali @@ -59,7 +59,7 @@ ## CHECK: StoreLocal [v0,<<Minus2>>] ## CHECK: name "<<BCatch3>>" -## CHECK: predecessors "<<BEnterTry1>>" "<<BExitTry1>>" "<<BEnterTry2>>" "<<BExitTry2>>" +## CHECK: predecessors "<<BEnterTry1>>" "<<BEnterTry2>>" "<<BExitTry1>>" "<<BExitTry2>>" ## CHECK: successors "<<BReturn>>" ## CHECK: flags "catch_block" ## CHECK: StoreLocal [v0,<<Minus3>>] @@ -70,18 +70,18 @@ ## CHECK: xhandlers "<<BCatch1>>" "<<BCatch3>>" ## CHECK: TryBoundary kind:entry -## CHECK: name "<<BExitTry1>>" -## CHECK: predecessors "<<BTry1>>" -## CHECK: successors "<<BAdd>>" -## CHECK: xhandlers "<<BCatch1>>" "<<BCatch3>>" -## CHECK: TryBoundary kind:exit - ## CHECK: name "<<BEnterTry2>>" ## CHECK: predecessors "<<BAdd>>" ## CHECK: successors "<<BTry2>>" ## CHECK: xhandlers "<<BCatch2>>" "<<BCatch3>>" ## CHECK: TryBoundary kind:entry +## CHECK: name "<<BExitTry1>>" +## CHECK: predecessors "<<BTry1>>" +## CHECK: successors "<<BAdd>>" +## CHECK: xhandlers "<<BCatch1>>" "<<BCatch3>>" +## CHECK: TryBoundary kind:exit + ## CHECK: name "<<BExitTry2>>" ## CHECK: predecessors "<<BTry2>>" ## CHECK: successors "<<BReturn>>" @@ -121,8 +121,7 @@ goto :return .end method -# Test that multiple try-entry blocks are generated if there are multiple entry -# points into the try block. +# Tests try-entry block when there are multiple entry points into the try block. 
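# The checks below expect the graph dump to list the try-entry TryBoundary blocks
# before the try-exit blocks; catch-block predecessor lists follow the same
# entry-before-exit order.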
## CHECK-START: int Builder.testMultipleEntries(int, int, int, int) builder (after) @@ -142,20 +141,20 @@ ## CHECK: name "<<BTry1:B\d+>>" ## CHECK: predecessors "<<BEnterTry1>>" -## CHECK: successors "<<BTry2:B\d+>>" +## CHECK: successors "<<BExitTry1:B\d+>>" ## CHECK: Div -## CHECK: name "<<BTry2>>" -## CHECK: predecessors "<<BEnterTry2>>" "<<BTry1>>" -## CHECK: successors "<<BExitTry:B\d+>>" +## CHECK: name "<<BTry2:B\d+>>" +## CHECK: predecessors "<<BEnterTry2>>" +## CHECK: successors "<<BExitTry2:B\d+>>" ## CHECK: Div ## CHECK: name "<<BReturn:B\d+>>" -## CHECK: predecessors "<<BExitTry>>" "<<BCatch:B\d+>>" +## CHECK: predecessors "<<BExitTry2>>" "<<BCatch:B\d+>>" ## CHECK: Return ## CHECK: name "<<BCatch>>" -## CHECK: predecessors "<<BEnterTry1>>" "<<BEnterTry2>>" "<<BExitTry>>" +## CHECK: predecessors "<<BEnterTry1>>" "<<BEnterTry2>>" "<<BExitTry1>>" "<<BExitTry2>>" ## CHECK: successors "<<BReturn>>" ## CHECK: flags "catch_block" ## CHECK: StoreLocal [v0,<<Minus1>>] @@ -167,12 +166,18 @@ ## CHECK: TryBoundary kind:entry ## CHECK: name "<<BEnterTry2>>" -## CHECK: predecessors "<<BIf>>" +## CHECK: predecessors "<<BIf>>" "<<BExitTry1>>" ## CHECK: successors "<<BTry2>>" ## CHECK: xhandlers "<<BCatch>>" ## CHECK: TryBoundary kind:entry -## CHECK: name "<<BExitTry>>" +## CHECK: name "<<BExitTry1>>" +## CHECK: predecessors "<<BTry1>>" +## CHECK: successors "<<BEnterTry2>>" +## CHECK: xhandlers "<<BCatch>>" +## CHECK: TryBoundary kind:exit + +## CHECK: name "<<BExitTry2>>" ## CHECK: predecessors "<<BTry2>>" ## CHECK: successors "<<BReturn>>" ## CHECK: xhandlers "<<BCatch>>" @@ -314,18 +319,18 @@ ## CHECK: xhandlers "<<BCatch1>>" ## CHECK: TryBoundary kind:entry -## CHECK: name "<<BExit1>>" -## CHECK: predecessors "<<BTry1>>" -## CHECK: successors "<<BEnter2>>" -## CHECK: xhandlers "<<BCatch1>>" -## CHECK: TryBoundary kind:exit - ## CHECK: name "<<BEnter2>>" ## CHECK: predecessors "<<BExit1>>" ## CHECK: successors "<<BTry2>>" ## CHECK: xhandlers "<<BCatch2>>" ## CHECK: TryBoundary kind:entry +## CHECK: name "<<BExit1>>" +## CHECK: predecessors "<<BTry1>>" +## CHECK: successors "<<BEnter2>>" +## CHECK: xhandlers "<<BCatch1>>" +## CHECK: TryBoundary kind:exit + ## CHECK: name "<<BExit2>>" ## CHECK: predecessors "<<BTry2>>" ## CHECK: successors "<<BReturn>>" @@ -402,18 +407,18 @@ ## CHECK: xhandlers "<<BCatch1>>" ## CHECK: TryBoundary kind:entry -## CHECK: name "<<BExit1>>" -## CHECK: predecessors "<<BTry1>>" -## CHECK: successors "<<BReturn>>" -## CHECK: xhandlers "<<BCatch1>>" -## CHECK: TryBoundary kind:exit - ## CHECK: name "<<BEnter2>>" ## CHECK: predecessors "<<BGoto>>" ## CHECK: successors "<<BTry2>>" ## CHECK: xhandlers "<<BCatch2>>" ## CHECK: TryBoundary kind:entry +## CHECK: name "<<BExit1>>" +## CHECK: predecessors "<<BTry1>>" +## CHECK: successors "<<BReturn>>" +## CHECK: xhandlers "<<BCatch1>>" +## CHECK: TryBoundary kind:exit + ## CHECK: name "<<BExit2>>" ## CHECK: predecessors "<<BTry2>>" ## CHECK: successors "<<BEnter1>>" @@ -483,7 +488,7 @@ ## CHECK: StoreLocal [v0,<<Minus1>>] ## CHECK: name "<<BCatchAll>>" -## CHECK: predecessors "<<BEnter1>>" "<<BExit1>>" "<<BEnter2>>" "<<BExit2>>" "<<BEnter3>>" "<<BExit3>>" +## CHECK: predecessors "<<BEnter1>>" "<<BEnter2>>" "<<BEnter3>>" "<<BExit1>>" "<<BExit2>>" "<<BExit3>>" ## CHECK: successors "<<BReturn>>" ## CHECK: flags "catch_block" ## CHECK: StoreLocal [v0,<<Minus2>>] @@ -494,30 +499,30 @@ ## CHECK: xhandlers "<<BCatchAll>>" ## CHECK: TryBoundary kind:entry -## CHECK: name "<<BExit1>>" -## CHECK: predecessors "<<BTry1>>" -## CHECK: 
successors "<<BEnter2>>" -## CHECK: xhandlers "<<BCatchAll>>" -## CHECK: TryBoundary kind:exit - ## CHECK: name "<<BEnter2>>" ## CHECK: predecessors "<<BExit1>>" ## CHECK: successors "<<BTry2>>" ## CHECK: xhandlers "<<BCatchArith>>" "<<BCatchAll>>" ## CHECK: TryBoundary kind:entry -## CHECK: name "<<BExit2>>" -## CHECK: predecessors "<<BTry2>>" -## CHECK: successors "<<BEnter3>>" -## CHECK: xhandlers "<<BCatchArith>>" "<<BCatchAll>>" -## CHECK: TryBoundary kind:exit - ## CHECK: name "<<BEnter3>>" ## CHECK: predecessors "<<BExit2>>" ## CHECK: successors "<<BTry3>>" ## CHECK: xhandlers "<<BCatchAll>>" ## CHECK: TryBoundary kind:entry +## CHECK: name "<<BExit1>>" +## CHECK: predecessors "<<BTry1>>" +## CHECK: successors "<<BEnter2>>" +## CHECK: xhandlers "<<BCatchAll>>" +## CHECK: TryBoundary kind:exit + +## CHECK: name "<<BExit2>>" +## CHECK: predecessors "<<BTry2>>" +## CHECK: successors "<<BEnter3>>" +## CHECK: xhandlers "<<BCatchArith>>" "<<BCatchAll>>" +## CHECK: TryBoundary kind:exit + ## CHECK: name "<<BExit3>>" ## CHECK: predecessors "<<BTry3>>" ## CHECK: successors "<<BReturn>>" @@ -577,7 +582,7 @@ ## CHECK: Div ## CHECK: name "<<BCatch>>" -## CHECK: predecessors "<<BEnterTry1>>" "<<BExitTry1>>" "<<BEnterTry2>>" "<<BExitTry2>>" +## CHECK: predecessors "<<BEnterTry1>>" "<<BEnterTry2>>" "<<BExitTry1>>" "<<BExitTry2>>" ## CHECK: successors "<<BReturn>>" ## CHECK: flags "catch_block" ## CHECK: StoreLocal [v0,<<Minus1>>] @@ -588,18 +593,18 @@ ## CHECK: xhandlers "<<BCatch>>" ## CHECK: TryBoundary kind:entry -## CHECK: name "<<BExitTry1>>" -## CHECK: predecessors "<<BTry1>>" -## CHECK: successors "<<BOutside>>" -## CHECK: xhandlers "<<BCatch>>" -## CHECK: TryBoundary kind:exit - ## CHECK: name "<<BEnterTry2>>" ## CHECK: predecessors "<<BOutside>>" ## CHECK: successors "<<BTry2>>" ## CHECK: xhandlers "<<BCatch>>" ## CHECK: TryBoundary kind:entry +## CHECK: name "<<BExitTry1>>" +## CHECK: predecessors "<<BTry1>>" +## CHECK: successors "<<BOutside>>" +## CHECK: xhandlers "<<BCatch>>" +## CHECK: TryBoundary kind:exit + ## CHECK: name "<<BExitTry2>>" ## CHECK: predecessors "<<BTry2>>" ## CHECK: successors "<<BReturn>>" @@ -647,21 +652,21 @@ ## CHECK: name "<<BTry1:B\d+>>" ## CHECK: predecessors "<<BEnterTry1>>" -## CHECK: successors "<<BTry2:B\d+>>" +## CHECK: successors "<<BExitTry1:B\d+>>" ## CHECK: Div -## CHECK: name "<<BTry2>>" -## CHECK: predecessors "<<BEnterTry2>>" "<<BTry1>>" -## CHECK: successors "<<BExitTry:B\d+>>" +## CHECK: name "<<BTry2:B\d+>>" +## CHECK: predecessors "<<BEnterTry2>>" +## CHECK: successors "<<BExitTry2:B\d+>>" ## CHECK: Div ## CHECK: name "<<BOutside>>" -## CHECK: predecessors "<<BPSwitch1>>" "<<BExitTry>>" +## CHECK: predecessors "<<BPSwitch1>>" "<<BExitTry2>>" ## CHECK: successors "<<BCatchReturn:B\d+>>" ## CHECK: Div ## CHECK: name "<<BCatchReturn>>" -## CHECK: predecessors "<<BOutside>>" "<<BEnterTry1>>" "<<BEnterTry2>>" "<<BExitTry>>" +## CHECK: predecessors "<<BOutside>>" "<<BEnterTry1>>" "<<BEnterTry2>>" "<<BExitTry1>>" "<<BExitTry2>>" ## CHECK: flags "catch_block" ## CHECK: Return @@ -677,7 +682,13 @@ ## CHECK: xhandlers "<<BCatchReturn>>" ## CHECK: TryBoundary kind:entry -## CHECK: name "<<BExitTry>>" +## CHECK: name "<<BExitTry1>>" +## CHECK: predecessors "<<BTry1>>" +## CHECK: successors "<<BEnterTry2>>" +## CHECK: xhandlers "<<BCatchReturn>>" +## CHECK: TryBoundary kind:exit + +## CHECK: name "<<BExitTry2>>" ## CHECK: predecessors "<<BTry2>>" ## CHECK: successors "<<BOutside>>" ## CHECK: xhandlers "<<BCatchReturn>>" @@ -741,7 +752,7 @@ ## CHECK: Div ## 
CHECK: name "<<BCatchReturn>>" -## CHECK: predecessors "<<BOutside>>" "<<BEnterTry1>>" "<<BExitTry1>>" "<<BEnterTry2>>" "<<BExitTry2>>" +## CHECK: predecessors "<<BOutside>>" "<<BEnterTry1>>" "<<BEnterTry2>>" "<<BExitTry1>>" "<<BExitTry2>>" ## CHECK: flags "catch_block" ## CHECK: Return @@ -751,18 +762,18 @@ ## CHECK: xhandlers "<<BCatchReturn>>" ## CHECK: TryBoundary kind:entry -## CHECK: name "<<BExitTry1>>" -## CHECK: predecessors "<<BPSwitch0>>" -## CHECK: successors "<<BPSwitch1>>" -## CHECK: xhandlers "<<BCatchReturn>>" -## CHECK: TryBoundary kind:exit - ## CHECK: name "<<BEnterTry2>>" ## CHECK: predecessors "<<BPSwitch1>>" ## CHECK: successors "<<BTry1>>" ## CHECK: xhandlers "<<BCatchReturn>>" ## CHECK: TryBoundary kind:entry +## CHECK: name "<<BExitTry1>>" +## CHECK: predecessors "<<BPSwitch0>>" +## CHECK: successors "<<BPSwitch1>>" +## CHECK: xhandlers "<<BCatchReturn>>" +## CHECK: TryBoundary kind:exit + ## CHECK: name "<<BExitTry2>>" ## CHECK: predecessors "<<BTry2>>" ## CHECK: successors "<<BOutside>>" @@ -907,7 +918,7 @@ ## CHECK: Div ## CHECK: name "<<BCatch:B\d+>>" -## CHECK: predecessors "<<BExitTry1>>" "<<BEnterTry1>>" "<<BExitTry1>>" "<<BEnterTry2:B\d+>>" "<<BExitTry2:B\d+>>" +## CHECK: predecessors "<<BExitTry1>>" "<<BEnterTry1>>" "<<BEnterTry2:B\d+>>" "<<BExitTry1>>" "<<BExitTry2:B\d+>>" ## CHECK: successors "<<BEnterTry2>>" ## CHECK: flags "catch_block" @@ -928,18 +939,18 @@ ## CHECK: xhandlers "<<BCatch>>" ## CHECK: TryBoundary kind:entry -## CHECK: name "<<BExitTry1>>" -## CHECK: predecessors "<<BTry1>>" -## CHECK: successors "<<BCatch>>" -## CHECK: xhandlers "<<BCatch>>" -## CHECK: TryBoundary kind:exit - ## CHECK: name "<<BEnterTry2>>" ## CHECK: predecessors "<<BCatch>>" ## CHECK: successors "<<BTry2>>" ## CHECK: xhandlers "<<BCatch>>" ## CHECK: TryBoundary kind:entry +## CHECK: name "<<BExitTry1>>" +## CHECK: predecessors "<<BTry1>>" +## CHECK: successors "<<BCatch>>" +## CHECK: xhandlers "<<BCatch>>" +## CHECK: TryBoundary kind:exit + ## CHECK: name "<<BExitTry2>>" ## CHECK: predecessors "<<BTry2>>" ## CHECK: successors "<<BReturn>>" @@ -1001,18 +1012,18 @@ ## CHECK: xhandlers "<<BCatch2>>" ## CHECK: TryBoundary kind:entry -## CHECK: name "<<BExitTry1>>" -## CHECK: predecessors "<<BTry1>>" -## CHECK: successors "<<BCatch2>>" -## CHECK: xhandlers "<<BCatch2>>" -## CHECK: TryBoundary kind:exit - ## CHECK: name "<<BEnterTry2>>" ## CHECK: predecessors "<<BCatch2>>" ## CHECK: successors "<<BTry2>>" ## CHECK: xhandlers "<<BCatch1>>" ## CHECK: TryBoundary kind:entry +## CHECK: name "<<BExitTry1>>" +## CHECK: predecessors "<<BTry1>>" +## CHECK: successors "<<BCatch2>>" +## CHECK: xhandlers "<<BCatch2>>" +## CHECK: TryBoundary kind:exit + ## CHECK: name "<<BExitTry2>>" ## CHECK: predecessors "<<BTry2>>" ## CHECK: successors "<<BReturn>>" @@ -1037,6 +1048,52 @@ return p0 .end method +# Test graph with try/catch inside a loop. 
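# In the testTryInLoop case that follows, both the try block and the catch handler
# branch straight back to the start of the try, so the method can never exit
# normally; the checks expect the exit block to have no predecessors.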
+ +## CHECK-START: int Builder.testTryInLoop(int, int) builder (after) + +## CHECK: name "B0" +## CHECK: successors "<<BEnterTry:B\d+>>" + +## CHECK: name "<<BTry:B\d+>>" +## CHECK: predecessors "<<BEnterTry>>" +## CHECK: successors "<<BExitTry:B\d+>>" +## CHECK: Div + +## CHECK: name "<<BCatch:B\d+>>" +## CHECK: predecessors "<<BEnterTry>>" "<<BExitTry>>" +## CHECK: successors "<<BEnterTry>>" +## CHECK: flags "catch_block" + +## CHECK: name "<<BExit:B\d+>>" +## CHECK-NOT: predecessors "{{B\d+}}" +## CHECK: end_block + +## CHECK: name "<<BEnterTry>>" +## CHECK: predecessors "B0" +## CHECK: successors "<<BTry>>" +## CHECK: xhandlers "<<BCatch>>" +## CHECK: TryBoundary kind:entry + +## CHECK: name "<<BExitTry>>" +## CHECK: predecessors "<<BTry>>" +## CHECK: successors "<<BEnterTry>>" +## CHECK: xhandlers "<<BCatch>>" +## CHECK: TryBoundary kind:exit + +.method public static testTryInLoop(II)I + .registers 3 + + :try_start + div-int/2addr p0, p1 + goto :try_start + :try_end + .catchall {:try_start .. :try_end} :catch_all + + :catch_all + goto :try_start +.end method + # Test that a MOVE_RESULT instruction is placed into the same block as the # INVOKE it follows, even if there is a try boundary between them. diff --git a/test/526-checker-caller-callee-regs/src/Main.java b/test/526-checker-caller-callee-regs/src/Main.java index a1f33014ef..f402c2cd48 100644 --- a/test/526-checker-caller-callee-regs/src/Main.java +++ b/test/526-checker-caller-callee-regs/src/Main.java @@ -36,6 +36,8 @@ public class Main { // ------------------------------|------------------------|----------------- // ARM64 callee-saved registers | [x20-x29] | x2[0-9] // ARM callee-saved registers | [r5-r8,r10,r11] | r([5-8]|10|11) + // X86 callee-saved registers | [ebp,esi,edi] | e(bp|si|di) + // X86_64 callee-saved registers | [rbx,rbp,r12-15] | r(bx|bp|1[2-5]) /** * Check that a value live across a function call is allocated in a callee @@ -58,7 +60,21 @@ public class Main { /// CHECK: Sub [<<t1>>,<<t2>>] /// CHECK: Return - // TODO: Add tests for other architectures. 
+ /// CHECK-START-X86: int Main.$opt$LiveInCall(int) register (after) + /// CHECK-DAG: <<Arg:i\d+>> ParameterValue + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 + /// CHECK: <<t1:i\d+>> Add [<<Arg>>,<<Const1>>] {{.*->e(bp|si|di)}} + /// CHECK: <<t2:i\d+>> InvokeStaticOrDirect + /// CHECK: Sub [<<t1>>,<<t2>>] + /// CHECK: Return + + /// CHECK-START-X86_64: int Main.$opt$LiveInCall(int) register (after) + /// CHECK-DAG: <<Arg:i\d+>> ParameterValue + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 + /// CHECK: <<t1:i\d+>> Add [<<Arg>>,<<Const1>>] {{.*->r(bx|bp|1[2-5])}} + /// CHECK: <<t2:i\d+>> InvokeStaticOrDirect + /// CHECK: Sub [<<t1>>,<<t2>>] + /// CHECK: Return public static int $opt$LiveInCall(int arg) { int t1 = arg + 1; diff --git a/test/529-checker-unresolved/expected.txt b/test/529-checker-unresolved/expected.txt index 358048c75b..1e7dbfed2e 100644 --- a/test/529-checker-unresolved/expected.txt +++ b/test/529-checker-unresolved/expected.txt @@ -3,3 +3,5 @@ UnresolvedClass.staticMethod() UnresolvedClass.virtualMethod() UnresolvedClass.interfaceMethod() UnresolvedClass.superMethod() +instanceof ok +checkcast ok diff --git a/test/529-checker-unresolved/src/Main.java b/test/529-checker-unresolved/src/Main.java index 6f047974b3..5219c04c37 100644 --- a/test/529-checker-unresolved/src/Main.java +++ b/test/529-checker-unresolved/src/Main.java @@ -44,13 +44,141 @@ public class Main extends UnresolvedSuperClass { super.superMethod(); } + /// CHECK-START: void Main.callUnresolvedStaticFieldAccess() register (before) + /// CHECK: UnresolvedStaticFieldSet field_type:PrimByte + /// CHECK: UnresolvedStaticFieldSet field_type:PrimChar + /// CHECK: UnresolvedStaticFieldSet field_type:PrimInt + /// CHECK: UnresolvedStaticFieldSet field_type:PrimLong + /// CHECK: UnresolvedStaticFieldSet field_type:PrimFloat + /// CHECK: UnresolvedStaticFieldSet field_type:PrimDouble + /// CHECK: UnresolvedStaticFieldSet field_type:PrimNot + + /// CHECK: UnresolvedStaticFieldGet field_type:PrimByte + /// CHECK: UnresolvedStaticFieldGet field_type:PrimChar + /// CHECK: UnresolvedStaticFieldGet field_type:PrimInt + /// CHECK: UnresolvedStaticFieldGet field_type:PrimLong + /// CHECK: UnresolvedStaticFieldGet field_type:PrimFloat + /// CHECK: UnresolvedStaticFieldGet field_type:PrimDouble + /// CHECK: UnresolvedStaticFieldGet field_type:PrimNot + static public void callUnresolvedStaticFieldAccess() { + Object o = new Object(); + UnresolvedClass.staticByte = (byte)1; + UnresolvedClass.staticChar = '1'; + UnresolvedClass.staticInt = 123456789; + UnresolvedClass.staticLong = 123456789123456789l; + UnresolvedClass.staticFloat = 123456789123456789f; + UnresolvedClass.staticDouble = 123456789123456789d; + UnresolvedClass.staticObject = o; + + expectEquals((byte)1, UnresolvedClass.staticByte); + expectEquals('1', UnresolvedClass.staticChar); + expectEquals(123456789, UnresolvedClass.staticInt); + expectEquals(123456789123456789l, UnresolvedClass.staticLong); + expectEquals(123456789123456789f, UnresolvedClass.staticFloat); + expectEquals(123456789123456789d, UnresolvedClass.staticDouble); + expectEquals(o, UnresolvedClass.staticObject); + } + + /// CHECK-START: void Main.callUnresolvedInstanceFieldAccess(UnresolvedClass) register (before) + /// CHECK: UnresolvedInstanceFieldSet field_type:PrimByte + /// CHECK: UnresolvedInstanceFieldSet field_type:PrimChar + /// CHECK: UnresolvedInstanceFieldSet field_type:PrimInt + /// CHECK: UnresolvedInstanceFieldSet field_type:PrimLong + /// CHECK: UnresolvedInstanceFieldSet 
field_type:PrimFloat + /// CHECK: UnresolvedInstanceFieldSet field_type:PrimDouble + /// CHECK: UnresolvedInstanceFieldSet field_type:PrimNot + + /// CHECK: UnresolvedInstanceFieldGet field_type:PrimByte + /// CHECK: UnresolvedInstanceFieldGet field_type:PrimChar + /// CHECK: UnresolvedInstanceFieldGet field_type:PrimInt + /// CHECK: UnresolvedInstanceFieldGet field_type:PrimLong + /// CHECK: UnresolvedInstanceFieldGet field_type:PrimFloat + /// CHECK: UnresolvedInstanceFieldGet field_type:PrimDouble + /// CHECK: UnresolvedInstanceFieldGet field_type:PrimNot + static public void callUnresolvedInstanceFieldAccess(UnresolvedClass c) { + Object o = new Object(); + c.instanceByte = (byte)1; + c.instanceChar = '1'; + c.instanceInt = 123456789; + c.instanceLong = 123456789123456789l; + c.instanceFloat = 123456789123456789f; + c.instanceDouble = 123456789123456789d; + c.instanceObject = o; + + expectEquals((byte)1, c.instanceByte); + expectEquals('1', c.instanceChar); + expectEquals(123456789, c.instanceInt); + expectEquals(123456789123456789l, c.instanceLong); + expectEquals(123456789123456789f, c.instanceFloat); + expectEquals(123456789123456789d, c.instanceDouble); + expectEquals(o, c.instanceObject); + } + + static public void testInstanceOf(Object o) { + if (o instanceof UnresolvedSuperClass) { + System.out.println("instanceof ok"); + } + } + + static public UnresolvedSuperClass testCheckCast(Object o) { + UnresolvedSuperClass c = (UnresolvedSuperClass) o; + System.out.println("checkcast ok"); + return c; + } /// CHECK-START: void Main.main(java.lang.String[]) register (before) /// CHECK: InvokeUnresolved invoke_type:direct static public void main(String[] args) { UnresolvedClass c = new UnresolvedClass(); + Main m = new Main(); callInvokeUnresolvedStatic(); callInvokeUnresolvedVirtual(c); callInvokeUnresolvedInterface(c); - callInvokeUnresolvedSuper(new Main()); + callInvokeUnresolvedSuper(m); + callUnresolvedStaticFieldAccess(); + callUnresolvedInstanceFieldAccess(c); + testInstanceOf(m); + testCheckCast(m); + } + + public static void expectEquals(byte expected, byte result) { + if (expected != result) { + throw new Error("Expected: " + expected + ", found: " + result); + } + } + + public static void expectEquals(char expected, char result) { + if (expected != result) { + throw new Error("Expected: " + expected + ", found: " + result); + } + } + + public static void expectEquals(int expected, int result) { + if (expected != result) { + throw new Error("Expected: " + expected + ", found: " + result); + } + } + + public static void expectEquals(long expected, long result) { + if (expected != result) { + throw new Error("Expected: " + expected + ", found: " + result); + } + } + + public static void expectEquals(float expected, float result) { + if (expected != result) { + throw new Error("Expected: " + expected + ", found: " + result); + } + } + + public static void expectEquals(double expected, double result) { + if (expected != result) { + throw new Error("Expected: " + expected + ", found: " + result); + } + } + + public static void expectEquals(Object expected, Object result) { + if (expected != result) { + throw new Error("Expected: " + expected + ", found: " + result); + } } } diff --git a/test/529-checker-unresolved/src/Unresolved.java b/test/529-checker-unresolved/src/Unresolved.java index 5bf92dd331..20ac6e0b89 100644 --- a/test/529-checker-unresolved/src/Unresolved.java +++ b/test/529-checker-unresolved/src/Unresolved.java @@ -40,15 +40,21 @@ class UnresolvedClass extends 
UnresolvedSuperClass implements UnresolvedInterfac public void interfaceMethod() { System.out.println("UnresolvedClass.interfaceMethod()"); } -} - -final class UnresolvedFinalClass { - public void directMethod() { - System.out.println("UnresolvedFinalClass.directMethod()"); - } -} -class UnresolvedAtRuntime { - public void unresolvedAtRuntime() { } + public static byte staticByte; + public static char staticChar; + public static int staticInt; + public static long staticLong; + public static float staticFloat; + public static double staticDouble; + public static Object staticObject; + + public byte instanceByte; + public char instanceChar; + public int instanceInt; + public long instanceLong; + public float instanceFloat; + public double instanceDouble; + public Object instanceObject; } diff --git a/test/530-checker-loops/src/Main.java b/test/530-checker-loops/src/Main.java index e518a61f88..58c92f1ea4 100644 --- a/test/530-checker-loops/src/Main.java +++ b/test/530-checker-loops/src/Main.java @@ -22,7 +22,7 @@ public class Main { static int sResult; // - // Various sequence variables where bound checks can be removed from loop. + // Various sequence variables used in bound checks. // /// CHECK-START: int Main.linear(int[]) BCE (before) @@ -62,6 +62,19 @@ public class Main { return result; } + /// CHECK-START: int Main.linearVeryObscure(int[]) BCE (before) + /// CHECK-DAG: BoundsCheck + /// CHECK-START: int Main.linearVeryObscure(int[]) BCE (after) + /// CHECK-NOT: BoundsCheck + private static int linearVeryObscure(int[] x) { + int result = 0; + for (int i = 0; i < x.length; i++) { + int k = (-i) + (i << 5) + i - (32 * i) + 5 + (int) i; + result += x[k - 5]; + } + return result; + } + /// CHECK-START: int Main.linearWhile(int[]) BCE (before) /// CHECK-DAG: BoundsCheck /// CHECK-START: int Main.linearWhile(int[]) BCE (after) @@ -75,6 +88,42 @@ public class Main { return result; } + /// CHECK-START: int Main.linearThreeWayPhi(int[]) BCE (before) + /// CHECK-DAG: BoundsCheck + /// CHECK-START: int Main.linearThreeWayPhi(int[]) BCE (after) + /// CHECK-NOT: BoundsCheck + private static int linearThreeWayPhi(int[] x) { + int result = 0; + for (int i = 0; i < x.length; ) { + if (x[i] == 5) { + i++; + continue; + } + result += x[i++]; + } + return result; + } + + /// CHECK-START: int Main.linearFourWayPhi(int[]) BCE (before) + /// CHECK-DAG: BoundsCheck + /// CHECK-START: int Main.linearFourWayPhi(int[]) BCE (after) + /// CHECK-NOT: BoundsCheck + private static int linearFourWayPhi(int[] x) { + int result = 0; + for (int i = 0; i < x.length; ) { + if (x[i] == 5) { + i++; + continue; + } else if (x[i] == 6) { + i++; + result += 7; + continue; + } + result += x[i++]; + } + return result; + } + /// CHECK-START: int Main.wrapAroundThenLinear(int[]) BCE (before) /// CHECK-DAG: BoundsCheck /// CHECK-START: int Main.wrapAroundThenLinear(int[]) BCE (after) @@ -90,6 +139,25 @@ public class Main { return result; } + /// CHECK-START: int Main.wrapAroundThenLinearThreeWayPhi(int[]) BCE (before) + /// CHECK-DAG: BoundsCheck + /// CHECK-START: int Main.wrapAroundThenLinearThreeWayPhi(int[]) BCE (after) + /// CHECK-NOT: BoundsCheck + private static int wrapAroundThenLinearThreeWayPhi(int[] x) { + // Loop with wrap around (length - 1, 0, 1, 2, ..). 
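// The index w is either the initial x.length - 1 or a previous value of i (always
// smaller than x.length), so every x[w] access is in bounds even though w is a
// three-way phi; this is why BCE may remove the bounds check, per the checks above.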
+ int w = x.length - 1; + int result = 0; + for (int i = 0; i < x.length; ) { + if (x[w] == 1) { + w = i++; + continue; + } + result += x[w]; + w = i++; + } + return result; + } + /// CHECK-START: int[] Main.linearWithParameter(int) BCE (before) /// CHECK-DAG: BoundsCheck /// CHECK-START: int[] Main.linearWithParameter(int) BCE (after) @@ -102,6 +170,19 @@ public class Main { return x; } + /// CHECK-START: int[] Main.linearCopy(int[]) BCE (before) + /// CHECK-DAG: BoundsCheck + /// CHECK-START: int[] Main.linearCopy(int[]) BCE (after) + /// CHECK-NOT: BoundsCheck + private static int[] linearCopy(int x[]) { + int n = x.length; + int y[] = new int[n]; + for (int i = 0; i < n; i++) { + y[i] = x[i]; + } + return y; + } + /// CHECK-START: int Main.linearWithCompoundStride() BCE (before) /// CHECK-DAG: BoundsCheck /// CHECK-START: int Main.linearWithCompoundStride() BCE (after) @@ -126,7 +207,7 @@ public class Main { int result = 0; int k = 0; // Range analysis has no problem with a trip-count defined by a - // reasonably large positive stride. + // reasonably large positive stride far away from upper bound. for (int i = 1; i <= 10 * 10000000 + 1; i += 10000000) { result += x[k++]; } @@ -143,7 +224,7 @@ public class Main { int k = 0; // Range analysis conservatively bails due to potential of wrap-around // arithmetic while computing the trip-count for this very large stride. - for (int i = 1; i < 2147483647; i += 195225786) { + for (int i = 1; i < Integer.MAX_VALUE; i += 195225786) { result += x[k++]; } return result; @@ -158,7 +239,7 @@ public class Main { int result = 0; int k = 0; // Range analysis has no problem with a trip-count defined by a - // reasonably large negative stride. + // reasonably large negative stride far away from lower bound. for (int i = -1; i >= -10 * 10000000 - 1; i -= 10000000) { result += x[k++]; } @@ -175,12 +256,80 @@ public class Main { int k = 0; // Range analysis conservatively bails due to potential of wrap-around // arithmetic while computing the trip-count for this very large stride. 
- for (int i = -2; i > -2147483648; i -= 195225786) { + for (int i = -2; i > Integer.MIN_VALUE; i -= 195225786) { result += x[k++]; } return result; } + /// CHECK-START: int Main.linearForNEUp() BCE (before) + /// CHECK-DAG: BoundsCheck + /// CHECK-START: int Main.linearForNEUp() BCE (after) + /// CHECK-NOT: BoundsCheck + private static int linearForNEUp() { + int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + int result = 0; + for (int i = 0; i != 10; i++) { + result += x[i]; + } + return result; + } + + /// CHECK-START: int Main.linearForNEDown() BCE (before) + /// CHECK-DAG: BoundsCheck + /// CHECK-START: int Main.linearForNEDown() BCE (after) + /// CHECK-NOT: BoundsCheck + private static int linearForNEDown() { + int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + int result = 0; + for (int i = 9; i != -1; i--) { + result += x[i]; + } + return result; + } + + /// CHECK-START: int Main.linearDoWhileUp() BCE (before) + /// CHECK-DAG: BoundsCheck + /// CHECK-START: int Main.linearDoWhileUp() BCE (after) + /// CHECK-NOT: BoundsCheck + private static int linearDoWhileUp() { + int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + int result = 0; + int i = 0; + do { + result += x[i++]; + } while (i < 10); + return result; + } + + /// CHECK-START: int Main.linearDoWhileDown() BCE (before) + /// CHECK-DAG: BoundsCheck + /// CHECK-START: int Main.linearDoWhileDown() BCE (after) + /// CHECK-NOT: BoundsCheck + private static int linearDoWhileDown() { + int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + int result = 0; + int i = 9; + do { + result += x[i--]; + } while (0 <= i); + return result; + } + + /// CHECK-START: int Main.linearShort() BCE (before) + /// CHECK-DAG: BoundsCheck + /// CHECK-START: int Main.linearShort() BCE (after) + /// CHECK-DAG: BoundsCheck + private static int linearShort() { + int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + int result = 0; + // TODO: make this work + for (short i = 0; i < 10; i++) { + result += x[i]; + } + return result; + } + /// CHECK-START: int Main.periodicIdiom(int) BCE (before) /// CHECK-DAG: BoundsCheck /// CHECK-START: int Main.periodicIdiom(int) BCE (after) @@ -242,23 +391,156 @@ public class Main { return result; } - // - // Cases that actually go out of bounds. These test cases - // ensure the exceptions are thrown at the right places. 
- // + /// CHECK-START: int Main.justRightUp1() BCE (before) + /// CHECK-DAG: BoundsCheck + /// CHECK-START: int Main.justRightUp1() BCE (after) + /// CHECK-NOT: BoundsCheck + private static int justRightUp1() { + int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + int result = 0; + for (int i = Integer.MAX_VALUE - 10, k = 0; i < Integer.MAX_VALUE; i++) { + result += x[k++]; + } + return result; + } + /// CHECK-START: int Main.justRightUp2() BCE (before) + /// CHECK-DAG: BoundsCheck + /// CHECK-START: int Main.justRightUp2() BCE (after) + /// CHECK-NOT: BoundsCheck + private static int justRightUp2() { + int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + int result = 0; + for (int i = Integer.MAX_VALUE - 10; i < Integer.MAX_VALUE; i++) { + result += x[i - Integer.MAX_VALUE + 10]; + } + return result; + } + + /// CHECK-START: int Main.justRightUp3() BCE (before) + /// CHECK-DAG: BoundsCheck + /// CHECK-START: int Main.justRightUp3() BCE (after) + /// CHECK-NOT: BoundsCheck + private static int justRightUp3() { + int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + int result = 0; + for (int i = Integer.MAX_VALUE - 10, k = 0; i <= Integer.MAX_VALUE - 1; i++) { + result += x[k++]; + } + return result; + } + + /// CHECK-START: int Main.justOOBUp() BCE (before) + /// CHECK-DAG: BoundsCheck + /// CHECK-START: int Main.justOOBUp() BCE (after) + /// CHECK-DAG: BoundsCheck + private static int justOOBUp() { + int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + int result = 0; + // Infinite loop! + for (int i = Integer.MAX_VALUE - 9, k = 0; i <= Integer.MAX_VALUE; i++) { + result += x[k++]; + } + return result; + } + + /// CHECK-START: int Main.justRightDown1() BCE (before) + /// CHECK-DAG: BoundsCheck + /// CHECK-START: int Main.justRightDown1() BCE (after) + /// CHECK-NOT: BoundsCheck + private static int justRightDown1() { + int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + int result = 0; + for (int i = Integer.MIN_VALUE + 10, k = 0; i > Integer.MIN_VALUE; i--) { + result += x[k++]; + } + return result; + } + + /// CHECK-START: int Main.justRightDown2() BCE (before) + /// CHECK-DAG: BoundsCheck + /// CHECK-START: int Main.justRightDown2() BCE (after) + /// CHECK-NOT: BoundsCheck + private static int justRightDown2() { + int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + int result = 0; + for (int i = Integer.MIN_VALUE + 10; i > Integer.MIN_VALUE; i--) { + result += x[Integer.MAX_VALUE + i]; + } + return result; + } + + /// CHECK-START: int Main.justRightDown3() BCE (before) + /// CHECK-DAG: BoundsCheck + /// CHECK-START: int Main.justRightDown3() BCE (after) + /// CHECK-NOT: BoundsCheck + private static int justRightDown3() { + int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + int result = 0; + for (int i = Integer.MIN_VALUE + 10, k = 0; i >= Integer.MIN_VALUE + 1; i--) { + result += x[k++]; + } + return result; + } + + /// CHECK-START: int Main.justOOBDown() BCE (before) + /// CHECK-DAG: BoundsCheck + /// CHECK-START: int Main.justOOBDown() BCE (after) + /// CHECK-DAG: BoundsCheck + private static int justOOBDown() { + int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + int result = 0; + // Infinite loop! 
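// The guard i >= Integer.MIN_VALUE below is always true, so this loop can only end
// through the ArrayIndexOutOfBoundsException raised once k walks past the end of x,
// which is why the bounds check has to survive BCE (see the checks above).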
+ for (int i = Integer.MIN_VALUE + 9, k = 0; i >= Integer.MIN_VALUE; i--) { + result += x[k++]; + } + return result; + } + + /// CHECK-START: void Main.lowerOOB(int[]) BCE (before) + /// CHECK-DAG: BoundsCheck + /// CHECK-START: void Main.lowerOOB(int[]) BCE (after) + /// CHECK-DAG: BoundsCheck private static void lowerOOB(int[] x) { for (int i = -1; i < x.length; i++) { sResult += x[i]; } } + /// CHECK-START: void Main.upperOOB(int[]) BCE (before) + /// CHECK-DAG: BoundsCheck + /// CHECK-START: void Main.upperOOB(int[]) BCE (after) + /// CHECK-DAG: BoundsCheck private static void upperOOB(int[] x) { for (int i = 0; i <= x.length; i++) { sResult += x[i]; } } + /// CHECK-START: void Main.doWhileUpOOB() BCE (before) + /// CHECK-DAG: BoundsCheck + /// CHECK-START: void Main.doWhileUpOOB() BCE (after) + /// CHECK-DAG: BoundsCheck + private static void doWhileUpOOB() { + int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + int i = 0; + do { + sResult += x[i++]; + } while (i <= x.length); + } + + /// CHECK-START: void Main.doWhileDownOOB() BCE (before) + /// CHECK-DAG: BoundsCheck + /// CHECK-START: void Main.doWhileDownOOB() BCE (after) + /// CHECK-DAG: BoundsCheck + private static void doWhileDownOOB() { + int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + int i = x.length - 1; + do { + sResult += x[i--]; + } while (-1 <= i); + } + // // Verifier. // @@ -274,10 +556,18 @@ public class Main { expectEquals(55, linearDown(x)); expectEquals(0, linearObscure(empty)); expectEquals(55, linearObscure(x)); + expectEquals(0, linearVeryObscure(empty)); + expectEquals(55, linearVeryObscure(x)); expectEquals(0, linearWhile(empty)); expectEquals(55, linearWhile(x)); + expectEquals(0, linearThreeWayPhi(empty)); + expectEquals(50, linearThreeWayPhi(x)); + expectEquals(0, linearFourWayPhi(empty)); + expectEquals(51, linearFourWayPhi(x)); expectEquals(0, wrapAroundThenLinear(empty)); expectEquals(55, wrapAroundThenLinear(x)); + expectEquals(0, wrapAroundThenLinearThreeWayPhi(empty)); + expectEquals(54, wrapAroundThenLinearThreeWayPhi(x)); // Linear with parameter. sResult = 0; @@ -295,6 +585,16 @@ public class Main { } } + // Linear copy. + expectEquals(0, linearCopy(empty).length); + { + int[] r = linearCopy(x); + expectEquals(x.length, r.length); + for (int i = 0; i < x.length; i++) { + expectEquals(x[i], r[i]); + } + } + // Linear with non-unit strides. expectEquals(56, linearWithCompoundStride()); expectEquals(66, linearWithLargePositiveStride()); @@ -302,6 +602,13 @@ public class Main { expectEquals(66, linearWithLargeNegativeStride()); expectEquals(66, linearWithVeryLargeNegativeStride()); + // Special forms. + expectEquals(55, linearForNEUp()); + expectEquals(55, linearForNEDown()); + expectEquals(55, linearDoWhileUp()); + expectEquals(55, linearDoWhileDown()); + expectEquals(55, linearShort()); + // Periodic adds (1, 3), one at the time. expectEquals(0, periodicIdiom(-1)); for (int tc = 0; tc < 32; tc++) { @@ -326,6 +633,28 @@ public class Main { expectEquals(tc * 16, periodicSequence4(tc)); } + // Large bounds. 
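// The justRight* loops run right up to, but never across, Integer.MAX_VALUE or
// Integer.MIN_VALUE and stay in bounds, so each sums the array to 55; justOOBUp and
// justOOBDown overrun the array and are expected to throw, which the handlers below
// record as sResult == 1.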
+ expectEquals(55, justRightUp1()); + expectEquals(55, justRightUp2()); + expectEquals(55, justRightUp3()); + expectEquals(55, justRightDown1()); + expectEquals(55, justRightDown2()); + expectEquals(55, justRightDown3()); + sResult = 0; + try { + justOOBUp(); + } catch (ArrayIndexOutOfBoundsException e) { + sResult = 1; + } + expectEquals(1, sResult); + sResult = 0; + try { + justOOBDown(); + } catch (ArrayIndexOutOfBoundsException e) { + sResult = 1; + } + expectEquals(1, sResult); + // Lower bound goes OOB. sResult = 0; try { @@ -344,6 +673,23 @@ public class Main { } expectEquals(1055, sResult); + // Do while up goes OOB. + sResult = 0; + try { + doWhileUpOOB(); + } catch (ArrayIndexOutOfBoundsException e) { + sResult += 1000; + } + expectEquals(1055, sResult); + + // Do while down goes OOB. + sResult = 0; + try { + doWhileDownOOB(); + } catch (ArrayIndexOutOfBoundsException e) { + sResult += 1000; + } + expectEquals(1055, sResult); } private static void expectEquals(int expected, int result) { diff --git a/test/532-checker-nonnull-arrayset/expected.txt b/test/532-checker-nonnull-arrayset/expected.txt new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/test/532-checker-nonnull-arrayset/expected.txt diff --git a/test/532-checker-nonnull-arrayset/info.txt b/test/532-checker-nonnull-arrayset/info.txt new file mode 100644 index 0000000000..e1578c8f14 --- /dev/null +++ b/test/532-checker-nonnull-arrayset/info.txt @@ -0,0 +1 @@ +Test that we optimize ArraySet when the value is not null. diff --git a/test/532-checker-nonnull-arrayset/src/Main.java b/test/532-checker-nonnull-arrayset/src/Main.java new file mode 100644 index 0000000000..7d8fff46ba --- /dev/null +++ b/test/532-checker-nonnull-arrayset/src/Main.java @@ -0,0 +1,39 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +public class Main { + + // Check that we don't put a null check in the card marking code. + + /// CHECK-START: void Main.test() instruction_simplifier (before) + /// CHECK: ArraySet value_can_be_null:true + + /// CHECK-START: void Main.test() instruction_simplifier (after) + /// CHECK: ArraySet value_can_be_null:false + + /// CHECK-START-X86: void Main.test() disassembly (after) + /// CHECK: ArraySet value_can_be_null:false + /// CHECK-NOT: test + /// CHECK: ReturnVoid + public static void test() { + Object[] array = new Object[1]; + Object nonNull = array[0]; + nonNull.getClass(); // Ensure nonNull has an implicit null check. 
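// Because of the implicit null check above, the compiler can mark the ArraySet below
// as value_can_be_null:false, so no null test is needed before card marking (this is
// what the checker expectations above verify).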
+ array[0] = nonNull; + } + + public static void main(String[] args) {} +} diff --git a/test/533-regression-debugphi/expected.txt b/test/533-regression-debugphi/expected.txt new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/test/533-regression-debugphi/expected.txt diff --git a/test/533-regression-debugphi/info.txt b/test/533-regression-debugphi/info.txt new file mode 100644 index 0000000000..a4d4857035 --- /dev/null +++ b/test/533-regression-debugphi/info.txt @@ -0,0 +1,2 @@ +Test a regression where DeadPhiHandling would infinitely loop over +complicated phi dependencies. diff --git a/test/533-regression-debugphi/smali/TestCase.smali b/test/533-regression-debugphi/smali/TestCase.smali new file mode 100644 index 0000000000..1908e72c57 --- /dev/null +++ b/test/533-regression-debugphi/smali/TestCase.smali @@ -0,0 +1,72 @@ +# Copyright (C) 2015 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +.class public LTestCase; +.super Ljava/lang/Object; + +# This is a reduced test case that used to trigger an infinite loop +# in the DeadPhiHandling phase of the optimizing compiler (only used +# with debuggable flag). +.method public static testCase(IILjava/lang/Object;)V + .registers 5 + const/4 v0, 0x0 + + :B4 + invoke-static {}, Ljava/lang/System;->nanoTime()J + goto :B7 + + :B7 + invoke-static {}, Ljava/lang/System;->nanoTime()J + if-nez p2, :Btmp + goto :B111 + + :Btmp + invoke-static {}, Ljava/lang/System;->nanoTime()J + if-nez p2, :B9 + goto :B110 + + :B13 + invoke-static {}, Ljava/lang/System;->nanoTime()J + add-int v0, p0, p1 + goto :B7 + + :B110 + invoke-static {}, Ljava/lang/System;->nanoTime()J + add-int v0, p0, p1 + goto :B111 + + :B111 + invoke-static {}, Ljava/lang/System;->nanoTime()J + goto :B4 + + :B9 + invoke-static {}, Ljava/lang/System;->nanoTime()J + if-nez p2, :B10 + + :B11 + invoke-static {}, Ljava/lang/System;->nanoTime()J + move v1, v0 + goto :B12 + + :B10 + invoke-static {}, Ljava/lang/System;->nanoTime()J + move-object v1, p2 + goto :B12 + + :B12 + invoke-static {}, Ljava/lang/System;->nanoTime()J + goto :B13 + + return-void +.end method diff --git a/test/533-regression-debugphi/src/Main.java b/test/533-regression-debugphi/src/Main.java new file mode 100644 index 0000000000..858770f508 --- /dev/null +++ b/test/533-regression-debugphi/src/Main.java @@ -0,0 +1,22 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +public class Main { + // Workaround for b/18051191. + class InnerClass {} + + public static void main(String[] args) {} +} diff --git a/test/534-checker-bce-deoptimization/expected.txt b/test/534-checker-bce-deoptimization/expected.txt new file mode 100644 index 0000000000..3823a29f3f --- /dev/null +++ b/test/534-checker-bce-deoptimization/expected.txt @@ -0,0 +1 @@ +finish diff --git a/test/534-checker-bce-deoptimization/info.txt b/test/534-checker-bce-deoptimization/info.txt new file mode 100644 index 0000000000..9f097d0ac7 --- /dev/null +++ b/test/534-checker-bce-deoptimization/info.txt @@ -0,0 +1,8 @@ +Checker test for testing the behavior of deoptimization generated by +bounds check elimination. + +The runtime used to trip on that test because it used to deopt the +whole stack, and the compiler was not preserving dex registers at +call sites. + +We fixed the bug by doing single frame deoptimization. diff --git a/test/534-checker-bce-deoptimization/src/Main.java b/test/534-checker-bce-deoptimization/src/Main.java new file mode 100644 index 0000000000..8cd20f677a --- /dev/null +++ b/test/534-checker-bce-deoptimization/src/Main.java @@ -0,0 +1,57 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +public class Main { + public static void main(String[] args) { + new Main().run(); + System.out.println("finish"); + } + + public void run() { + double a[][] = new double[200][201]; + double b[] = new double[200]; + int n = 100; + + foo1(a, n, b); + } + + void foo1(double a[][], int n, double b[]) { + double t; + int i,k; + + for (i = 0; i < n; i++) { + k = n - (i + 1); + b[k] /= a[k][k]; + t = -b[k]; + foo2(k + 1000, t, b); + } + } + + void foo2(int n, double c, double b[]) { + try { + foo3(n, c, b); + } catch (Exception e) { + } + } + + void foo3(int n, double c, double b[]) { + int i = 0; + for (i = 0; i < n; i++) { + b[i + 1] += c * b[i + 1]; + } + } +} + diff --git a/test/535-deopt-and-inlining/expected.txt b/test/535-deopt-and-inlining/expected.txt new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/test/535-deopt-and-inlining/expected.txt diff --git a/test/535-deopt-and-inlining/info.txt b/test/535-deopt-and-inlining/info.txt new file mode 100644 index 0000000000..717612a1ad --- /dev/null +++ b/test/535-deopt-and-inlining/info.txt @@ -0,0 +1,2 @@ +Stress test for deoptimization and JIT, to ensure the +stack visitor uses the right ArtMethod when deopting. diff --git a/test/535-deopt-and-inlining/src/Main.java b/test/535-deopt-and-inlining/src/Main.java new file mode 100644 index 0000000000..c231bf0e87 --- /dev/null +++ b/test/535-deopt-and-inlining/src/Main.java @@ -0,0 +1,55 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +public class Main { + + public static void run() { + // Loop enough to get JIT compilation. + for (int i = 0; i < 10000; ++i) { + doCall(new int[0]); + } + } + + public static void main(String[] args) throws Exception { + run(); + } + + public static void doCall(int[] array) { + try { + deopt(array); + } catch (IndexOutOfBoundsException ioobe) { + // Expected + } + } + + public static void deopt(int[] array) { + // Invoke `deopt` much more than `$inline$deopt` so that only `deopt` gets + // initially JITted. + if (call == 100) { + call = 0; + $inline$deopt(array); + } else { + call++; + } + } + + public static void $inline$deopt(int[] array) { + array[0] = 1; + array[1] = 1; + } + + static int call = 0; +} diff --git a/test/535-regression-const-val/expected.txt b/test/535-regression-const-val/expected.txt new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/test/535-regression-const-val/expected.txt diff --git a/test/535-regression-const-val/info.txt b/test/535-regression-const-val/info.txt new file mode 100644 index 0000000000..ea3e67b79c --- /dev/null +++ b/test/535-regression-const-val/info.txt @@ -0,0 +1,2 @@ +Test a regression where SsaChecker would fail comparing raw value of IntConstant +vs FloatConstant due to a static_cast sign extend. diff --git a/test/535-regression-const-val/smali/TestCase.smali b/test/535-regression-const-val/smali/TestCase.smali new file mode 100644 index 0000000000..f42f1738b5 --- /dev/null +++ b/test/535-regression-const-val/smali/TestCase.smali @@ -0,0 +1,36 @@ +# Copyright (C) 2015 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +.class public LTestCase; +.super Ljava/lang/Object; + +.method public static testCase(ZZ)I + .registers 5 + + # Create Phi [ 0.0f, -0.25f ]. + # Binary representation of -0.25f has the most significant bit set. + if-eqz p0, :else + :then + const v0, 0x0 + goto :merge + :else + const/high16 v0, 0xbe800000 + :merge + + # Now use as either float or int. + if-eqz p1, :return + float-to-int v0, v0 + :return + return v0 +.end method diff --git a/test/535-regression-const-val/src/Main.java b/test/535-regression-const-val/src/Main.java new file mode 100644 index 0000000000..858770f508 --- /dev/null +++ b/test/535-regression-const-val/src/Main.java @@ -0,0 +1,22 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +public class Main { + // Workaround for b/18051191. + class InnerClass {} + + public static void main(String[] args) {} +} diff --git a/test/800-smali/expected.txt b/test/800-smali/expected.txt index 6568eac29f..17c1f00c41 100644 --- a/test/800-smali/expected.txt +++ b/test/800-smali/expected.txt @@ -1,4 +1,6 @@ PackedSwitch +PackedSwitch key INT_MAX +PackedSwitch key overflow b/17790197 FloatBadArgReg negLong diff --git a/test/800-smali/smali/PackedSwitch.smali b/test/800-smali/smali/PackedSwitch.smali index 6a3e5f00ba..95659fb16f 100644 --- a/test/800-smali/smali/PackedSwitch.smali +++ b/test/800-smali/smali/PackedSwitch.smali @@ -24,3 +24,29 @@ goto :return .end method + +.method public static packedSwitch_INT_MAX(I)I + .registers 2 + + const/4 v0, 0 + packed-switch v0, :switch_data + goto :default + + :switch_data + .packed-switch 0x7FFFFFFE + :case1 # key = INT_MAX - 1 + :case2 # key = INT_MAX + .end packed-switch + + :return + return v1 + + :default + goto :return + + :case1 + goto :return + :case2 + goto :return + +.end method diff --git a/test/800-smali/smali/b_24399945.smali b/test/800-smali/smali/b_24399945.smali new file mode 100644 index 0000000000..68f59d0387 --- /dev/null +++ b/test/800-smali/smali/b_24399945.smali @@ -0,0 +1,32 @@ +.class public Lb_24399945; + +.super Ljava/lang/Object; + +.method public static packedSwitch_overflow(I)I + .registers 2 + + const/4 v0, 0 + packed-switch v0, :switch_data + goto :default + + :switch_data + .packed-switch 0x7FFFFFFE + :case1 # key = INT_MAX - 1 + :case2 # key = INT_MAX + :case3 # key = INT_MIN (overflow!) 
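The "(overflow!)" note above is plain two's-complement wraparound: the payload starts at key 0x7FFFFFFE (INT_MAX - 1) and lists three targets, so the implied key of the third entry is 0x7FFFFFFE + 2, which wraps to INT_MIN. A quick standalone Java check of that arithmetic (illustrative only; the class name is made up and nothing here is taken from the test):

public class PackedSwitchOverflowCheck {
    public static void main(String[] args) {
        int firstKey = 0x7FFFFFFE;                           // INT_MAX - 1, the .packed-switch base key
        int thirdKey = firstKey + 2;                          // int addition wraps around
        System.out.println(thirdKey);                         // -2147483648
        System.out.println(thirdKey == Integer.MIN_VALUE);    // true
    }
}

Because the key range cannot be represented, the verifier is expected to reject the method, which is why the test registration below pairs packedSwitch_overflow with an expected VerifyError rather than a return value.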
+ .end packed-switch + + :return + return v1 + + :default + goto :return + + :case1 + goto :return + :case2 + goto :return + :case3 + goto :return + +.end method diff --git a/test/800-smali/src/Main.java b/test/800-smali/src/Main.java index ba4990a76e..f75747d5c5 100644 --- a/test/800-smali/src/Main.java +++ b/test/800-smali/src/Main.java @@ -51,6 +51,10 @@ public class Main { testCases = new LinkedList<TestCase>(); testCases.add(new TestCase("PackedSwitch", "PackedSwitch", "packedSwitch", new Object[]{123}, null, 123)); + testCases.add(new TestCase("PackedSwitch key INT_MAX", "PackedSwitch", + "packedSwitch_INT_MAX", new Object[]{123}, null, 123)); + testCases.add(new TestCase("PackedSwitch key overflow", "b_24399945", + "packedSwitch_overflow", new Object[]{123}, new VerifyError(), null)); testCases.add(new TestCase("b/17790197", "B17790197", "getInt", null, null, 100)); testCases.add(new TestCase("FloatBadArgReg", "FloatBadArgReg", "getInt", diff --git a/test/955-lambda-smali/expected.txt b/test/955-lambda-smali/expected.txt index 36370998f4..16381e4b46 100644 --- a/test/955-lambda-smali/expected.txt +++ b/test/955-lambda-smali/expected.txt @@ -16,3 +16,13 @@ Caught NPE (MoveResult) testF success (MoveResult) testD success (MoveResult) testL success +(CaptureVariables) (0-args, 1 captured variable 'Z'): value is true +(CaptureVariables) (0-args, 1 captured variable 'B'): value is R +(CaptureVariables) (0-args, 1 captured variable 'C'): value is ∂ +(CaptureVariables) (0-args, 1 captured variable 'S'): value is 1000 +(CaptureVariables) (0-args, 1 captured variable 'I'): value is 12345678 +(CaptureVariables) (0-args, 1 captured variable 'J'): value is 3287471278325742 +(CaptureVariables) (0-args, 1 captured variable 'F'): value is Infinity +(CaptureVariables) (0-args, 1 captured variable 'D'): value is -Infinity +(CaptureVariables) (0-args, 8 captured variable 'ZBCSIJFD'): value is true,R,∂,1000,12345678,3287471278325742,Infinity,-Infinity +(CaptureVariables) Caught NPE diff --git a/test/955-lambda-smali/smali/BoxUnbox.smali b/test/955-lambda-smali/smali/BoxUnbox.smali index 108b5fafbc..915de2d55d 100644 --- a/test/955-lambda-smali/smali/BoxUnbox.smali +++ b/test/955-lambda-smali/smali/BoxUnbox.smali @@ -1,4 +1,3 @@ -# # Copyright (C) 2015 The Android Open Source Project # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -36,8 +35,8 @@ .end method #TODO: should use a closure type instead of ArtMethod. -.method public static doHelloWorld(Ljava/lang/reflect/ArtMethod;)V - .registers 3 # 1 parameters, 2 locals +.method public static doHelloWorld(J)V + .registers 4 # 1 wide parameters, 2 locals const-string v0, "(BoxUnbox) Hello boxing world! 
(0-args, no closure)" @@ -51,9 +50,9 @@ .method private static testBox()V .registers 3 - create-lambda v0, LBoxUnbox;->doHelloWorld(Ljava/lang/reflect/ArtMethod;)V + create-lambda v0, LBoxUnbox;->doHelloWorld(J)V box-lambda v2, v0 # v2 = box(v0) - unbox-lambda v0, v2, Ljava/lang/reflect/ArtMethod; # v0 = unbox(v2) + unbox-lambda v0, v2, J # v0 = unbox(v2) invoke-lambda v0, {} return-void @@ -63,7 +62,7 @@ .method private static testBoxEquality()V .registers 6 # 0 parameters, 6 locals - create-lambda v0, LBoxUnbox;->doHelloWorld(Ljava/lang/reflect/ArtMethod;)V + create-lambda v0, LBoxUnbox;->doHelloWorld(J)V box-lambda v2, v0 # v2 = box(v0) box-lambda v3, v0 # v3 = box(v0) @@ -95,7 +94,7 @@ const v0, 0 # v0 = null const v1, 0 # v1 = null :start - unbox-lambda v2, v0, Ljava/lang/reflect/ArtMethod; + unbox-lambda v2, v0, J # attempting to unbox a null lambda will throw NPE :end return-void @@ -140,7 +139,7 @@ const-string v0, "This is not a boxed lambda" :start # TODO: use \FunctionalType; here instead - unbox-lambda v2, v0, Ljava/lang/reflect/ArtMethod; + unbox-lambda v2, v0, J # can't use a string, expects a lambda object here. throws ClassCastException. :end return-void diff --git a/test/955-lambda-smali/smali/CaptureVariables.smali b/test/955-lambda-smali/smali/CaptureVariables.smali new file mode 100644 index 0000000000..f18b7ff741 --- /dev/null +++ b/test/955-lambda-smali/smali/CaptureVariables.smali @@ -0,0 +1,311 @@ +# +# Copyright (C) 2015 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +.class public LCaptureVariables; +.super Ljava/lang/Object; + +.method public constructor <init>()V +.registers 1 + invoke-direct {p0}, Ljava/lang/Object;-><init>()V + return-void +.end method + +.method public static run()V +.registers 8 + # Test boolean capture + const v2, 1 # v2 = true + capture-variable v2, "Z" + create-lambda v0, LCaptureVariables;->printCapturedVariable_Z(J)V + # TODO: create-lambda should not write to both v0 and v1 + invoke-lambda v0, {} + + # Test byte capture + const v2, 82 # v2 = 82, 'R' + capture-variable v2, "B" + create-lambda v0, LCaptureVariables;->printCapturedVariable_B(J)V + # TODO: create-lambda should not write to both v0 and v1 + invoke-lambda v0, {} + + # Test char capture + const v2, 0x2202 # v2 = 0x2202, '∂' + capture-variable v2, "C" + create-lambda v0, LCaptureVariables;->printCapturedVariable_C(J)V + # TODO: create-lambda should not write to both v0 and v1 + invoke-lambda v0, {} + + # Test short capture + const v2, 1000 # v2 = 1000 + capture-variable v2, "S" + create-lambda v0, LCaptureVariables;->printCapturedVariable_S(J)V + # TODO: create-lambda should not write to both v0 and v1 + invoke-lambda v0, {} + + # Test int capture + const v2, 12345678 + capture-variable v2, "I" + create-lambda v0, LCaptureVariables;->printCapturedVariable_I(J)V + # TODO: create-lambda should not write to both v0 and v1 + invoke-lambda v0, {} + + # Test long capture + const-wide v2, 0x0badf00dc0ffeeL # v2 = 3287471278325742 + capture-variable v2, "J" + create-lambda v0, LCaptureVariables;->printCapturedVariable_J(J)V + # TODO: create-lambda should not write to both v0 and v1 + invoke-lambda v0, {} + + # Test float capture + const v2, infinityf + capture-variable v2, "F" + create-lambda v0, LCaptureVariables;->printCapturedVariable_F(J)V + # TODO: create-lambda should not write to both v0 and v1 + invoke-lambda v0, {} + + # Test double capture + const-wide v2, -infinity + capture-variable v2, "D" + create-lambda v0, LCaptureVariables;->printCapturedVariable_D(J)V + # TODO: create-lambda should not write to both v0 and v1 + invoke-lambda v0, {} + + #TODO: capture objects and lambdas once we have support for it + + # Test capturing multiple variables + invoke-static {}, LCaptureVariables;->testMultipleCaptures()V + + # Test failures + invoke-static {}, LCaptureVariables;->testFailures()V + + return-void +.end method + +#TODO: should use a closure type instead of a long +.method public static printCapturedVariable_Z(J)V + .registers 5 # 1 wide parameter, 3 locals + + const-string v0, "(CaptureVariables) (0-args, 1 captured variable 'Z'): value is " + + sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream; + invoke-virtual {v1, v0}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V + + liberate-variable v2, p0, "Z" + invoke-virtual {v1, v2}, Ljava/io/PrintStream;->println(Z)V + + return-void +.end method + +#TODO: should use a closure type instead of a long +.method public static printCapturedVariable_B(J)V + .registers 5 # 1 wide parameter, 3 locals + + const-string v0, "(CaptureVariables) (0-args, 1 captured variable 'B'): value is " + + sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream; + invoke-virtual {v1, v0}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V + + liberate-variable v2, p0, "B" + invoke-virtual {v1, v2}, Ljava/io/PrintStream;->println(C)V # no println(B), use char instead. 
+ + return-void +.end method + +#TODO: should use a closure type instead of a long +.method public static printCapturedVariable_C(J)V + .registers 5 # 1 wide parameter, 3 locals + + const-string v0, "(CaptureVariables) (0-args, 1 captured variable 'C'): value is " + + sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream; + invoke-virtual {v1, v0}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V + + liberate-variable v2, p0, "C" + invoke-virtual {v1, v2}, Ljava/io/PrintStream;->println(C)V + + return-void +.end method + +#TODO: should use a closure type instead of a long +.method public static printCapturedVariable_S(J)V + .registers 5 # 1 wide parameter, 3 locals + + const-string v0, "(CaptureVariables) (0-args, 1 captured variable 'S'): value is " + + sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream; + invoke-virtual {v1, v0}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V + + liberate-variable v2, p0, "S" + invoke-virtual {v1, v2}, Ljava/io/PrintStream;->println(I)V # no println(S), use int instead + + return-void +.end method + +#TODO: should use a closure type instead of a long +.method public static printCapturedVariable_I(J)V + .registers 5 # 1 wide parameter, 3 locals + + const-string v0, "(CaptureVariables) (0-args, 1 captured variable 'I'): value is " + + sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream; + invoke-virtual {v1, v0}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V + + liberate-variable v2, p0, "I" + invoke-virtual {v1, v2}, Ljava/io/PrintStream;->println(I)V + + return-void +.end method + +#TODO: should use a closure type instead of a long +.method public static printCapturedVariable_J(J)V + .registers 6 # 1 wide parameter, 4 locals + + const-string v0, "(CaptureVariables) (0-args, 1 captured variable 'J'): value is " + + sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream; + invoke-virtual {v1, v0}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V + + liberate-variable v2, p0, "J" + invoke-virtual {v1, v2, v3}, Ljava/io/PrintStream;->println(J)V + + return-void +.end method + +#TODO: should use a closure type instead of a long +.method public static printCapturedVariable_F(J)V + .registers 5 # 1 parameter, 4 locals + + const-string v0, "(CaptureVariables) (0-args, 1 captured variable 'F'): value is " + + sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream; + invoke-virtual {v1, v0}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V + + liberate-variable v2, p0, "F" + invoke-virtual {v1, v2}, Ljava/io/PrintStream;->println(F)V + + return-void +.end method + +#TODO: should use a closure type instead of a long +.method public static printCapturedVariable_D(J)V + .registers 6 # 1 wide parameter, 4 locals + + const-string v0, "(CaptureVariables) (0-args, 1 captured variable 'D'): value is " + + sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream; + invoke-virtual {v1, v0}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V + + liberate-variable v2, p0, "D" + invoke-virtual {v1, v2, v3}, Ljava/io/PrintStream;->println(D)V + + return-void +.end method + +# Test capturing more than one variable. 
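For readers less familiar with the experimental capture-variable / liberate-variable opcodes used throughout this file, the source-level behaviour they model is ordinary capture of local variables by a Java lambda. A rough standalone sketch of the multi-variable case the next method exercises (class and variable names are illustrative, not derived from the smali):

public class CaptureSketch {
    interface Printer { void print(); }

    public static void main(String[] args) {
        boolean z = true;
        char c = '\u2202';
        int i = 12345678;
        long j = 0x0badf00dc0ffeeL;
        double d = Double.NEGATIVE_INFINITY;
        // javac captures z, c, i, j and d into the closure, roughly what the
        // capture-variable opcodes above do explicitly at the dex level.
        Printer p = () -> System.out.println(z + "," + c + "," + i + "," + j + "," + d);
        p.print();  // reading the captured values back corresponds to liberate-variable
    }
}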
+.method private static testMultipleCaptures()V + .registers 4 # 0 parameters, 4 locals + + const v2, 1 # v2 = true + capture-variable v2, "Z" + + const v2, 82 # v2 = 82, 'R' + capture-variable v2, "B" + + const v2, 0x2202 # v2 = 0x2202, '∂' + capture-variable v2, "C" + + const v2, 1000 # v2 = 1000 + capture-variable v2, "S" + + const v2, 12345678 + capture-variable v2, "I" + + const-wide v2, 0x0badf00dc0ffeeL # v2 = 3287471278325742 + capture-variable v2, "J" + + const v2, infinityf + capture-variable v2, "F" + + const-wide v2, -infinity + capture-variable v2, "D" + + create-lambda v0, LCaptureVariables;->printCapturedVariable_ZBCSIJFD(J)V + # TODO: create-lambda should not write to both v0 and v1 + invoke-lambda v0, {} + +.end method + +#TODO: should use a closure type instead of a long +.method public static printCapturedVariable_ZBCSIJFD(J)V + .registers 7 # 1 wide parameter, 5 locals + + const-string v0, "(CaptureVariables) (0-args, 8 captured variable 'ZBCSIJFD'): value is " + const-string v4, "," + + sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream; + invoke-virtual {v1, v0}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V + + liberate-variable v2, p0, "Z" + invoke-virtual {v1, v2}, Ljava/io/PrintStream;->print(Z)V + invoke-virtual {v1, v4}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V + + liberate-variable v2, p0, "B" + invoke-virtual {v1, v2}, Ljava/io/PrintStream;->print(C)V + invoke-virtual {v1, v4}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V + + liberate-variable v2, p0, "C" + invoke-virtual {v1, v2}, Ljava/io/PrintStream;->print(C)V + invoke-virtual {v1, v4}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V + + liberate-variable v2, p0, "S" + invoke-virtual {v1, v2}, Ljava/io/PrintStream;->print(I)V + invoke-virtual {v1, v4}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V + + liberate-variable v2, p0, "I" + invoke-virtual {v1, v2}, Ljava/io/PrintStream;->print(I)V + invoke-virtual {v1, v4}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V + + liberate-variable v2, p0, "J" + invoke-virtual {v1, v2, v3}, Ljava/io/PrintStream;->print(J)V + invoke-virtual {v1, v4}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V + + liberate-variable v2, p0, "F" + invoke-virtual {v1, v2}, Ljava/io/PrintStream;->print(F)V + invoke-virtual {v1, v4}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V + + liberate-variable v2, p0, "D" + invoke-virtual {v1, v2, v3}, Ljava/io/PrintStream;->println(D)V + + return-void +.end method + +# Test exceptions are thrown as expected when used opcodes incorrectly +.method private static testFailures()V + .registers 4 # 0 parameters, 4 locals + + const v0, 0 # v0 = null + const v1, 0 # v1 = null +:start + liberate-variable v0, v2, "Z" # invoking a null lambda shall raise an NPE +:end + return-void + +:handler + const-string v2, "(CaptureVariables) Caught NPE" + sget-object v3, Ljava/lang/System;->out:Ljava/io/PrintStream; + invoke-virtual {v3, v2}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V + + return-void + + .catch Ljava/lang/NullPointerException; {:start .. 
:end} :handler +.end method diff --git a/test/955-lambda-smali/smali/Main.smali b/test/955-lambda-smali/smali/Main.smali index 5d2aabb386..9892d6124e 100644 --- a/test/955-lambda-smali/smali/Main.smali +++ b/test/955-lambda-smali/smali/Main.smali @@ -24,6 +24,7 @@ invoke-static {}, LTrivialHelloWorld;->run()V invoke-static {}, LBoxUnbox;->run()V invoke-static {}, LMoveResult;->run()V + invoke-static {}, LCaptureVariables;->run()V # TODO: add tests when verification fails diff --git a/test/955-lambda-smali/smali/MoveResult.smali b/test/955-lambda-smali/smali/MoveResult.smali index 1725da3044..52f7ba363b 100644 --- a/test/955-lambda-smali/smali/MoveResult.smali +++ b/test/955-lambda-smali/smali/MoveResult.smali @@ -41,7 +41,7 @@ .method public static testZ()V .registers 6 - create-lambda v0, LMoveResult;->lambdaZ(Ljava/lang/reflect/ArtMethod;)Z + create-lambda v0, LMoveResult;->lambdaZ(J)Z invoke-lambda v0, {} move-result v2 const v3, 1 @@ -61,7 +61,7 @@ .end method # Lambda target for testZ. Always returns "true". -.method public static lambdaZ(Ljava/lang/reflect/ArtMethod;)Z +.method public static lambdaZ(J)Z .registers 3 const v0, 1 @@ -73,7 +73,7 @@ .method public static testB()V .registers 6 - create-lambda v0, LMoveResult;->lambdaB(Ljava/lang/reflect/ArtMethod;)B + create-lambda v0, LMoveResult;->lambdaB(J)B invoke-lambda v0, {} move-result v2 const v3, 15 @@ -93,7 +93,7 @@ .end method # Lambda target for testB. Always returns "15". -.method public static lambdaB(Ljava/lang/reflect/ArtMethod;)B +.method public static lambdaB(J)B .registers 3 # 1 parameters, 2 locals const v0, 15 @@ -105,7 +105,7 @@ .method public static testS()V .registers 6 - create-lambda v0, LMoveResult;->lambdaS(Ljava/lang/reflect/ArtMethod;)S + create-lambda v0, LMoveResult;->lambdaS(J)S invoke-lambda v0, {} move-result v2 const/16 v3, 31000 @@ -125,7 +125,7 @@ .end method # Lambda target for testS. Always returns "31000". -.method public static lambdaS(Ljava/lang/reflect/ArtMethod;)S +.method public static lambdaS(J)S .registers 3 const/16 v0, 31000 @@ -137,7 +137,7 @@ .method public static testI()V .registers 6 - create-lambda v0, LMoveResult;->lambdaI(Ljava/lang/reflect/ArtMethod;)I + create-lambda v0, LMoveResult;->lambdaI(J)I invoke-lambda v0, {} move-result v2 const v3, 128000 @@ -157,7 +157,7 @@ .end method # Lambda target for testI. Always returns "128000". -.method public static lambdaI(Ljava/lang/reflect/ArtMethod;)I +.method public static lambdaI(J)I .registers 3 const v0, 128000 @@ -167,9 +167,9 @@ # Test that chars are returned correctly via move-result. .method public static testC()V - .registers 6 + .registers 7 - create-lambda v0, LMoveResult;->lambdaC(Ljava/lang/reflect/ArtMethod;)C + create-lambda v0, LMoveResult;->lambdaC(J)C invoke-lambda v0, {} move-result v2 const v3, 65535 @@ -189,7 +189,7 @@ .end method # Lambda target for testC. Always returns "65535". -.method public static lambdaC(Ljava/lang/reflect/ArtMethod;)C +.method public static lambdaC(J)C .registers 3 const v0, 65535 @@ -199,12 +199,12 @@ # Test that longs are returned correctly via move-result. .method public static testJ()V - .registers 8 + .registers 9 - create-lambda v0, LMoveResult;->lambdaJ(Ljava/lang/reflect/ArtMethod;)J + create-lambda v0, LMoveResult;->lambdaJ(J)J invoke-lambda v0, {} move-result v2 - const-wide v4, 0xdeadf00dc0ffee + const-wide v4, 0xdeadf00dc0ffeeL if-ne v4, v2, :is_not_equal const-string v6, "(MoveResult) testJ success" @@ -220,11 +220,11 @@ .end method -# Lambda target for testC. 
Always returns "0xdeadf00dc0ffee". -.method public static lambdaJ(Ljava/lang/reflect/ArtMethod;)J - .registers 4 +# Lambda target for testC. Always returns "0xdeadf00dc0ffeeL". +.method public static lambdaJ(J)J + .registers 5 - const-wide v0, 0xdeadf00dc0ffee + const-wide v0, 0xdeadf00dc0ffeeL return-wide v0 .end method @@ -233,7 +233,7 @@ .method public static testF()V .registers 6 - create-lambda v0, LMoveResult;->lambdaF(Ljava/lang/reflect/ArtMethod;)F + create-lambda v0, LMoveResult;->lambdaF(J)F invoke-lambda v0, {} move-result v2 const v3, infinityf @@ -253,8 +253,8 @@ .end method # Lambda target for testF. Always returns "infinityf". -.method public static lambdaF(Ljava/lang/reflect/ArtMethod;)F - .registers 3 +.method public static lambdaF(J)F + .registers 4 const v0, infinityf return v0 @@ -265,10 +265,10 @@ .method public static testD()V .registers 8 - create-lambda v0, LMoveResult;->lambdaD(Ljava/lang/reflect/ArtMethod;)D + create-lambda v0, LMoveResult;->lambdaD(J)D invoke-lambda v0, {} move-result-wide v2 - const-wide v4, infinity + const-wide v4, -infinity if-ne v4, v2, :is_not_equal const-string v6, "(MoveResult) testD success" @@ -285,10 +285,10 @@ .end method # Lambda target for testD. Always returns "infinity". -.method public static lambdaD(Ljava/lang/reflect/ArtMethod;)D - .registers 4 +.method public static lambdaD(J)D + .registers 5 - const-wide v0, infinity # 123.456789 + const-wide v0, -infinity return-wide v0 .end method @@ -298,7 +298,7 @@ .method public static testL()V .registers 8 - create-lambda v0, LMoveResult;->lambdaL(Ljava/lang/reflect/ArtMethod;)Ljava/lang/String; + create-lambda v0, LMoveResult;->lambdaL(J)Ljava/lang/String; invoke-lambda v0, {} move-result-object v2 const-string v4, "Interned string" @@ -319,8 +319,8 @@ .end method # Lambda target for testL. Always returns "Interned string" (string). -.method public static lambdaL(Ljava/lang/reflect/ArtMethod;)Ljava/lang/String; - .registers 4 +.method public static lambdaL(J)Ljava/lang/String; + .registers 5 const-string v0, "Interned string" return-object v0 diff --git a/test/955-lambda-smali/smali/TrivialHelloWorld.smali b/test/955-lambda-smali/smali/TrivialHelloWorld.smali index 38ee95ac7e..3444b13a65 100644 --- a/test/955-lambda-smali/smali/TrivialHelloWorld.smali +++ b/test/955-lambda-smali/smali/TrivialHelloWorld.smali @@ -25,12 +25,12 @@ .method public static run()V .registers 8 # Trivial 0-arg hello world - create-lambda v0, LTrivialHelloWorld;->doHelloWorld(Ljava/lang/reflect/ArtMethod;)V + create-lambda v0, LTrivialHelloWorld;->doHelloWorld(J)V # TODO: create-lambda should not write to both v0 and v1 invoke-lambda v0, {} # Slightly more interesting 4-arg hello world - create-lambda v2, doHelloWorldArgs(Ljava/lang/reflect/ArtMethod;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)V + create-lambda v2, doHelloWorldArgs(JLjava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)V # TODO: create-lambda should not write to both v2 and v3 const-string v4, "A" const-string v5, "B" @@ -43,9 +43,9 @@ return-void .end method -#TODO: should use a closure type instead of ArtMethod. -.method public static doHelloWorld(Ljava/lang/reflect/ArtMethod;)V - .registers 3 # 1 parameters, 2 locals +#TODO: should use a closure type instead of jlong. +.method public static doHelloWorld(J)V + .registers 5 # 1 wide parameters, 3 locals const-string v0, "Hello world! 
(0-args, no closure)" @@ -55,17 +55,17 @@ return-void .end method -#TODO: should use a closure type instead of ArtMethod. -.method public static doHelloWorldArgs(Ljava/lang/reflect/ArtMethod;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)V - .registers 7 # 5 parameters, 2 locals +#TODO: should use a closure type instead of jlong. +.method public static doHelloWorldArgs(JLjava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)V + .registers 9 # 1 wide parameter, 4 narrow parameters, 3 locals const-string v0, " Hello world! (4-args, no closure)" sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream; - invoke-virtual {v1, p1}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V invoke-virtual {v1, p2}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V invoke-virtual {v1, p3}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V invoke-virtual {v1, p4}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V + invoke-virtual {v1, p5}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V invoke-virtual {v1, v0}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V diff --git a/test/Android.libarttest.mk b/test/Android.libarttest.mk index 7f05a043d8..bffd0e0aa6 100644 --- a/test/Android.libarttest.mk +++ b/test/Android.libarttest.mk @@ -19,6 +19,8 @@ LOCAL_PATH := $(call my-dir) include art/build/Android.common_build.mk LIBARTTEST_COMMON_SRC_FILES := \ + common/runtime_state.cc \ + common/stack_inspect.cc \ 004-JniTest/jni_test.cc \ 004-SignalTest/signaltest.cc \ 004-ReferenceMap/stack_walk_refmap_jni.cc \ @@ -26,13 +28,11 @@ LIBARTTEST_COMMON_SRC_FILES := \ 004-UnsafeTest/unsafe_test.cc \ 044-proxy/native_proxy.cc \ 051-thread/thread_test.cc \ - 088-monitor-verification/stack_inspect.cc \ - 116-nodex2oat/nodex2oat.cc \ 117-nopatchoat/nopatchoat.cc \ - 118-noimage-dex2oat/noimage-dex2oat.cc \ 1337-gc-coverage/gc_coverage.cc \ 137-cfi/cfi.cc \ 139-register-natives/regnative.cc \ + 141-class-unload/jni_unload.cc \ 454-get-vreg/get_vreg_jni.cc \ 455-set-vreg/set_vreg_jni.cc \ 457-regs/regs_jni.cc \ diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk index 29e015f534..db16b97ea6 100644 --- a/test/Android.run-test.mk +++ b/test/Android.run-test.mk @@ -369,7 +369,9 @@ TEST_ART_BROKEN_FALLBACK_RUN_TESTS := # This test dynamically enables tracing to force a deoptimization. This makes the test meaningless # when already tracing, and writes an error message that we do not want to check for. TEST_ART_BROKEN_TRACING_RUN_TESTS := \ + 087-gc-after-link \ 137-cfi \ + 141-class-unload \ 802-deoptimization ifneq (,$(filter trace stream,$(TRACE_TYPES))) @@ -417,19 +419,71 @@ endif TEST_ART_BROKEN_DEFAULT_RUN_TESTS := -# Known broken tests for the arm64 optimizing compiler backend. -TEST_ART_BROKEN_OPTIMIZING_ARM64_RUN_TESTS := - -ifneq (,$(filter optimizing,$(COMPILER_TYPES))) - ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,target,$(RUN_TYPES),$(PREBUILD_TYPES), \ - optimizing,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \ - $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES),$(TEST_ART_BROKEN_OPTIMIZING_ARM64_RUN_TESTS),64) +# Known broken tests for the mips32 optimizing compiler backend. 
+TEST_ART_BROKEN_OPTIMIZING_MIPS_RUN_TESTS := \ + 441-checker-inliner \ + 442-checker-constant-folding \ + 444-checker-nce \ + 445-checker-licm \ + 446-checker-inliner2 \ + 447-checker-inliner3 \ + 449-checker-bce \ + 450-checker-types \ + 455-checker-gvn \ + 458-checker-instruction-simplification \ + 462-checker-inlining-across-dex-files \ + 463-checker-boolean-simplifier \ + 464-checker-inline-sharpen-calls \ + 465-checker-clinit-gvn \ + 468-checker-bool-simplifier-regression \ + 473-checker-inliner-constants \ + 474-checker-boolean-input \ + 476-checker-ctor-memory-barrier \ + 477-checker-bound-type \ + 478-checker-clinit-check-pruning \ + 478-checker-inliner-nested-loop \ + 480-checker-dead-blocks \ + 482-checker-loop-back-edge-use \ + 484-checker-register-hints \ + 485-checker-dce-loop-update \ + 485-checker-dce-switch \ + 486-checker-must-do-null-check \ + 487-checker-inline-calls \ + 488-checker-inline-recursive-calls \ + 490-checker-inline \ + 492-checker-inline-invoke-interface \ + 493-checker-inline-invoke-interface \ + 494-checker-instanceof-tests \ + 495-checker-checkcast-tests \ + 496-checker-inlining-and-class-loader \ + 508-checker-disassembly \ + 510-checker-try-catch \ + 517-checker-builder-fallthrough \ + 521-checker-array-set-null \ + 522-checker-regression-monitor-exit \ + 523-checker-can-throw-regression \ + 525-checker-arrays-and-fields \ + 526-checker-caller-callee-regs \ + 529-checker-unresolved \ + 530-checker-loops \ + 530-checker-regression-reftype-final \ + 532-checker-nonnull-arrayset \ + 534-checker-bce-deoptimization \ + +ifeq (mips,$(TARGET_ARCH)) + ifneq (,$(filter optimizing,$(COMPILER_TYPES))) + ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,target,$(RUN_TYPES),$(PREBUILD_TYPES), \ + optimizing,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \ + $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \ + $(TEST_ART_BROKEN_OPTIMIZING_MIPS_RUN_TESTS),$(ALL_ADDRESS_SIZES)) + endif endif -TEST_ART_BROKEN_OPTIMIZING_ARM64_RUN_TESTS := +TEST_ART_BROKEN_OPTIMIZING_MIPS_RUN_TESTS := # Known broken tests for the optimizing compiler. -TEST_ART_BROKEN_OPTIMIZING_RUN_TESTS := +TEST_ART_BROKEN_OPTIMIZING_RUN_TESTS := \ + 455-set-vreg \ ifneq (,$(filter optimizing,$(COMPILER_TYPES))) ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \ @@ -437,20 +491,11 @@ ifneq (,$(filter optimizing,$(COMPILER_TYPES))) $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES),$(TEST_ART_BROKEN_OPTIMIZING_RUN_TESTS),$(ALL_ADDRESS_SIZES)) endif -# If ART_USE_OPTIMIZING_COMPILER is set to true, then the default core.art has been -# compiled with the optimizing compiler. -ifeq ($(ART_USE_OPTIMIZING_COMPILER),true) - ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \ - default,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \ - $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES),$(TEST_ART_BROKEN_OPTIMIZING_RUN_TESTS),$(ALL_ADDRESS_SIZES)) -endif - TEST_ART_BROKEN_OPTIMIZING_RUN_TESTS := # Tests that should fail when the optimizing compiler compiles them non-debuggable. 
TEST_ART_BROKEN_OPTIMIZING_NONDEBUGGABLE_RUN_TESTS := \ 454-get-vreg \ - 455-set-vreg \ 457-regs \ ifneq (,$(filter optimizing,$(COMPILER_TYPES))) diff --git a/test/common/runtime_state.cc b/test/common/runtime_state.cc new file mode 100644 index 0000000000..082c9b3c8d --- /dev/null +++ b/test/common/runtime_state.cc @@ -0,0 +1,119 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "jni.h" + +#include "base/logging.h" +#include "dex_file-inl.h" +#include "mirror/class-inl.h" +#include "nth_caller_visitor.h" +#include "runtime.h" +#include "scoped_thread_state_change.h" +#include "stack.h" +#include "thread-inl.h" + +namespace art { + +// public static native boolean hasOatFile(); + +extern "C" JNIEXPORT jboolean JNICALL Java_Main_hasOatFile(JNIEnv* env, jclass cls) { + ScopedObjectAccess soa(env); + + mirror::Class* klass = soa.Decode<mirror::Class*>(cls); + const DexFile& dex_file = klass->GetDexFile(); + const OatFile::OatDexFile* oat_dex_file = dex_file.GetOatDexFile(); + return (oat_dex_file != nullptr) ? JNI_TRUE : JNI_FALSE; +} + +// public static native boolean runtimeIsSoftFail(); + +extern "C" JNIEXPORT jboolean JNICALL Java_Main_runtimeIsSoftFail(JNIEnv* env ATTRIBUTE_UNUSED, + jclass cls ATTRIBUTE_UNUSED) { + return Runtime::Current()->IsVerificationSoftFail() ? JNI_TRUE : JNI_FALSE; +} + +// public static native boolean isDex2OatEnabled(); + +extern "C" JNIEXPORT jboolean JNICALL Java_Main_isDex2OatEnabled(JNIEnv* env ATTRIBUTE_UNUSED, + jclass cls ATTRIBUTE_UNUSED) { + return Runtime::Current()->IsDex2OatEnabled(); +} + +// public static native boolean hasImage(); + +extern "C" JNIEXPORT jboolean JNICALL Java_Main_hasImage(JNIEnv* env ATTRIBUTE_UNUSED, + jclass cls ATTRIBUTE_UNUSED) { + return Runtime::Current()->GetHeap()->HasImageSpace(); +} + +// public static native boolean isImageDex2OatEnabled(); + +extern "C" JNIEXPORT jboolean JNICALL Java_Main_isImageDex2OatEnabled(JNIEnv* env ATTRIBUTE_UNUSED, + jclass cls ATTRIBUTE_UNUSED) { + return Runtime::Current()->IsImageDex2OatEnabled(); +} + +// public static native boolean compiledWithOptimizing(); +// Did we use the optimizing compiler to compile this? + +extern "C" JNIEXPORT jboolean JNICALL Java_Main_compiledWithOptimizing(JNIEnv* env, jclass cls) { + ScopedObjectAccess soa(env); + + mirror::Class* klass = soa.Decode<mirror::Class*>(cls); + const DexFile& dex_file = klass->GetDexFile(); + const OatFile::OatDexFile* oat_dex_file = dex_file.GetOatDexFile(); + if (oat_dex_file == nullptr) { + // Could be JIT, which also uses optimizing, but conservatively say no. + return JNI_FALSE; + } + const OatFile* oat_file = oat_dex_file->GetOatFile(); + CHECK(oat_file != nullptr); + + const char* cmd_line = oat_file->GetOatHeader().GetStoreValueByKey(OatHeader::kDex2OatCmdLineKey); + CHECK(cmd_line != nullptr); // Huh? This should not happen. + + // Check the backend. 
+ constexpr const char* kCompilerBackend = "--compiler-backend="; + const char* backend = strstr(cmd_line, kCompilerBackend); + if (backend != nullptr) { + // If it's set, make sure it's optimizing. + backend += strlen(kCompilerBackend); + if (strncmp(backend, "Optimizing", strlen("Optimizing")) != 0) { + return JNI_FALSE; + } + } + + // Check the filter. + constexpr const char* kCompilerFilter = "--compiler-filter="; + const char* filter = strstr(cmd_line, kCompilerFilter); + if (filter != nullptr) { + // If it's set, make sure it's not interpret-only|verify-none|verify-at-runtime. + // Note: The space filter might have an impact on the test, but ignore that for now. + filter += strlen(kCompilerFilter); + constexpr const char* kInterpretOnly = "interpret-only"; + constexpr const char* kVerifyNone = "verify-none"; + constexpr const char* kVerifyAtRuntime = "verify-at-runtime"; + if (strncmp(filter, kInterpretOnly, strlen(kInterpretOnly)) == 0 || + strncmp(filter, kVerifyNone, strlen(kVerifyNone)) == 0 || + strncmp(filter, kVerifyAtRuntime, strlen(kVerifyAtRuntime)) == 0) { + return JNI_FALSE; + } + } + + return JNI_TRUE; +} + +} // namespace art diff --git a/test/088-monitor-verification/stack_inspect.cc b/test/common/stack_inspect.cc index e2899c3d68..922eae61e2 100644 --- a/test/088-monitor-verification/stack_inspect.cc +++ b/test/common/stack_inspect.cc @@ -27,25 +27,39 @@ namespace art { -// public static native void assertCallerIsInterpreted(); +static bool asserts_enabled = true; -extern "C" JNIEXPORT void JNICALL Java_Main_assertCallerIsInterpreted(JNIEnv* env, jclass) { - LOG(INFO) << "assertCallerIsInterpreted"; +// public static native void disableStackFrameAsserts(); +// Note: to globally disable asserts in unsupported configurations. +extern "C" JNIEXPORT void JNICALL Java_Main_disableStackFrameAsserts(JNIEnv* env ATTRIBUTE_UNUSED, + jclass cls ATTRIBUTE_UNUSED) { + asserts_enabled = false; +} + + +// public static native boolean isInterpreted(); + +extern "C" JNIEXPORT jboolean JNICALL Java_Main_isInterpreted(JNIEnv* env, jclass) { ScopedObjectAccess soa(env); NthCallerVisitor caller(soa.Self(), 1, false); caller.WalkStack(); CHECK(caller.caller != nullptr); - LOG(INFO) << PrettyMethod(caller.caller); - CHECK(caller.GetCurrentShadowFrame() != nullptr); + return caller.GetCurrentShadowFrame() != nullptr ? JNI_TRUE : JNI_FALSE; } -// public static native void assertCallerIsManaged(); +// public static native void assertIsInterpreted(); -extern "C" JNIEXPORT void JNICALL Java_Main_assertCallerIsManaged(JNIEnv* env, jclass cls) { - // Note: needs some smarts to not fail if there is no managed code, at all. - LOG(INFO) << "assertCallerIsManaged"; +extern "C" JNIEXPORT void JNICALL Java_Main_assertIsInterpreted(JNIEnv* env, jclass klass) { + if (asserts_enabled) { + CHECK(Java_Main_isInterpreted(env, klass)); + } +} + +// public static native boolean isManaged(); + +extern "C" JNIEXPORT jboolean JNICALL Java_Main_isManaged(JNIEnv* env, jclass cls) { ScopedObjectAccess soa(env); mirror::Class* klass = soa.Decode<mirror::Class*>(cls); @@ -54,28 +68,22 @@ extern "C" JNIEXPORT void JNICALL Java_Main_assertCallerIsManaged(JNIEnv* env, j if (oat_dex_file == nullptr) { // No oat file, this must be a test configuration that doesn't compile at all. Ignore that the // result will be that we're running the interpreter. 
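The comments in test/common/runtime_state.cc and test/common/stack_inspect.cc spell out the Java-side signatures these JNI entry points implement. As a rough sketch of how a run-test Main.java might declare and call them (the class body and the System.loadLibrary(args[0]) line are assumptions for illustration, not copied from any test in this change):

public class Main {
    // Declarations mirroring the "public static native ..." comments in runtime_state.cc.
    public static native boolean hasOatFile();
    public static native boolean runtimeIsSoftFail();
    public static native boolean isDex2OatEnabled();
    public static native boolean hasImage();
    public static native boolean isImageDex2OatEnabled();
    public static native boolean compiledWithOptimizing();

    // Declarations mirroring the comments in stack_inspect.cc.
    public static native void disableStackFrameAsserts();
    public static native boolean isInterpreted();
    public static native void assertIsInterpreted();
    public static native boolean isManaged();
    public static native void assertIsManaged();

    public static void main(String[] args) {
        System.loadLibrary(args[0]);  // assumed: the harness passes the arttest library name
        System.out.println("hasOatFile: " + hasOatFile());
        System.out.println("isInterpreted: " + isInterpreted());
        System.out.println("compiledWithOptimizing: " + compiledWithOptimizing());
    }
}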
- return; + return JNI_FALSE; } NthCallerVisitor caller(soa.Self(), 1, false); caller.WalkStack(); CHECK(caller.caller != nullptr); - LOG(INFO) << PrettyMethod(caller.caller); - if (caller.GetCurrentShadowFrame() == nullptr) { - // Not a shadow frame, this looks good. - return; - } + return caller.GetCurrentShadowFrame() != nullptr ? JNI_FALSE : JNI_TRUE; +} + +// public static native void assertIsManaged(); - // This could be an interpret-only or a verify-at-runtime compilation, or a read-barrier variant, - // or... It's not really safe to just reject now. Let's look at the access flags. If the method - // was successfully verified, its access flags should be set to mark it preverified, except when - // we're running soft-fail tests. - if (Runtime::Current()->IsVerificationSoftFail()) { - // Soft-fail config. Everything should be running with interpreter access checks, potentially. - return; +extern "C" JNIEXPORT void JNICALL Java_Main_assertIsManaged(JNIEnv* env, jclass cls) { + if (asserts_enabled) { + CHECK(Java_Main_isManaged(env, cls)); } - CHECK(caller.caller->IsPreverified()); } } // namespace art diff --git a/test/run-test b/test/run-test index 828939d247..a5b6e92869 100755 --- a/test/run-test +++ b/test/run-test @@ -392,7 +392,7 @@ fi # Most interesting target architecture variables are Makefile variables, not environment variables. # Try to map the suffix64 flag and what we find in ${ANDROID_PRODUCT_OUT}/data/art-test to an architecture name. -function guess_arch_name() { +function guess_target_arch_name() { grep32bit=`ls ${ANDROID_PRODUCT_OUT}/data/art-test | grep -E '^(arm|x86|mips)$'` grep64bit=`ls ${ANDROID_PRODUCT_OUT}/data/art-test | grep -E '^(arm64|x86_64|mips64)$'` if [ "x${suffix64}" = "x64" ]; then @@ -402,6 +402,14 @@ function guess_arch_name() { fi } +function guess_host_arch_name() { + if [ "x${suffix64}" = "x64" ]; then + host_arch_name="x86_64" + else + host_arch_name="x86" + fi +} + if [ "$target_mode" = "no" ]; then if [ "$runtime" = "jvm" ]; then if [ "$prebuild_mode" = "yes" ]; then @@ -437,10 +445,11 @@ elif [ "$runtime" = "art" ]; then if [ -z "$ANDROID_HOST_OUT" ]; then export ANDROID_HOST_OUT=$ANDROID_BUILD_TOP/out/host/linux-x86 fi + guess_host_arch_name run_args="${run_args} --boot ${ANDROID_HOST_OUT}/framework/core${image_suffix}${pic_image_suffix}.art" run_args="${run_args} --runtime-option -Djava.library.path=${ANDROID_HOST_OUT}/lib${suffix64}" else - guess_arch_name + guess_target_arch_name run_args="${run_args} --runtime-option -Djava.library.path=/data/art-test/${target_arch_name}" run_args="${run_args} --boot /data/art-test/core${image_suffix}${pic_image_suffix}.art" fi @@ -635,7 +644,7 @@ if [[ "$TEST_NAME" =~ ^[0-9]+-checker- ]]; then run_checker="yes" if [ "$target_mode" = "no" ]; then cfg_output_dir="$tmp_dir" - checker_arch_option= + checker_arch_option="--arch=${host_arch_name^^}" else cfg_output_dir="$DEX_LOCATION" checker_arch_option="--arch=${target_arch_name^^}" diff --git a/tools/ahat/Android.mk b/tools/ahat/Android.mk index 3c1522c3d8..71366c1313 100644 --- a/tools/ahat/Android.mk +++ b/tools/ahat/Android.mk @@ -16,6 +16,8 @@ LOCAL_PATH := $(call my-dir) +include art/build/Android.common_test.mk + # --- ahat.jar ---------------- include $(CLEAR_VARS) LOCAL_SRC_FILES := $(call all-java-files-under, src) @@ -44,7 +46,7 @@ $(LOCAL_BUILT_MODULE): $(LOCAL_PATH)/ahat $(ACP) ahat: $(LOCAL_BUILT_MODULE) -# --- ahat-test.jar -------------- +# --- ahat-tests.jar -------------- include $(CLEAR_VARS) LOCAL_SRC_FILES := $(call 
all-java-files-under, test) LOCAL_JAR_MANIFEST := test/manifest.txt @@ -53,6 +55,42 @@ LOCAL_IS_HOST_MODULE := true LOCAL_MODULE_TAGS := tests LOCAL_MODULE := ahat-tests include $(BUILD_HOST_JAVA_LIBRARY) +AHAT_TEST_JAR := $(LOCAL_BUILT_MODULE) + +# --- ahat-test-dump.jar -------------- +include $(CLEAR_VARS) +LOCAL_MODULE := ahat-test-dump +LOCAL_MODULE_TAGS := tests +LOCAL_SRC_FILES := $(call all-java-files-under, test-dump) +include $(BUILD_HOST_DALVIK_JAVA_LIBRARY) + +# Determine the location of the test-dump.jar and test-dump.hprof files. +# These use variables set implicitly by the include of +# BUILD_HOST_DALVIK_JAVA_LIBRARY above. +AHAT_TEST_DUMP_JAR := $(LOCAL_BUILT_MODULE) +AHAT_TEST_DUMP_HPROF := $(intermediates.COMMON)/test-dump.hprof + +# Run ahat-test-dump.jar to generate test-dump.hprof +AHAT_TEST_DUMP_DEPENDENCIES := \ + $(ART_HOST_EXECUTABLES) \ + $(HOST_OUT_EXECUTABLES)/art \ + $(HOST_CORE_IMG_OUT_BASE)$(CORE_IMG_SUFFIX) + +$(AHAT_TEST_DUMP_HPROF): PRIVATE_AHAT_TEST_ART := $(HOST_OUT_EXECUTABLES)/art +$(AHAT_TEST_DUMP_HPROF): PRIVATE_AHAT_TEST_DUMP_JAR := $(AHAT_TEST_DUMP_JAR) +$(AHAT_TEST_DUMP_HPROF): PRIVATE_AHAT_TEST_DUMP_DEPENDENCIES := $(AHAT_TEST_DUMP_DEPENDENCIES) +$(AHAT_TEST_DUMP_HPROF): $(AHAT_TEST_DUMP_JAR) $(AHAT_TEST_DUMP_DEPENDENCIES) + $(PRIVATE_AHAT_TEST_ART) -cp $(PRIVATE_AHAT_TEST_DUMP_JAR) Main $@ + +.PHONY: ahat-test +ahat-test: PRIVATE_AHAT_TEST_DUMP_HPROF := $(AHAT_TEST_DUMP_HPROF) +ahat-test: PRIVATE_AHAT_TEST_JAR := $(AHAT_TEST_JAR) +ahat-test: $(AHAT_TEST_JAR) $(AHAT_TEST_DUMP_HPROF) + java -Dahat.test.dump.hprof=$(PRIVATE_AHAT_TEST_DUMP_HPROF) -jar $(PRIVATE_AHAT_TEST_JAR) + +# Clean up local variables. +AHAT_TEST_DUMP_DEPENDENCIES := +AHAT_TEST_DUMP_HPROF := +AHAT_TEST_DUMP_JAR := +AHAT_TEST_JAR := -ahat-test: $(LOCAL_BUILT_MODULE) - java -jar $< diff --git a/tools/ahat/src/AhatSnapshot.java b/tools/ahat/src/AhatSnapshot.java index 2437d0388c..3035ef75c9 100644 --- a/tools/ahat/src/AhatSnapshot.java +++ b/tools/ahat/src/AhatSnapshot.java @@ -18,13 +18,18 @@ package com.android.ahat; import com.android.tools.perflib.heap.ClassObj; import com.android.tools.perflib.heap.Heap; +import com.android.tools.perflib.heap.HprofParser; import com.android.tools.perflib.heap.Instance; import com.android.tools.perflib.heap.RootObj; import com.android.tools.perflib.heap.Snapshot; import com.android.tools.perflib.heap.StackFrame; import com.android.tools.perflib.heap.StackTrace; +import com.android.tools.perflib.heap.io.HprofBuffer; +import com.android.tools.perflib.heap.io.MemoryMappedFileBuffer; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; +import java.io.File; +import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -47,7 +52,22 @@ class AhatSnapshot { private Site mRootSite; private Map<Heap, Long> mHeapSizes; - public AhatSnapshot(Snapshot snapshot) { + /** + * Create an AhatSnapshot from an hprof file. + */ + public static AhatSnapshot fromHprof(File hprof) throws IOException { + HprofBuffer buffer = new MemoryMappedFileBuffer(hprof); + Snapshot snapshot = (new HprofParser(buffer)).parse(); + snapshot.computeDominators(); + return new AhatSnapshot(snapshot); + } + + /** + * Construct an AhatSnapshot for the given perflib snapshot. + * Ther user is responsible for calling snapshot.computeDominators before + * calling this AhatSnapshot constructor. 
+ */ + private AhatSnapshot(Snapshot snapshot) { mSnapshot = snapshot; mHeaps = new ArrayList<Heap>(mSnapshot.getHeaps()); mDominated = new HashMap<Instance, List<Instance>>(); @@ -92,6 +112,11 @@ class AhatSnapshot { } } + // Note: This method is exposed for testing purposes. + public ClassObj findClass(String name) { + return mSnapshot.findClass(name); + } + public Instance findInstance(long id) { return mSnapshot.findInstance(id); } diff --git a/tools/ahat/src/InstanceUtils.java b/tools/ahat/src/InstanceUtils.java index 7ee3ff24ed..a6ac3b8765 100644 --- a/tools/ahat/src/InstanceUtils.java +++ b/tools/ahat/src/InstanceUtils.java @@ -32,7 +32,7 @@ class InstanceUtils { * given name. */ public static boolean isInstanceOfClass(Instance inst, String className) { - ClassObj cls = inst.getClassObj(); + ClassObj cls = (inst == null) ? null : inst.getClassObj(); return (cls != null && className.equals(cls.getClassName())); } @@ -132,7 +132,7 @@ class InstanceUtils { * Read a field of an instance. * Returns null if the field value is null or if the field couldn't be read. */ - private static Object getField(Instance inst, String fieldName) { + public static Object getField(Instance inst, String fieldName) { if (!(inst instanceof ClassInstance)) { return null; } diff --git a/tools/ahat/src/Main.java b/tools/ahat/src/Main.java index 2e2ddd299f..1563aa0262 100644 --- a/tools/ahat/src/Main.java +++ b/tools/ahat/src/Main.java @@ -16,10 +16,6 @@ package com.android.ahat; -import com.android.tools.perflib.heap.HprofParser; -import com.android.tools.perflib.heap.Snapshot; -import com.android.tools.perflib.heap.io.HprofBuffer; -import com.android.tools.perflib.heap.io.MemoryMappedFileBuffer; import com.sun.net.httpserver.HttpServer; import java.io.File; import java.io.IOException; @@ -71,15 +67,8 @@ public class Main { return; } - System.out.println("Reading hprof file..."); - HprofBuffer buffer = new MemoryMappedFileBuffer(hprof); - Snapshot snapshot = (new HprofParser(buffer)).parse(); - - System.out.println("Computing Dominators..."); - snapshot.computeDominators(); - - System.out.println("Processing snapshot for ahat..."); - AhatSnapshot ahat = new AhatSnapshot(snapshot); + System.out.println("Processing hprof file..."); + AhatSnapshot ahat = AhatSnapshot.fromHprof(hprof); InetAddress loopback = InetAddress.getLoopbackAddress(); InetSocketAddress addr = new InetSocketAddress(loopback, port); diff --git a/tools/ahat/test-dump/Main.java b/tools/ahat/test-dump/Main.java new file mode 100644 index 0000000000..cea1dc179e --- /dev/null +++ b/tools/ahat/test-dump/Main.java @@ -0,0 +1,52 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import dalvik.system.VMDebug; +import java.io.IOException; + +/** + * Program used to create a heap dump for test purposes. + */ +public class Main { + // Keep a reference to the DumpedStuff instance so that it is not garbage + // collected before we take the heap dump. 
+ public static DumpedStuff stuff; + + // We will take a heap dump that includes a single instance of this + // DumpedStuff class. Objects stored as fields in this class can be easily + // found in the hprof dump by searching for the instance of the DumpedStuff + // class and reading the desired field. + public static class DumpedStuff { + public String basicString = "hello, world"; + public String nullString = null; + public Object anObject = new Object(); + } + + public static void main(String[] args) throws IOException { + if (args.length < 1) { + System.err.println("no output file specified"); + return; + } + String file = args[0]; + + // Allocate the instance of DumpedStuff. + stuff = new DumpedStuff(); + + // Take a heap dump that will include that instance of DumpedStuff. + System.err.println("Dumping hprof data to " + file); + VMDebug.dumpHprofData(file); + } +} diff --git a/tools/ahat/test/InstanceUtilsTest.java b/tools/ahat/test/InstanceUtilsTest.java new file mode 100644 index 0000000000..7613df4994 --- /dev/null +++ b/tools/ahat/test/InstanceUtilsTest.java @@ -0,0 +1,48 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.android.ahat; + +import com.android.tools.perflib.heap.Instance; +import java.io.IOException; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import org.junit.Test; + +public class InstanceUtilsTest { + @Test + public void basicString() throws IOException { + TestDump dump = TestDump.getTestDump(); + Instance str = (Instance)dump.getDumpedThing("basicString"); + assertEquals("hello, world", InstanceUtils.asString(str)); + } + + @Test + public void nullString() throws IOException { + TestDump dump = TestDump.getTestDump(); + Instance obj = (Instance)dump.getDumpedThing("nullString"); + assertNull(InstanceUtils.asString(obj)); + } + + @Test + public void notString() throws IOException { + TestDump dump = TestDump.getTestDump(); + Instance obj = (Instance)dump.getDumpedThing("anObject"); + assertNotNull(obj); + assertNull(InstanceUtils.asString(obj)); + } +} diff --git a/tools/ahat/test/TestDump.java b/tools/ahat/test/TestDump.java new file mode 100644 index 0000000000..c3a76e4f18 --- /dev/null +++ b/tools/ahat/test/TestDump.java @@ -0,0 +1,89 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
diff --git a/tools/ahat/test/TestDump.java b/tools/ahat/test/TestDump.java
new file mode 100644
index 0000000000..c3a76e4f18
--- /dev/null
+++ b/tools/ahat/test/TestDump.java
@@ -0,0 +1,89 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.ahat;
+
+import com.android.tools.perflib.heap.ClassObj;
+import com.android.tools.perflib.heap.Field;
+import com.android.tools.perflib.heap.Instance;
+import java.io.File;
+import java.io.IOException;
+import java.util.Map;
+
+/**
+ * The TestDump class is used to get an AhatSnapshot for the test-dump
+ * program.
+ */
+public class TestDump {
+  // It can take on the order of a second to parse and process the test-dump
+  // hprof. To avoid repeating this overhead for each test case, we cache the
+  // loaded instance of TestDump and reuse it when possible. In theory the
+  // test cases should not be able to modify the cached snapshot in a way that
+  // is visible to other test cases.
+  private static TestDump mCachedTestDump = null;
+
+  private AhatSnapshot mSnapshot = null;
+
+  /**
+   * Load the test-dump.hprof file.
+   * The location of the file is read from the system property
+   * "ahat.test.dump.hprof", which is expected to be set on the command line.
+   * For example:
+   *   java -Dahat.test.dump.hprof=test-dump.hprof -jar ahat-tests.jar
+   *
+   * An IOException is thrown if there is a failure reading the hprof file.
+   */
+  private TestDump() throws IOException {
+    String hprof = System.getProperty("ahat.test.dump.hprof");
+    mSnapshot = AhatSnapshot.fromHprof(new File(hprof));
+  }
+
+  /**
+   * Get the AhatSnapshot for the test dump program.
+   */
+  public AhatSnapshot getAhatSnapshot() {
+    return mSnapshot;
+  }
+
+  /**
+   * Return the value of a field in the DumpedStuff instance in the
+   * snapshot for the test-dump program.
+   */
+  public Object getDumpedThing(String name) {
+    ClassObj main = mSnapshot.findClass("Main");
+    Instance stuff = null;
+    for (Map.Entry<Field, Object> fields : main.getStaticFieldValues().entrySet()) {
+      if ("stuff".equals(fields.getKey().getName())) {
+        stuff = (Instance) fields.getValue();
+      }
+    }
+    return InstanceUtils.getField(stuff, name);
+  }
+
+  /**
+   * Get the test dump.
+   * An IOException is thrown if there is an error reading the test dump hprof
+   * file.
+   * To improve performance, this returns a cached instance of the TestDump
+   * when possible.
+   */
+  public static synchronized TestDump getTestDump() throws IOException {
+    if (mCachedTestDump == null) {
+      mCachedTestDump = new TestDump();
+    }
+    return mCachedTestDump;
+  }
+}
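TestDump.getDumpedThing() works by locating the static "stuff" field on the dumped Main class and then reading the named field from that instance with InstanceUtils.getField(). Tests that need more than a single field value can also go through getAhatSnapshot() and the findClass() method that AhatSnapshot exposes for testing above; a small sketch (the test name and assertion are illustrative only, not part of this change):

  // Hypothetical test sketch; would live alongside the other com.android.ahat tests.
  @Test
  public void mainClassIsInDump() throws IOException {
    TestDump dump = TestDump.getTestDump();
    AhatSnapshot snapshot = dump.getAhatSnapshot();
    // findClass is the AhatSnapshot method exposed for testing purposes.
    ClassObj main = snapshot.findClass("Main");
    assertNotNull(main);
  }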
diff --git a/tools/ahat/test/Tests.java b/tools/ahat/test/Tests.java
index fb53d90801..bab712199c 100644
--- a/tools/ahat/test/Tests.java
+++ b/tools/ahat/test/Tests.java
@@ -22,6 +22,7 @@ public class Tests {
   public static void main(String[] args) {
     if (args.length == 0) {
       args = new String[]{
+        "com.android.ahat.InstanceUtilsTest",
         "com.android.ahat.QueryTest",
         "com.android.ahat.SortTest"
       };
@@ -89,6 +89,7 @@ if [ z"$PERF" != z ]; then
   invoke_with="perf record -o $ANDROID_DATA/perf.data -e cycles:u $invoke_with"
 fi
 
+# We use the PIC core image to work with perf.
 ANDROID_DATA=$ANDROID_DATA \
   ANDROID_ROOT=$ANDROID_ROOT \
   LD_LIBRARY_PATH=$LD_LIBRARY_PATH \
@@ -97,7 +98,7 @@ ANDROID_DATA=$ANDROID_DATA \
   $invoke_with $ANDROID_ROOT/bin/$DALVIKVM $lib \
     -XXlib:$LIBART \
     -Xnorelocate \
-    -Ximage:$ANDROID_ROOT/framework/core.art \
+    -Ximage:$ANDROID_ROOT/framework/core-optimizing-pic.art \
     -Xcompiler-option --generate-debug-info \
     "$@"
diff --git a/tools/buildbot-build.sh b/tools/buildbot-build.sh
index a670fc7738..de9b35d3ea 100755
--- a/tools/buildbot-build.sh
+++ b/tools/buildbot-build.sh
@@ -20,21 +20,11 @@ if [ ! -d art ]; then
 fi
 
 common_targets="vogar vogar.jar core-tests apache-harmony-jdwp-tests-hostdex jsr166-tests"
-android_root="/data/local/tmp/system"
-linker="linker"
 mode="target"
 j_arg="-j$(nproc)"
 showcommands=
 make_command=
 
-case "$TARGET_PRODUCT" in
-  (armv8|mips64r6) linker="linker64";;
-esac
-
-if [[ "$ART_TEST_ANDROID_ROOT" != "" ]]; then
-  android_root="$ART_TEST_ANDROID_ROOT"
-fi
-
 while true; do
   if [[ "$1" == "--host" ]]; then
     mode="host"
@@ -42,16 +32,6 @@ while true; do
   elif [[ "$1" == "--target" ]]; then
     mode="target"
     shift
-  elif [[ "$1" == "--32" ]]; then
-    linker="linker"
-    shift
-  elif [[ "$1" == "--64" ]]; then
-    linker="linker64"
-    shift
-  elif [[ "$1" == "--android-root" ]]; then
-    shift
-    android_root=$1
-    shift
   elif [[ "$1" == -j* ]]; then
     j_arg=$1
     shift
@@ -64,25 +44,10 @@ while true; do
 done
 
 if [[ $mode == "host" ]]; then
-  make_command="make $j_arg build-art-host-tests $common_targets out/host/linux-x86/lib/libjavacoretests.so out/host/linux-x86/lib64/libjavacoretests.so"
-  echo "Executing $make_command"
-  $make_command
+  make_command="make $j_arg $showcommands build-art-host-tests $common_targets out/host/linux-x86/lib/libjavacoretests.so out/host/linux-x86/lib64/libjavacoretests.so"
 elif [[ $mode == "target" ]]; then
-  # We need to provide our own linker in case the linker on the device
-  # is out of date.
-  env="TARGET_GLOBAL_LDFLAGS=-Wl,-dynamic-linker=$android_root/bin/$linker"
-  # gcc gives a linker error, so compile with clang.
-  # TODO: investigate and fix?
-  if [[ $TARGET_PRODUCT == "mips32r2_fp" ]]; then
-    env="$env USE_CLANG_PLATFORM_BUILD=true"
-  fi
-  # Disable NINJA for building on target, it does not support the -e option to Makefile.
-  env="$env USE_NINJA=false"
-  # Use '-e' to force the override of TARGET_GLOBAL_LDFLAGS.
-  # Also, we build extra tools that will be used by tests, so that
-  # they are compiled with our own linker.
-  make_command="make -e $j_arg $showcommands build-art-target-tests $common_targets libjavacrypto libjavacoretests linker toybox toolbox sh out/host/linux-x86/bin/adb"
-  echo "Executing env $env $make_command"
-  env $env $make_command
+  make_command="make $j_arg $showcommands build-art-target-tests $common_targets libjavacrypto libjavacoretests linker toybox toolbox sh out/host/linux-x86/bin/adb"
 fi
 
+echo "Executing $make_command"
+$make_command
diff --git a/tools/run-libcore-tests.sh b/tools/run-libcore-tests.sh
index 26d9ad7c37..80f7a3737f 100755
--- a/tools/run-libcore-tests.sh
+++ b/tools/run-libcore-tests.sh
@@ -86,12 +86,6 @@ while true; do
     # Remove the --debug from the arguments.
     vogar_args=${vogar_args/$1}
    vogar_args="$vogar_args --vm-arg -XXlib:libartd.so"
-    if [ "$emulator" = "no" ]; then
-      # Increase the timeout, as vogar cannot set individual test
-      # timeout when being asked to run packages, and some tests go above
-      # the default timeout.
-      vogar_args="$vogar_args --timeout 240"
-    fi
     shift
   elif [[ "$1" == "" ]]; then
     break
@@ -100,10 +94,10 @@ while true; do
   fi
 done
 
-if [ "$emulator" = "yes" ]; then
-  # Be very patient with the emulator.
-  vogar_args="$vogar_args --timeout 480"
-fi
+# Increase the timeout, as vogar cannot set individual test
+# timeout when being asked to run packages, and some tests go above
+# the default timeout.
+vogar_args="$vogar_args --timeout 480"
 
 # Run the tests using vogar.
 echo "Running tests for the following test packages:"