225 files changed, 7582 insertions, 2484 deletions
diff --git a/benchmark/Android.mk b/benchmark/Android.mk
index 09aca98337..a4a603ad04 100644
--- a/benchmark/Android.mk
+++ b/benchmark/Android.mk
@@ -19,6 +19,7 @@ LOCAL_PATH := $(call my-dir)
 include art/build/Android.common_build.mk
 LIBARTBENCHMARK_COMMON_SRC_FILES := \
+  jobject-benchmark/jobject_benchmark.cc \
   jni-perf/perf_jni.cc \
   scoped-primitive-array/scoped_primitive_array.cc
diff --git a/benchmark/jobject-benchmark/info.txt b/benchmark/jobject-benchmark/info.txt
new file mode 100644
index 0000000000..f2a256a3e6
--- /dev/null
+++ b/benchmark/jobject-benchmark/info.txt
@@ -0,0 +1,7 @@
+Benchmark for jobject functions
+
+Measures performance of:
+Add/RemoveLocalRef
+Add/RemoveGlobalRef
+Add/RemoveWeakGlobalRef
+Decoding local, weak, global, handle scope jobjects.
diff --git a/benchmark/jobject-benchmark/jobject_benchmark.cc b/benchmark/jobject-benchmark/jobject_benchmark.cc
new file mode 100644
index 0000000000..e7ca9ebc1e
--- /dev/null
+++ b/benchmark/jobject-benchmark/jobject_benchmark.cc
@@ -0,0 +1,103 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "jni.h"
+
+#include "mirror/class-inl.h"
+#include "scoped_thread_state_change.h"
+
+namespace art {
+namespace {
+
+extern "C" JNIEXPORT void JNICALL Java_JObjectBenchmark_timeAddRemoveLocal(
+    JNIEnv* env, jobject jobj, jint reps) {
+  ScopedObjectAccess soa(env);
+  mirror::Object* obj = soa.Decode<mirror::Object*>(jobj);
+  CHECK(obj != nullptr);
+  for (jint i = 0; i < reps; ++i) {
+    jobject ref = soa.Env()->AddLocalReference<jobject>(obj);
+    soa.Env()->DeleteLocalRef(ref);
+  }
+}
+
+extern "C" JNIEXPORT void JNICALL Java_JObjectBenchmark_timeDecodeLocal(
+    JNIEnv* env, jobject jobj, jint reps) {
+  ScopedObjectAccess soa(env);
+  mirror::Object* obj = soa.Decode<mirror::Object*>(jobj);
+  CHECK(obj != nullptr);
+  jobject ref = soa.Env()->AddLocalReference<jobject>(obj);
+  for (jint i = 0; i < reps; ++i) {
+    CHECK_EQ(soa.Decode<mirror::Object*>(ref), obj);
+  }
+  soa.Env()->DeleteLocalRef(ref);
+}
+
+extern "C" JNIEXPORT void JNICALL Java_JObjectBenchmark_timeAddRemoveGlobal(
+    JNIEnv* env, jobject jobj, jint reps) {
+  ScopedObjectAccess soa(env);
+  mirror::Object* obj = soa.Decode<mirror::Object*>(jobj);
+  CHECK(obj != nullptr);
+  for (jint i = 0; i < reps; ++i) {
+    jobject ref = soa.Vm()->AddGlobalRef(soa.Self(), obj);
+    soa.Vm()->DeleteGlobalRef(soa.Self(), ref);
+  }
+}
+
+extern "C" JNIEXPORT void JNICALL Java_JObjectBenchmark_timeDecodeGlobal(
+    JNIEnv* env, jobject jobj, jint reps) {
+  ScopedObjectAccess soa(env);
+  mirror::Object* obj = soa.Decode<mirror::Object*>(jobj);
+  CHECK(obj != nullptr);
+  jobject ref = soa.Vm()->AddGlobalRef(soa.Self(), obj);
+  for (jint i = 0; i < reps; ++i) {
+    CHECK_EQ(soa.Decode<mirror::Object*>(ref), obj);
+  }
+  soa.Vm()->DeleteGlobalRef(soa.Self(), ref);
+}
+
+extern "C" JNIEXPORT void JNICALL Java_JObjectBenchmark_timeAddRemoveWeakGlobal(
+    JNIEnv* env, jobject jobj, jint reps) {
+  ScopedObjectAccess soa(env);
+  mirror::Object* obj = soa.Decode<mirror::Object*>(jobj);
+  CHECK(obj != nullptr);
+  for (jint i = 0; i < reps; ++i) {
+    jobject ref = soa.Vm()->AddWeakGlobalRef(soa.Self(), obj);
+    soa.Vm()->DeleteWeakGlobalRef(soa.Self(), ref);
+  }
+}
+
+extern "C" JNIEXPORT void JNICALL Java_JObjectBenchmark_timeDecodeWeakGlobal(
+    JNIEnv* env, jobject jobj, jint reps) {
+  ScopedObjectAccess soa(env);
+  mirror::Object* obj = soa.Decode<mirror::Object*>(jobj);
+  CHECK(obj != nullptr);
+  jobject ref = soa.Vm()->AddWeakGlobalRef(soa.Self(), obj);
+  for (jint i = 0; i < reps; ++i) {
+    CHECK_EQ(soa.Decode<mirror::Object*>(ref), obj);
+  }
+  soa.Vm()->DeleteWeakGlobalRef(soa.Self(), ref);
+}
+
+extern "C" JNIEXPORT void JNICALL Java_JObjectBenchmark_timeDecodeHandleScopeRef(
+    JNIEnv* env, jobject jobj, jint reps) {
+  ScopedObjectAccess soa(env);
+  for (jint i = 0; i < reps; ++i) {
+    soa.Decode<mirror::Object*>(jobj);
+  }
+}
+
+}  // namespace
+}  // namespace art
diff --git a/benchmark/jobject-benchmark/src/JObjectBenchmark.java b/benchmark/jobject-benchmark/src/JObjectBenchmark.java
new file mode 100644
index 0000000000..f4c059c58b
--- /dev/null
+++ b/benchmark/jobject-benchmark/src/JObjectBenchmark.java
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import com.google.caliper.SimpleBenchmark;
+
+public class JObjectBenchmark extends SimpleBenchmark {
+  public JObjectBenchmark() {
+    // Make sure to link methods before benchmark starts.
+    System.loadLibrary("artbenchmark");
+    timeAddRemoveLocal(1);
+    timeDecodeLocal(1);
+    timeAddRemoveGlobal(1);
+    timeDecodeGlobal(1);
+    timeAddRemoveWeakGlobal(1);
+    timeDecodeWeakGlobal(1);
+    timeDecodeHandleScopeRef(1);
+  }
+
+  public native void timeAddRemoveLocal(int reps);
+  public native void timeDecodeLocal(int reps);
+  public native void timeAddRemoveGlobal(int reps);
+  public native void timeDecodeGlobal(int reps);
+  public native void timeAddRemoveWeakGlobal(int reps);
+  public native void timeDecodeWeakGlobal(int reps);
+  public native void timeDecodeHandleScopeRef(int reps);
+}
diff --git a/build/Android.executable.mk b/build/Android.executable.mk
index 72cf978339..3b2d1cc93d 100644
--- a/build/Android.executable.mk
+++ b/build/Android.executable.mk
@@ -101,7 +101,10 @@ define build-art-executable
     # TODO: Having this is not ideal as it might obscure errors. Try to get rid of it.
LOCAL_LDFLAGS += -z muldefs ifeq ($$(HOST_OS),linux) - LOCAL_LDLIBS += -lrt + LOCAL_LDLIBS += -lrt -lncurses -ltinfo + endif + ifeq ($$(HOST_OS),darwin) + LOCAL_LDLIBS += -lncurses -ltinfo endif endif diff --git a/build/Android.oat.mk b/build/Android.oat.mk index 3a3cb990ca..6e17ed38d6 100644 --- a/build/Android.oat.mk +++ b/build/Android.oat.mk @@ -74,6 +74,10 @@ define create-core-oat-host-rules core_compile_options += --compiler-filter=verify-at-runtime --runtime-arg -Xverify:softfail core_infix := -interp-ac endif + ifeq ($(1),jit) + core_compile_options += --compiler-filter=verify-at-runtime + core_infix := -jit + endif ifeq ($(1),default) # Default has no infix, no compile options. endif @@ -154,6 +158,7 @@ $(eval $(call create-core-oat-host-rule-combination,default,,)) $(eval $(call create-core-oat-host-rule-combination,optimizing,,)) $(eval $(call create-core-oat-host-rule-combination,interpreter,,)) $(eval $(call create-core-oat-host-rule-combination,interp-ac,,)) +$(eval $(call create-core-oat-host-rule-combination,jit,,)) valgrindHOST_CORE_IMG_OUTS := valgrindHOST_CORE_OAT_OUTS := @@ -161,6 +166,7 @@ $(eval $(call create-core-oat-host-rule-combination,default,valgrind,32)) $(eval $(call create-core-oat-host-rule-combination,optimizing,valgrind,32)) $(eval $(call create-core-oat-host-rule-combination,interpreter,valgrind,32)) $(eval $(call create-core-oat-host-rule-combination,interp-ac,valgrind,32)) +$(eval $(call create-core-oat-host-rule-combination,jit,valgrind,32)) valgrind-test-art-host-dex2oat-host: $(valgrindHOST_CORE_IMG_OUTS) @@ -194,6 +200,10 @@ define create-core-oat-target-rules core_compile_options += --compiler-filter=verify-at-runtime --runtime-arg -Xverify:softfail core_infix := -interp-ac endif + ifeq ($(1),jit) + core_compile_options += --compiler-filter=verify-at-runtime + core_infix := -jit + endif ifeq ($(1),default) # Default has no infix, no compile options. 
endif @@ -279,6 +289,7 @@ $(eval $(call create-core-oat-target-rule-combination,default,,)) $(eval $(call create-core-oat-target-rule-combination,optimizing,,)) $(eval $(call create-core-oat-target-rule-combination,interpreter,,)) $(eval $(call create-core-oat-target-rule-combination,interp-ac,,)) +$(eval $(call create-core-oat-target-rule-combination,jit,,)) valgrindTARGET_CORE_IMG_OUTS := valgrindTARGET_CORE_OAT_OUTS := @@ -286,6 +297,7 @@ $(eval $(call create-core-oat-target-rule-combination,default,valgrind,32)) $(eval $(call create-core-oat-target-rule-combination,optimizing,valgrind,32)) $(eval $(call create-core-oat-target-rule-combination,interpreter,valgrind,32)) $(eval $(call create-core-oat-target-rule-combination,interp-ac,valgrind,32)) +$(eval $(call create-core-oat-target-rule-combination,jit,valgrind,32)) valgrind-test-art-host-dex2oat-target: $(valgrindTARGET_CORE_IMG_OUTS) diff --git a/compiler/Android.mk b/compiler/Android.mk index 41e9744777..96e13ac9a3 100644 --- a/compiler/Android.mk +++ b/compiler/Android.mk @@ -158,6 +158,7 @@ LIBART_COMPILER_SRC_FILES_mips64 := \ $(LIBART_COMPILER_SRC_FILES_mips) \ jni/quick/mips64/calling_convention_mips64.cc \ optimizing/code_generator_mips64.cc \ + optimizing/intrinsics_mips64.cc \ utils/mips64/assembler_mips64.cc \ utils/mips64/managed_register_mips64.cc \ diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc index 7082bedc5e..d5ac34186b 100644 --- a/compiler/dex/quick/codegen_util.cc +++ b/compiler/dex/quick/codegen_util.cc @@ -1126,7 +1126,7 @@ CompiledMethod* Mir2Lir::GetCompiledMethod() { for (size_t i = 0 ; i < core_vmap_table_.size(); ++i) { // Copy, stripping out the phys register sort key. vmap_encoder.PushBackUnsigned( - ~(-1 << VREG_NUM_WIDTH) & (core_vmap_table_[i] + VmapTable::kEntryAdjustment)); + ~(~0u << VREG_NUM_WIDTH) & (core_vmap_table_[i] + VmapTable::kEntryAdjustment)); } // Push a marker to take place of lr. vmap_encoder.PushBackUnsigned(VmapTable::kAdjustedFpMarker); @@ -1141,7 +1141,7 @@ CompiledMethod* Mir2Lir::GetCompiledMethod() { for (size_t i = 0 ; i < fp_vmap_table_.size(); ++i) { // Copy, stripping out the phys register sort key. vmap_encoder.PushBackUnsigned( - ~(-1 << VREG_NUM_WIDTH) & (fp_vmap_table_[i] + VmapTable::kEntryAdjustment)); + ~(~0u << VREG_NUM_WIDTH) & (fp_vmap_table_[i] + VmapTable::kEntryAdjustment)); } } } else { diff --git a/compiler/dex/quick/quick_compiler.cc b/compiler/dex/quick/quick_compiler.cc index b5ecf9c418..1cd742abac 100644 --- a/compiler/dex/quick/quick_compiler.cc +++ b/compiler/dex/quick/quick_compiler.cc @@ -391,9 +391,9 @@ static int kAllOpcodes[] = { Instruction::IGET_SHORT_QUICK, Instruction::INVOKE_LAMBDA, Instruction::UNUSED_F4, - Instruction::UNUSED_F5, + Instruction::CAPTURE_VARIABLE, Instruction::CREATE_LAMBDA, - Instruction::UNUSED_F7, + Instruction::LIBERATE_VARIABLE, Instruction::BOX_LAMBDA, Instruction::UNBOX_LAMBDA, Instruction::UNUSED_FA, diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc index 955c575198..d9f8fcb43a 100644 --- a/compiler/image_writer.cc +++ b/compiler/image_writer.cc @@ -1362,6 +1362,10 @@ void ImageWriter::FixupObject(Object* orig, Object* copy) { // If src is a ClassLoader, set the class table to null so that it gets recreated by the // ClassLoader. down_cast<mirror::ClassLoader*>(copy)->SetClassTable(nullptr); + // Also set allocator to null to be safe. The allocator is created when we create the class + // table. 
We also never expect to unload things in the image since they are held live as + // roots. + down_cast<mirror::ClassLoader*>(copy)->SetAllocator(nullptr); } } FixupVisitor visitor(this, copy); diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc index c96b1715d7..4ddd457ac5 100644 --- a/compiler/oat_writer.cc +++ b/compiler/oat_writer.cc @@ -1123,11 +1123,7 @@ size_t OatWriter::InitOatCodeDexFiles(size_t offset) { } while (false) VISIT(InitCodeMethodVisitor); - // InitImageMethodVisitor visits all methods, resolves them (failing if one cannot be resolved, - // which is a bad sign for a working boot image), and then install entrypoints. - // In case of VerifyAtRuntime, there won't be code, and we do not want to resolve the methods - // (this has been skipped in the driver for performance). - if (compiler_driver_->IsImage() && !compiler_driver_->GetCompilerOptions().VerifyAtRuntime()) { + if (compiler_driver_->IsImage()) { VISIT(InitImageMethodVisitor); } diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc index 62f5b9aa52..960f4d9b7c 100644 --- a/compiler/optimizing/bounds_check_elimination.cc +++ b/compiler/optimizing/bounds_check_elimination.cc @@ -14,8 +14,11 @@ * limitations under the License. */ -#include "base/arena_containers.h" #include "bounds_check_elimination.h" + +#include <limits> + +#include "base/arena_containers.h" #include "induction_var_range.h" #include "nodes.h" @@ -48,11 +51,11 @@ class ValueBound : public ValueObject { if (right == 0) { return false; } - if ((right > 0) && (left <= INT_MAX - right)) { + if ((right > 0) && (left <= (std::numeric_limits<int32_t>::max() - right))) { // No overflow. return false; } - if ((right < 0) && (left >= INT_MIN - right)) { + if ((right < 0) && (left >= (std::numeric_limits<int32_t>::min() - right))) { // No underflow. return false; } @@ -120,8 +123,8 @@ class ValueBound : public ValueObject { return instruction_ == nullptr; } - static ValueBound Min() { return ValueBound(nullptr, INT_MIN); } - static ValueBound Max() { return ValueBound(nullptr, INT_MAX); } + static ValueBound Min() { return ValueBound(nullptr, std::numeric_limits<int32_t>::min()); } + static ValueBound Max() { return ValueBound(nullptr, std::numeric_limits<int32_t>::max()); } bool Equals(ValueBound bound) const { return instruction_ == bound.instruction_ && constant_ == bound.constant_; @@ -213,7 +216,7 @@ class ValueBound : public ValueObject { int32_t new_constant; if (c > 0) { - if (constant_ > INT_MAX - c) { + if (constant_ > (std::numeric_limits<int32_t>::max() - c)) { *overflow = true; return Max(); } @@ -227,7 +230,7 @@ class ValueBound : public ValueObject { *overflow = true; return Max(); } else { - if (constant_ < INT_MIN - c) { + if (constant_ < (std::numeric_limits<int32_t>::min() - c)) { *underflow = true; return Min(); } @@ -256,8 +259,8 @@ class ArrayAccessInsideLoopFinder : public ValueObject { explicit ArrayAccessInsideLoopFinder(HInstruction* induction_variable) : induction_variable_(induction_variable), found_array_length_(nullptr), - offset_low_(INT_MAX), - offset_high_(INT_MIN) { + offset_low_(std::numeric_limits<int32_t>::max()), + offset_high_(std::numeric_limits<int32_t>::min()) { Run(); } @@ -407,7 +410,7 @@ class ArrayAccessInsideLoopFinder : public ValueObject { * of an existing value range, NewArray or a loop phi corresponding to an * incrementing/decrementing array index (MonotonicValueRange). 
*/ -class ValueRange : public ArenaObject<kArenaAllocMisc> { +class ValueRange : public ArenaObject<kArenaAllocBoundsCheckElimination> { public: ValueRange(ArenaAllocator* allocator, ValueBound lower, ValueBound upper) : allocator_(allocator), lower_(lower), upper_(upper) {} @@ -492,7 +495,7 @@ class MonotonicValueRange : public ValueRange { HInstruction* initial, int32_t increment, ValueBound bound) - // To be conservative, give it full range [INT_MIN, INT_MAX] in case it's + // To be conservative, give it full range [Min(), Max()] in case it's // used as a regular value range, due to possible overflow/underflow. : ValueRange(allocator, ValueBound::Min(), ValueBound::Max()), induction_variable_(induction_variable), @@ -554,19 +557,19 @@ class MonotonicValueRange : public ValueRange { if (increment_ > 0) { // Monotonically increasing. ValueBound lower = ValueBound::NarrowLowerBound(bound_, range->GetLower()); - if (!lower.IsConstant() || lower.GetConstant() == INT_MIN) { + if (!lower.IsConstant() || lower.GetConstant() == std::numeric_limits<int32_t>::min()) { // Lower bound isn't useful. Leave it to deoptimization. return this; } - // We currently conservatively assume max array length is INT_MAX. If we can - // make assumptions about the max array length, e.g. due to the max heap size, + // We currently conservatively assume max array length is Max(). + // If we can make assumptions about the max array length, e.g. due to the max heap size, // divided by the element size (such as 4 bytes for each integer array), we can // lower this number and rule out some possible overflows. - int32_t max_array_len = INT_MAX; + int32_t max_array_len = std::numeric_limits<int32_t>::max(); // max possible integer value of range's upper value. - int32_t upper = INT_MAX; + int32_t upper = std::numeric_limits<int32_t>::max(); // Try to lower upper. ValueBound upper_bound = range->GetUpper(); if (upper_bound.IsConstant()) { @@ -593,7 +596,7 @@ class MonotonicValueRange : public ValueRange { ((int64_t)upper - (int64_t)initial_constant) / increment_ * increment_; } } - if (last_num_in_sequence <= INT_MAX - increment_) { + if (last_num_in_sequence <= (std::numeric_limits<int32_t>::max() - increment_)) { // No overflow. The sequence will be stopped by the upper bound test as expected. return new (GetAllocator()) ValueRange(GetAllocator(), lower, range->GetUpper()); } @@ -604,7 +607,7 @@ class MonotonicValueRange : public ValueRange { DCHECK_NE(increment_, 0); // Monotonically decreasing. ValueBound upper = ValueBound::NarrowUpperBound(bound_, range->GetUpper()); - if ((!upper.IsConstant() || upper.GetConstant() == INT_MAX) && + if ((!upper.IsConstant() || upper.GetConstant() == std::numeric_limits<int32_t>::max()) && !upper.IsRelatedToArrayLength()) { // Upper bound isn't useful. Leave it to deoptimization. return this; @@ -614,7 +617,7 @@ class MonotonicValueRange : public ValueRange { // for common cases. if (range->GetLower().IsConstant()) { int32_t constant = range->GetLower().GetConstant(); - if (constant >= INT_MIN - increment_) { + if (constant >= (std::numeric_limits<int32_t>::min() - increment_)) { return new (GetAllocator()) ValueRange(GetAllocator(), range->GetLower(), upper); } } @@ -1099,7 +1102,8 @@ class BCEVisitor : public HGraphVisitor { // Very large constant index is considered as an anomaly. This is a threshold // beyond which we don't bother to apply the deoptimization technique since // it's likely some AIOOBE will be thrown. 
- static constexpr int32_t kMaxConstantForAddingDeoptimize = INT_MAX - 1024 * 1024; + static constexpr int32_t kMaxConstantForAddingDeoptimize = + std::numeric_limits<int32_t>::max() - 1024 * 1024; // Added blocks for loop body entry test. bool IsAddedBlock(HBasicBlock* block) const { @@ -1108,7 +1112,14 @@ class BCEVisitor : public HGraphVisitor { BCEVisitor(HGraph* graph, HInductionVarAnalysis* induction_analysis) : HGraphVisitor(graph), - maps_(graph->GetBlocks().size()), + maps_(graph->GetBlocks().size(), + ArenaSafeMap<int, ValueRange*>( + std::less<int>(), + graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)), + graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)), + first_constant_index_bounds_check_map_( + std::less<int>(), + graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)), need_to_revisit_block_(false), initial_block_size_(graph->GetBlocks().size()), induction_range_(induction_analysis) {} @@ -1133,14 +1144,9 @@ class BCEVisitor : public HGraphVisitor { // Added blocks don't keep value ranges. return nullptr; } - int block_id = basic_block->GetBlockId(); - if (maps_.at(block_id) == nullptr) { - std::unique_ptr<ArenaSafeMap<int, ValueRange*>> map( - new ArenaSafeMap<int, ValueRange*>( - std::less<int>(), GetGraph()->GetArena()->Adapter())); - maps_.at(block_id) = std::move(map); - } - return maps_.at(block_id).get(); + uint32_t block_id = basic_block->GetBlockId(); + DCHECK_LT(block_id, maps_.size()); + return &maps_[block_id]; } // Traverse up the dominator tree to look for value range info. @@ -1165,8 +1171,8 @@ class BCEVisitor : public HGraphVisitor { ValueRange* LookupInductionRange(HInstruction* context, HInstruction* instruction) { InductionVarRange::Value v1 = induction_range_.GetMinInduction(context, instruction); InductionVarRange::Value v2 = induction_range_.GetMaxInduction(context, instruction); - if ((v1.a_constant == 0 || v1.a_constant == 1) && v1.b_constant != INT_MIN && - (v2.a_constant == 0 || v2.a_constant == 1) && v2.b_constant != INT_MAX) { + if (v1.is_known && (v1.a_constant == 0 || v1.a_constant == 1) && + v2.is_known && (v2.a_constant == 0 || v2.a_constant == 1)) { DCHECK(v1.a_constant == 1 || v1.instruction == nullptr); DCHECK(v2.a_constant == 1 || v2.instruction == nullptr); ValueBound low = ValueBound(v1.instruction, v1.b_constant); @@ -1467,8 +1473,8 @@ class BCEVisitor : public HGraphVisitor { // Once we have an array access like 'array[5] = 1', we record array.length >= 6. // We currently don't do it for non-constant index since a valid array[i] can't prove // a valid array[i-1] yet due to the lower bound side. - if (constant == INT_MAX) { - // INT_MAX as an index will definitely throw AIOOBE. + if (constant == std::numeric_limits<int32_t>::max()) { + // Max() as an index will definitely throw AIOOBE. return; } ValueBound lower = ValueBound(nullptr, constant + 1); @@ -1690,8 +1696,8 @@ class BCEVisitor : public HGraphVisitor { // The value of left input of instruction equals (left + c). // (array_length + 1) or smaller divided by two or more - // always generate a value in [INT_MIN, array_length]. - // This is true even if array_length is INT_MAX. + // always generate a value in [Min(), array_length]. + // This is true even if array_length is Max(). if (left->IsArrayLength() && c <= 1) { if (instruction->IsUShr() && c < 0) { // Make sure for unsigned shift, left side is not negative. 
@@ -1701,7 +1707,7 @@ class BCEVisitor : public HGraphVisitor { } ValueRange* range = new (GetGraph()->GetArena()) ValueRange( GetGraph()->GetArena(), - ValueBound(nullptr, INT_MIN), + ValueBound(nullptr, std::numeric_limits<int32_t>::min()), ValueBound(left, 0)); GetValueRangeMap(instruction->GetBlock())->Overwrite(instruction->GetId(), range); } @@ -1811,7 +1817,7 @@ class BCEVisitor : public HGraphVisitor { continue; } HIntConstant* lower_bound_const_instr = nullptr; - int32_t lower_bound_const = INT_MIN; + int32_t lower_bound_const = std::numeric_limits<int32_t>::min(); size_t counter = 0; // Count the constant indexing for which bounds checks haven't // been removed yet. @@ -1838,11 +1844,11 @@ class BCEVisitor : public HGraphVisitor { } } - std::vector<std::unique_ptr<ArenaSafeMap<int, ValueRange*>>> maps_; + ArenaVector<ArenaSafeMap<int, ValueRange*>> maps_; // Map an HArrayLength instruction's id to the first HBoundsCheck instruction in // a block that checks a constant index against that HArrayLength. - SafeMap<int, HBoundsCheck*> first_constant_index_bounds_check_map_; + ArenaSafeMap<int, HBoundsCheck*> first_constant_index_bounds_check_map_; // For the block, there is at least one HArrayLength instruction for which there // is more than one bounds check instruction with constant indexing. And it's diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index 274a2a699f..cb36f62235 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -140,11 +140,11 @@ class SwitchTable : public ValueObject { void HGraphBuilder::InitializeLocals(uint16_t count) { graph_->SetNumberOfVRegs(count); - locals_.SetSize(count); + locals_.resize(count); for (int i = 0; i < count; i++) { HLocal* local = new (arena_) HLocal(i); entry_block_->AddInstruction(local); - locals_.Put(i, local); + locals_[i] = local; } } @@ -156,7 +156,7 @@ void HGraphBuilder::InitializeParameters(uint16_t number_of_parameters) { graph_->SetNumberOfInVRegs(number_of_parameters); const char* shorty = dex_compilation_unit_->GetShorty(); - int locals_index = locals_.Size() - number_of_parameters; + int locals_index = locals_.size() - number_of_parameters; int parameter_index = 0; if (!dex_compilation_unit_->IsStatic()) { @@ -262,22 +262,6 @@ bool HGraphBuilder::SkipCompilation(const DexFile::CodeItem& code_item, return false; } -static const DexFile::TryItem* GetTryItem(HBasicBlock* block, - const DexFile::CodeItem& code_item, - const ArenaBitVector& can_block_throw) { - DCHECK(!block->IsSingleTryBoundary()); - - // Block does not contain throwing instructions. Even if it is covered by - // a TryItem, we will consider it not in a try block. - if (!can_block_throw.IsBitSet(block->GetBlockId())) { - return nullptr; - } - - // Instructions in the block may throw. Find a TryItem covering this block. - int32_t try_item_idx = DexFile::FindTryItem(code_item, block->GetDexPc()); - return (try_item_idx == -1) ? nullptr : DexFile::GetTryItems(code_item, try_item_idx); -} - void HGraphBuilder::CreateBlocksForTryCatch(const DexFile::CodeItem& code_item) { if (code_item.tries_size_ == 0) { return; @@ -316,18 +300,18 @@ void HGraphBuilder::CreateBlocksForTryCatch(const DexFile::CodeItem& code_item) } } -void HGraphBuilder::SplitTryBoundaryEdge(HBasicBlock* predecessor, - HBasicBlock* successor, - HTryBoundary::BoundaryKind kind, - const DexFile::CodeItem& code_item, - const DexFile::TryItem& try_item) { - // Split the edge with a single TryBoundary instruction. 
- HTryBoundary* try_boundary = new (arena_) HTryBoundary(kind, successor->GetDexPc()); - HBasicBlock* try_entry_block = graph_->SplitEdge(predecessor, successor); - try_entry_block->AddInstruction(try_boundary); - - // Link the TryBoundary to the handlers of `try_item`. - for (CatchHandlerIterator it(code_item, try_item); it.HasNext(); it.Next()) { +// Returns the TryItem stored for `block` or nullptr if there is no info for it. +static const DexFile::TryItem* GetTryItem( + HBasicBlock* block, + const ArenaSafeMap<uint32_t, const DexFile::TryItem*>& try_block_info) { + auto iterator = try_block_info.find(block->GetBlockId()); + return (iterator == try_block_info.end()) ? nullptr : iterator->second; +} + +void HGraphBuilder::LinkToCatchBlocks(HTryBoundary* try_boundary, + const DexFile::CodeItem& code_item, + const DexFile::TryItem* try_item) { + for (CatchHandlerIterator it(code_item, *try_item); it.HasNext(); it.Next()) { try_boundary->AddExceptionHandler(FindBlockStartingAt(it.GetHandlerAddress())); } } @@ -337,132 +321,103 @@ void HGraphBuilder::InsertTryBoundaryBlocks(const DexFile::CodeItem& code_item) return; } - // Bit vector stores information on which blocks contain throwing instructions. - // Must be expandable because catch blocks may be split into two. - ArenaBitVector can_block_throw(arena_, graph_->GetBlocks().size(), /* expandable */ true); + // Keep a map of all try blocks and their respective TryItems. We do not use + // the block's pointer but rather its id to ensure deterministic iteration. + ArenaSafeMap<uint32_t, const DexFile::TryItem*> try_block_info( + std::less<uint32_t>(), arena_->Adapter(kArenaAllocGraphBuilder)); + + // Obtain TryItem information for blocks with throwing instructions, and split + // blocks which are both try & catch to simplify the graph. + // NOTE: We are appending new blocks inside the loop, so we need to use index + // because iterators can be invalidated. We remember the initial size to avoid + // iterating over the new blocks which cannot throw. + for (size_t i = 0, e = graph_->GetBlocks().size(); i < e; ++i) { + HBasicBlock* block = graph_->GetBlocks()[i]; + + // Do not bother creating exceptional edges for try blocks which have no + // throwing instructions. In that case we simply assume that the block is + // not covered by a TryItem. This prevents us from creating a throw-catch + // loop for synchronized blocks. + if (block->HasThrowingInstructions()) { + // Try to find a TryItem covering the block. + DCHECK_NE(block->GetDexPc(), kNoDexPc) << "Block must have a dec_pc to find its TryItem."; + const int32_t try_item_idx = DexFile::FindTryItem(code_item, block->GetDexPc()); + if (try_item_idx != -1) { + // Block throwing and in a TryItem. Store the try block information. + HBasicBlock* throwing_block = block; + if (block->IsCatchBlock()) { + // Simplify blocks which are both try and catch, otherwise we would + // need a strategy for splitting exceptional edges. We split the block + // after the move-exception (if present) and mark the first part not + // throwing. The normal-flow edge between them will be split later. + HInstruction* first_insn = block->GetFirstInstruction(); + if (first_insn->IsLoadException()) { + // Catch block starts with a LoadException. Split the block after + // the StoreLocal and ClearException which must come after the load. 
+ DCHECK(first_insn->GetNext()->IsStoreLocal()); + DCHECK(first_insn->GetNext()->GetNext()->IsClearException()); + throwing_block = block->SplitBefore(first_insn->GetNext()->GetNext()->GetNext()); + } else { + // Catch block does not load the exception. Split at the beginning + // to create an empty catch block. + throwing_block = block->SplitBefore(first_insn); + } + } - // Scan blocks and mark those which contain throwing instructions. - // NOTE: We're appending new blocks inside the loop, so we need to use index because iterators - // can be invalidated. We remember the initial size to avoid iterating over the new blocks. - for (size_t block_id = 0u, end = graph_->GetBlocks().size(); block_id != end; ++block_id) { - HBasicBlock* block = graph_->GetBlocks()[block_id]; - bool can_throw = false; - for (HInstructionIterator insn(block->GetInstructions()); !insn.Done(); insn.Advance()) { - if (insn.Current()->CanThrow()) { - can_throw = true; - break; + try_block_info.Put(throwing_block->GetBlockId(), + DexFile::GetTryItems(code_item, try_item_idx)); } } + } - if (can_throw) { - if (block->IsCatchBlock()) { - // Catch blocks are always considered an entry point into the TryItem in - // order to avoid splitting exceptional edges. We split the block after - // the move-exception (if present) and mark the first part non-throwing. - // Later on, a TryBoundary will be inserted between the two blocks. - HInstruction* first_insn = block->GetFirstInstruction(); - if (first_insn->IsLoadException()) { - // Catch block starts with a LoadException. Split the block after the - // StoreLocal and ClearException which must come after the load. - DCHECK(first_insn->GetNext()->IsStoreLocal()); - DCHECK(first_insn->GetNext()->GetNext()->IsClearException()); - block = block->SplitBefore(first_insn->GetNext()->GetNext()->GetNext()); - } else { - // Catch block does not load the exception. Split at the beginning to - // create an empty catch block. - block = block->SplitBefore(first_insn); - } + // Do a pass over the try blocks and insert entering TryBoundaries where at + // least one predecessor is not covered by the same TryItem as the try block. + // We do not split each edge separately, but rather create one boundary block + // that all predecessors are relinked to. This preserves loop headers (b/23895756). + for (auto entry : try_block_info) { + HBasicBlock* try_block = graph_->GetBlock(entry.first); + for (HBasicBlock* predecessor : try_block->GetPredecessors()) { + if (GetTryItem(predecessor, try_block_info) != entry.second) { + // Found a predecessor not covered by the same TryItem. Insert entering + // boundary block. + HTryBoundary* try_entry = + new (arena_) HTryBoundary(HTryBoundary::kEntry, try_block->GetDexPc()); + try_block->CreateImmediateDominator()->AddInstruction(try_entry); + LinkToCatchBlocks(try_entry, code_item, entry.second); + break; } - can_block_throw.SetBit(block->GetBlockId()); - } - } - - // Iterate over all blocks, find those covered by some TryItem and: - // (a) split edges which enter/exit the try range, - // (b) create TryBoundary instructions in the new blocks, - // (c) link the new blocks to corresponding exception handlers. - // We cannot iterate only over blocks in `branch_targets_` because switch-case - // blocks share the same dex_pc. - // NOTE: We're appending new blocks inside the loop, so we need to use index because iterators - // can be invalidated. We remember the initial size to avoid iterating over the new blocks. 
- for (size_t block_id = 0u, end = graph_->GetBlocks().size(); block_id != end; ++block_id) { - HBasicBlock* try_block = graph_->GetBlocks()[block_id]; - // TryBoundary blocks are added at the end of the list and not iterated over. - DCHECK(!try_block->IsSingleTryBoundary()); - - // Find the TryItem for this block. - const DexFile::TryItem* try_item = GetTryItem(try_block, code_item, can_block_throw); - if (try_item == nullptr) { - continue; - } - - // Catch blocks were split earlier and cannot throw. - DCHECK(!try_block->IsCatchBlock()); - - // Find predecessors which are not covered by the same TryItem range. Such - // edges enter the try block and will have a TryBoundary inserted. - for (size_t i = 0; i < try_block->GetPredecessors().size(); ++i) { - HBasicBlock* predecessor = try_block->GetPredecessor(i); - if (predecessor->IsSingleTryBoundary()) { - // The edge was already split because of an exit from a neighbouring - // TryItem. We split it again and insert an entry point. - if (kIsDebugBuild) { - HTryBoundary* last_insn = predecessor->GetLastInstruction()->AsTryBoundary(); - const DexFile::TryItem* predecessor_try_item = - GetTryItem(predecessor->GetSinglePredecessor(), code_item, can_block_throw); - DCHECK(!last_insn->IsEntry()); - DCHECK_EQ(last_insn->GetNormalFlowSuccessor(), try_block); - DCHECK(try_block->IsFirstIndexOfPredecessor(predecessor, i)); - DCHECK_NE(try_item, predecessor_try_item); - } - } else if (GetTryItem(predecessor, code_item, can_block_throw) != try_item) { - // This is an entry point into the TryItem and the edge has not been - // split yet. That means that `predecessor` is not in a TryItem, or - // it is in a different TryItem and we happened to iterate over this - // block first. We split the edge and insert an entry point. - } else { - // Not an edge on the boundary of the try block. + } + } + + // Do a second pass over the try blocks and insert exit TryBoundaries where + // the successor is not in the same TryItem. + for (auto entry : try_block_info) { + HBasicBlock* try_block = graph_->GetBlock(entry.first); + // NOTE: Do not use iterators because SplitEdge would invalidate them. + for (size_t i = 0, e = try_block->GetSuccessors().size(); i < e; ++i) { + HBasicBlock* successor = try_block->GetSuccessor(i); + + // If the successor is a try block, all of its predecessors must be + // covered by the same TryItem. Otherwise the previous pass would have + // created a non-throwing boundary block. + if (GetTryItem(successor, try_block_info) != nullptr) { + DCHECK_EQ(entry.second, GetTryItem(successor, try_block_info)); continue; } - SplitTryBoundaryEdge(predecessor, try_block, HTryBoundary::kEntry, code_item, *try_item); - } - - // Find successors which are not covered by the same TryItem range. Such - // edges exit the try block and will have a TryBoundary inserted. - for (HBasicBlock* successor : try_block->GetSuccessors()) { - if (successor->IsCatchBlock()) { - // A catch block is always considered an entry point into its TryItem. - // We therefore assume this is an exit point, regardless of whether - // the catch block is in a different TryItem or not. - } else if (successor->IsSingleTryBoundary()) { - // The edge was already split because of an entry into a neighbouring - // TryItem. We split it again and insert an exit. 
- if (kIsDebugBuild) { - HTryBoundary* last_insn = successor->GetLastInstruction()->AsTryBoundary(); - const DexFile::TryItem* successor_try_item = - GetTryItem(last_insn->GetNormalFlowSuccessor(), code_item, can_block_throw); - DCHECK_EQ(try_block, successor->GetSinglePredecessor()); - DCHECK(last_insn->IsEntry()); - DCHECK_NE(try_item, successor_try_item); - } - } else if (GetTryItem(successor, code_item, can_block_throw) != try_item) { - // This is an exit out of the TryItem and the edge has not been split - // yet. That means that either `successor` is not in a TryItem, or it - // is in a different TryItem and we happened to iterate over this - // block first. We split the edge and insert an exit. - HInstruction* last_instruction = try_block->GetLastInstruction(); - if (last_instruction->IsReturn() || last_instruction->IsReturnVoid()) { - DCHECK_EQ(successor, exit_block_); - // Control flow exits the try block with a Return(Void). Because - // splitting the edge would invalidate the invariant that Return - // always jumps to Exit, we move the Return outside the try block. - successor = try_block->SplitBefore(last_instruction); - } - } else { - // Not an edge on the boundary of the try block. - continue; + + // Preserve the invariant that Return(Void) always jumps to Exit by moving + // it outside the try block if necessary. + HInstruction* last_instruction = try_block->GetLastInstruction(); + if (last_instruction->IsReturn() || last_instruction->IsReturnVoid()) { + DCHECK_EQ(successor, exit_block_); + successor = try_block->SplitBefore(last_instruction); } - SplitTryBoundaryEdge(try_block, successor, HTryBoundary::kExit, code_item, *try_item); + + // Insert TryBoundary and link to catch blocks. + HTryBoundary* try_exit = + new (arena_) HTryBoundary(HTryBoundary::kExit, successor->GetDexPc()); + graph_->SplitEdge(try_block, successor)->AddInstruction(try_exit); + LinkToCatchBlocks(try_exit, code_item, entry.second); } } } @@ -554,11 +509,11 @@ void HGraphBuilder::MaybeUpdateCurrentBlock(size_t dex_pc) { bool HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr, const uint16_t* code_end, size_t* number_of_branches) { - branch_targets_.SetSize(code_end - code_ptr); + branch_targets_.resize(code_end - code_ptr, nullptr); // Create the first block for the dex instructions, single successor of the entry block. HBasicBlock* block = new (arena_) HBasicBlock(graph_, 0); - branch_targets_.Put(0, block); + branch_targets_[0] = block; entry_block_->AddSuccessor(block); // Iterate over all instructions and find branching instructions. Create blocks for @@ -602,7 +557,7 @@ bool HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr, // Create a block for the switch-case logic. The block gets the dex_pc // of the SWITCH instruction because it is part of its semantics. block = new (arena_) HBasicBlock(graph_, dex_pc); - branch_targets_.Put(table.GetDexPcForIndex(i), block); + branch_targets_[table.GetDexPcForIndex(i)] = block; } // Fall-through. Add a block if there is more code afterwards. 
@@ -626,15 +581,15 @@ bool HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr, HBasicBlock* HGraphBuilder::FindBlockStartingAt(int32_t dex_pc) const { DCHECK_GE(dex_pc, 0); - DCHECK_LT(static_cast<size_t>(dex_pc), branch_targets_.Size()); - return branch_targets_.Get(dex_pc); + DCHECK_LT(static_cast<size_t>(dex_pc), branch_targets_.size()); + return branch_targets_[dex_pc]; } HBasicBlock* HGraphBuilder::FindOrCreateBlockStartingAt(int32_t dex_pc) { HBasicBlock* block = FindBlockStartingAt(dex_pc); if (block == nullptr) { block = new (arena_) HBasicBlock(graph_, dex_pc); - branch_targets_.Put(dex_pc, block); + branch_targets_[dex_pc] = block; } return block; } @@ -1685,6 +1640,34 @@ bool HGraphBuilder::NeedsAccessCheck(uint32_t type_index) const { dex_compilation_unit_->GetDexMethodIndex(), *dex_file_, type_index); } +void HGraphBuilder::BuildSwitchJumpTable(const SwitchTable& table, + const Instruction& instruction, + HInstruction* value, + uint32_t dex_pc) { + // Add the successor blocks to the current block. + uint16_t num_entries = table.GetNumEntries(); + for (size_t i = 1; i <= num_entries; i++) { + int32_t target_offset = table.GetEntryAt(i); + HBasicBlock* case_target = FindBlockStartingAt(dex_pc + target_offset); + DCHECK(case_target != nullptr); + + // Add the target block as a successor. + current_block_->AddSuccessor(case_target); + } + + // Add the default target block as the last successor. + HBasicBlock* default_target = FindBlockStartingAt(dex_pc + instruction.SizeInCodeUnits()); + DCHECK(default_target != nullptr); + current_block_->AddSuccessor(default_target); + + // Now add the Switch instruction. + int32_t starting_key = table.GetEntryAt(0); + current_block_->AddInstruction( + new (arena_) HPackedSwitch(starting_key, num_entries, value, dex_pc)); + // This block ends with control flow. + current_block_ = nullptr; +} + void HGraphBuilder::BuildPackedSwitch(const Instruction& instruction, uint32_t dex_pc) { // Verifier guarantees that the payload for PackedSwitch contains: // (a) number of entries (may be zero) @@ -1695,18 +1678,30 @@ void HGraphBuilder::BuildPackedSwitch(const Instruction& instruction, uint32_t d // Value to test against. HInstruction* value = LoadLocal(instruction.VRegA(), Primitive::kPrimInt, dex_pc); + // Starting key value. + int32_t starting_key = table.GetEntryAt(0); + // Retrieve number of entries. uint16_t num_entries = table.GetNumEntries(); if (num_entries == 0) { return; } - // Chained cmp-and-branch, starting from starting_key. - int32_t starting_key = table.GetEntryAt(0); - - for (size_t i = 1; i <= num_entries; i++) { - BuildSwitchCaseHelper(instruction, i, i == num_entries, table, value, starting_key + i - 1, - table.GetEntryAt(i), dex_pc); + // Don't use a packed switch if there are very few entries. + if (num_entries > kSmallSwitchThreshold) { + BuildSwitchJumpTable(table, instruction, value, dex_pc); + } else { + // Chained cmp-and-branch, starting from starting_key. 
+ for (size_t i = 1; i <= num_entries; i++) { + BuildSwitchCaseHelper(instruction, + i, + i == num_entries, + table, + value, + starting_key + i - 1, + table.GetEntryAt(i), + dex_pc); + } } } @@ -2840,18 +2835,19 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 return true; } // NOLINT(readability/fn_size) -HLocal* HGraphBuilder::GetLocalAt(int register_index) const { - return locals_.Get(register_index); +HLocal* HGraphBuilder::GetLocalAt(uint32_t register_index) const { + DCHECK_LT(register_index, locals_.size()); + return locals_[register_index]; } -void HGraphBuilder::UpdateLocal(int register_index, +void HGraphBuilder::UpdateLocal(uint32_t register_index, HInstruction* instruction, uint32_t dex_pc) const { HLocal* local = GetLocalAt(register_index); current_block_->AddInstruction(new (arena_) HStoreLocal(local, instruction, dex_pc)); } -HInstruction* HGraphBuilder::LoadLocal(int register_index, +HInstruction* HGraphBuilder::LoadLocal(uint32_t register_index, Primitive::Type type, uint32_t dex_pc) const { HLocal* local = GetLocalAt(register_index); diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h index ae452f2589..4c8e3d0442 100644 --- a/compiler/optimizing/builder.h +++ b/compiler/optimizing/builder.h @@ -17,6 +17,7 @@ #ifndef ART_COMPILER_OPTIMIZING_BUILDER_H_ #define ART_COMPILER_OPTIMIZING_BUILDER_H_ +#include "base/arena_containers.h" #include "base/arena_object.h" #include "dex_file.h" #include "dex_file-inl.h" @@ -24,7 +25,6 @@ #include "driver/dex_compilation_unit.h" #include "optimizing_compiler_stats.h" #include "primitive.h" -#include "utils/growable_array.h" #include "nodes.h" namespace art { @@ -43,8 +43,8 @@ class HGraphBuilder : public ValueObject { const uint8_t* interpreter_metadata, Handle<mirror::DexCache> dex_cache) : arena_(graph->GetArena()), - branch_targets_(graph->GetArena(), 0), - locals_(graph->GetArena(), 0), + branch_targets_(graph->GetArena()->Adapter(kArenaAllocGraphBuilder)), + locals_(graph->GetArena()->Adapter(kArenaAllocGraphBuilder)), entry_block_(nullptr), exit_block_(nullptr), current_block_(nullptr), @@ -64,8 +64,8 @@ class HGraphBuilder : public ValueObject { // Only for unit testing. HGraphBuilder(HGraph* graph, Primitive::Type return_type = Primitive::kPrimInt) : arena_(graph->GetArena()), - branch_targets_(graph->GetArena(), 0), - locals_(graph->GetArena(), 0), + branch_targets_(graph->GetArena()->Adapter(kArenaAllocGraphBuilder)), + locals_(graph->GetArena()->Adapter(kArenaAllocGraphBuilder)), entry_block_(nullptr), exit_block_(nullptr), current_block_(nullptr), @@ -90,6 +90,9 @@ class HGraphBuilder : public ValueObject { static constexpr const char* kBuilderPassName = "builder"; + // The number of entries in a packed switch before we use a jump table. + static constexpr uint16_t kSmallSwitchThreshold = 5; + private: // Analyzes the dex instruction and adds HInstruction to the graph // to execute that instruction. Returns whether the instruction can @@ -118,21 +121,21 @@ class HGraphBuilder : public ValueObject { // instructions and links them to the corresponding catch blocks. void InsertTryBoundaryBlocks(const DexFile::CodeItem& code_item); - // Splits a single edge, inserting a TryBoundary of given `kind` and linking - // it to exception handlers of `try_item`. 
- void SplitTryBoundaryEdge(HBasicBlock* predecessor, - HBasicBlock* successor, - HTryBoundary::BoundaryKind kind, - const DexFile::CodeItem& code_item, - const DexFile::TryItem& try_item); + // Iterates over the exception handlers of `try_item`, finds the corresponding + // catch blocks and makes them successors of `try_boundary`. The order of + // successors matches the order in which runtime exception delivery searches + // for a handler. + void LinkToCatchBlocks(HTryBoundary* try_boundary, + const DexFile::CodeItem& code_item, + const DexFile::TryItem* try_item); bool CanDecodeQuickenedInfo() const; uint16_t LookupQuickenedInfo(uint32_t dex_pc); void InitializeLocals(uint16_t count); - HLocal* GetLocalAt(int register_index) const; - void UpdateLocal(int register_index, HInstruction* instruction, uint32_t dex_pc) const; - HInstruction* LoadLocal(int register_index, Primitive::Type type, uint32_t dex_pc) const; + HLocal* GetLocalAt(uint32_t register_index) const; + void UpdateLocal(uint32_t register_index, HInstruction* instruction, uint32_t dex_pc) const; + HInstruction* LoadLocal(uint32_t register_index, Primitive::Type type, uint32_t dex_pc) const; void PotentiallyAddSuspendCheck(HBasicBlock* target, uint32_t dex_pc); void InitializeParameters(uint16_t number_of_parameters); bool NeedsAccessCheck(uint32_t type_index) const; @@ -239,6 +242,12 @@ class HGraphBuilder : public ValueObject { // Builds an instruction sequence for a packed switch statement. void BuildPackedSwitch(const Instruction& instruction, uint32_t dex_pc); + // Build a switch instruction from a packed switch statement. + void BuildSwitchJumpTable(const SwitchTable& table, + const Instruction& instruction, + HInstruction* value, + uint32_t dex_pc); + // Builds an instruction sequence for a sparse switch statement. void BuildSparseSwitch(const Instruction& instruction, uint32_t dex_pc); @@ -304,9 +313,9 @@ class HGraphBuilder : public ValueObject { // A list of the size of the dex code holding block information for // the method. If an entry contains a block, then the dex instruction // starting at that entry is the first instruction of a new block. 
- GrowableArray<HBasicBlock*> branch_targets_; + ArenaVector<HBasicBlock*> branch_targets_; - GrowableArray<HLocal*> locals_; + ArenaVector<HLocal*> locals_; HBasicBlock* entry_block_; HBasicBlock* exit_block_; diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 3c6a41df34..be05691741 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -203,13 +203,13 @@ class DisassemblyScope { void CodeGenerator::GenerateSlowPaths() { size_t code_start = 0; - for (size_t i = 0, e = slow_paths_.Size(); i < e; ++i) { + for (SlowPathCode* slow_path : slow_paths_) { if (disasm_info_ != nullptr) { code_start = GetAssembler()->CodeSize(); } - slow_paths_.Get(i)->EmitNativeCode(this); + slow_path->EmitNativeCode(this); if (disasm_info_ != nullptr) { - disasm_info_->AddSlowPathInterval(slow_paths_.Get(i), code_start, GetAssembler()->CodeSize()); + disasm_info_->AddSlowPathInterval(slow_path, code_start, GetAssembler()->CodeSize()); } } } diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index a1c6db0a2c..5da0e59187 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -261,7 +261,7 @@ class CodeGenerator { bool IsImplicitNullCheckAllowed(HNullCheck* null_check) const; void AddSlowPath(SlowPathCode* slow_path) { - slow_paths_.Add(slow_path); + slow_paths_.push_back(slow_path); } void SetSrcMap(DefaultSrcMap* src_map) { src_map_ = src_map; } @@ -425,9 +425,12 @@ class CodeGenerator { core_spill_mask_(0), fpu_spill_mask_(0), first_register_slot_in_slow_path_(0), - blocked_core_registers_(graph->GetArena()->AllocArray<bool>(number_of_core_registers)), - blocked_fpu_registers_(graph->GetArena()->AllocArray<bool>(number_of_fpu_registers)), - blocked_register_pairs_(graph->GetArena()->AllocArray<bool>(number_of_register_pairs)), + blocked_core_registers_(graph->GetArena()->AllocArray<bool>(number_of_core_registers, + kArenaAllocCodeGenerator)), + blocked_fpu_registers_(graph->GetArena()->AllocArray<bool>(number_of_fpu_registers, + kArenaAllocCodeGenerator)), + blocked_register_pairs_(graph->GetArena()->AllocArray<bool>(number_of_register_pairs, + kArenaAllocCodeGenerator)), number_of_core_registers_(number_of_core_registers), number_of_fpu_registers_(number_of_fpu_registers), number_of_register_pairs_(number_of_register_pairs), @@ -441,10 +444,12 @@ class CodeGenerator { graph_(graph), compiler_options_(compiler_options), src_map_(nullptr), - slow_paths_(graph->GetArena(), 8), + slow_paths_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), current_block_index_(0), is_leaf_(true), - requires_current_method_(false) {} + requires_current_method_(false) { + slow_paths_.reserve(8); + } // Register allocation logic. void AllocateRegistersLocally(HInstruction* instruction) const; @@ -485,8 +490,20 @@ class CodeGenerator { return instruction_set == kX86 || instruction_set == kX86_64; } - // Arm64 has its own type for a label, so we need to templatize this method + // Arm64 has its own type for a label, so we need to templatize these methods // to share the logic. 
+ + template <typename LabelType> + LabelType* CommonInitializeLabels() { + size_t size = GetGraph()->GetBlocks().size(); + LabelType* labels = GetGraph()->GetArena()->AllocArray<LabelType>(size, + kArenaAllocCodeGenerator); + for (size_t i = 0; i != size; ++i) { + new(labels + i) LabelType(); + } + return labels; + } + template <typename LabelType> LabelType* CommonGetLabelOf(LabelType* raw_pointer_to_labels_array, HBasicBlock* block) const { block = FirstNonEmptyBlock(block); @@ -539,7 +556,7 @@ class CodeGenerator { // Native to dex_pc map used for native debugging/profiling tools. DefaultSrcMap* src_map_; - GrowableArray<SlowPathCode*> slow_paths_; + ArenaVector<SlowPathCode*> slow_paths_; // The current block index in `block_order_` of the block // we are generating code for. diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index d431acfb53..c775e03717 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -428,19 +428,25 @@ CodeGeneratorARM::CodeGeneratorARM(HGraph* graph, kNumberOfRegisterPairs, ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves), arraysize(kCoreCalleeSaves)), - ComputeRegisterMask(reinterpret_cast<const int*>(kFpuCalleeSaves), - arraysize(kFpuCalleeSaves)), + graph->IsDebuggable() + // If the graph is debuggable, we need to save the fpu registers ourselves, + // as the stubs do not do it. + ? 0 + : ComputeRegisterMask(reinterpret_cast<const int*>(kFpuCalleeSaves), + arraysize(kFpuCalleeSaves)), compiler_options, stats), - block_labels_(graph->GetArena(), 0), + block_labels_(nullptr), location_builder_(graph, this), instruction_visitor_(graph, this), move_resolver_(graph->GetArena(), this), assembler_(), isa_features_(isa_features), - method_patches_(MethodReferenceComparator(), graph->GetArena()->Adapter()), - call_patches_(MethodReferenceComparator(), graph->GetArena()->Adapter()), - relative_call_patches_(graph->GetArena()->Adapter()) { + method_patches_(MethodReferenceComparator(), + graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + call_patches_(MethodReferenceComparator(), + graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) { // Always save the LR register to mimic Quick. AddAllocatedRegister(Location::RegisterLocation(LR)); } @@ -459,8 +465,8 @@ void CodeGeneratorARM::Finalize(CodeAllocator* allocator) { for (HBasicBlock* block : *block_order_) { // Get the label directly from block_labels_ rather than through GetLabelOf() to avoid // FirstNonEmptyBlock() which could lead to adjusting a label more than once. 
- DCHECK_LT(static_cast<size_t>(block->GetBlockId()), block_labels_.Size()); - Label* block_label = &block_labels_.GetRawStorage()[block->GetBlockId()]; + DCHECK_LT(block->GetBlockId(), GetGraph()->GetBlocks().size()); + Label* block_label = &block_labels_[block->GetBlockId()]; DCHECK_EQ(block_label->IsBound(), !block->IsSingleJump()); if (block_label->IsBound()) { __ AdjustLabelPosition(block_label); @@ -4034,7 +4040,8 @@ ArmAssembler* ParallelMoveResolverARM::GetAssembler() const { } void ParallelMoveResolverARM::EmitMove(size_t index) { - MoveOperands* move = moves_.Get(index); + DCHECK_LT(index, moves_.size()); + MoveOperands* move = moves_[index]; Location source = move->GetSource(); Location destination = move->GetDestination(); @@ -4166,7 +4173,8 @@ void ParallelMoveResolverARM::Exchange(int mem1, int mem2) { } void ParallelMoveResolverARM::EmitSwap(size_t index) { - MoveOperands* move = moves_.Get(index); + DCHECK_LT(index, moves_.size()); + MoveOperands* move = moves_[index]; Location source = move->GetSource(); Location destination = move->GetDestination(); @@ -4477,7 +4485,11 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { break; } case TypeCheckKind::kArrayObjectCheck: { - // Just need to check that the object's class is a non primitive array. + // Do an exact check. + Label exact_check; + __ cmp(out, ShifterOperand(cls)); + __ b(&exact_check, EQ); + // Otherwise, we need to check that the object's class is a non primitive array. __ LoadFromOffset(kLoadWord, out, out, component_offset); __ MaybeUnpoisonHeapReference(out); // If `out` is null, we use it for the result, and jump to `done`. @@ -4485,6 +4497,7 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { __ LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset); static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); __ CompareAndBranchIfNonZero(out, &zero); + __ Bind(&exact_check); __ LoadImmediate(out, 1); __ b(&done); break; @@ -4623,20 +4636,22 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { } case TypeCheckKind::kClassHierarchyCheck: { // Walk over the class hierarchy to find a match. - Label loop, success; + Label loop; __ Bind(&loop); __ cmp(temp, ShifterOperand(cls)); - __ b(&success, EQ); + __ b(&done, EQ); __ LoadFromOffset(kLoadWord, temp, temp, super_offset); __ MaybeUnpoisonHeapReference(temp); __ CompareAndBranchIfNonZero(temp, &loop); // Jump to the slow path to throw the exception. __ b(slow_path->GetEntryLabel()); - __ Bind(&success); break; } case TypeCheckKind::kArrayObjectCheck: { - // Just need to check that the object's class is a non primitive array. + // Do an exact check. + __ cmp(temp, ShifterOperand(cls)); + __ b(&done, EQ); + // Otherwise, we need to check that the object's class is a non primitive array. __ LoadFromOffset(kLoadWord, temp, temp, component_offset); __ MaybeUnpoisonHeapReference(temp); __ CompareAndBranchIfZero(temp, slow_path->GetEntryLabel()); @@ -4946,6 +4961,33 @@ void InstructionCodeGeneratorARM::VisitFakeString(HFakeString* instruction ATTRI // Will be generated at use site. } +// Simple implementation of packed switch - generate cascaded compare/jumps. 
+void LocationsBuilderARM::VisitPackedSwitch(HPackedSwitch* switch_instr) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); +} + +void InstructionCodeGeneratorARM::VisitPackedSwitch(HPackedSwitch* switch_instr) { + int32_t lower_bound = switch_instr->GetStartValue(); + int32_t num_entries = switch_instr->GetNumEntries(); + LocationSummary* locations = switch_instr->GetLocations(); + Register value_reg = locations->InAt(0).AsRegister<Register>(); + HBasicBlock* default_block = switch_instr->GetDefaultBlock(); + + // Create a series of compare/jumps. + const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors(); + for (int32_t i = 0; i < num_entries; i++) { + GenerateCompareWithImmediate(value_reg, lower_bound + i); + __ b(codegen_->GetLabelOf(successors.at(i)), EQ); + } + + // And the default for any other value. + if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) { + __ b(codegen_->GetLabelOf(default_block)); + } +} + void CodeGeneratorARM::MoveFromReturnRegister(Location trg, Primitive::Type type) { if (!trg.IsValid()) { DCHECK(type == Primitive::kPrimVoid); diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index 1d98789213..111112e9b2 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -299,11 +299,11 @@ class CodeGeneratorARM : public CodeGenerator { void MarkGCCard(Register temp, Register card, Register object, Register value, bool can_be_null); Label* GetLabelOf(HBasicBlock* block) const { - return CommonGetLabelOf<Label>(block_labels_.GetRawStorage(), block); + return CommonGetLabelOf<Label>(block_labels_, block); } void Initialize() OVERRIDE { - block_labels_.SetSize(GetGraph()->GetBlocks().size()); + block_labels_ = CommonInitializeLabels<Label>(); } void Finalize(CodeAllocator* allocator) OVERRIDE; @@ -335,7 +335,7 @@ class CodeGeneratorARM : public CodeGenerator { Literal* DeduplicateMethodCodeLiteral(MethodReference target_method); // Labels for each block that will be compiled. - GrowableArray<Label> block_labels_; + Label* block_labels_; // Indexed by block id. Label frame_entry_label_; LocationsBuilderARM location_builder_; InstructionCodeGeneratorARM instruction_visitor_; diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 580e93e9c4..70327af878 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -119,8 +119,11 @@ static void SaveRestoreLiveRegistersHelper(CodeGenerator* codegen, CPURegList core_list = CPURegList(CPURegister::kRegister, kXRegSize, register_set->GetCoreRegisters() & (~callee_saved_core_registers.list())); - CPURegList fp_list = CPURegList(CPURegister::kFPRegister, kDRegSize, - register_set->GetFloatingPointRegisters() & (~callee_saved_fp_registers.list())); + CPURegList fp_list = CPURegList( + CPURegister::kFPRegister, + kDRegSize, + register_set->GetFloatingPointRegisters() + & (~(codegen->GetGraph()->IsDebuggable() ? 
0 : callee_saved_fp_registers.list()))); MacroAssembler* masm = down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler(); UseScratchRegisterScope temps(masm); @@ -534,7 +537,9 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph, kNumberOfAllocatableFPRegisters, kNumberOfAllocatableRegisterPairs, callee_saved_core_registers.list(), - callee_saved_fp_registers.list(), + // If the graph is debuggable, we need to save the fpu registers ourselves, + // as the stubs do not do it. + graph->IsDebuggable() ? 0 : callee_saved_fp_registers.list(), compiler_options, stats), block_labels_(nullptr), @@ -542,11 +547,14 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph, instruction_visitor_(graph, this), move_resolver_(graph->GetArena(), this), isa_features_(isa_features), - uint64_literals_(std::less<uint64_t>(), graph->GetArena()->Adapter()), - method_patches_(MethodReferenceComparator(), graph->GetArena()->Adapter()), - call_patches_(MethodReferenceComparator(), graph->GetArena()->Adapter()), - relative_call_patches_(graph->GetArena()->Adapter()), - pc_rel_dex_cache_patches_(graph->GetArena()->Adapter()) { + uint64_literals_(std::less<uint64_t>(), + graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + method_patches_(MethodReferenceComparator(), + graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + call_patches_(MethodReferenceComparator(), + graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + pc_rel_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) { // Save the link register (containing the return address) to mimic Quick. AddAllocatedRegister(LocationFrom(lr)); } @@ -610,7 +618,8 @@ void ParallelMoveResolverARM64::FreeScratchLocation(Location loc) { } void ParallelMoveResolverARM64::EmitMove(size_t index) { - MoveOperands* move = moves_.Get(index); + DCHECK_LT(index, moves_.size()); + MoveOperands* move = moves_[index]; codegen_->MoveLocation(move->GetDestination(), move->GetSource()); } @@ -2342,7 +2351,11 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { break; } case TypeCheckKind::kArrayObjectCheck: { - // Just need to check that the object's class is a non primitive array. + // Do an exact check. + vixl::Label exact_check; + __ Cmp(out, cls); + __ B(eq, &exact_check); + // Otherwise, we need to check that the object's class is a non primitive array. __ Ldr(out, HeapOperand(out, component_offset)); GetAssembler()->MaybeUnpoisonHeapReference(out); // If `out` is null, we use it for the result, and jump to `done`. @@ -2350,6 +2363,7 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { __ Ldrh(out, HeapOperand(out, primitive_offset)); static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); __ Cbnz(out, &zero); + __ Bind(&exact_check); __ Mov(out, 1); __ B(&done); break; @@ -2489,20 +2503,22 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { } case TypeCheckKind::kClassHierarchyCheck: { // Walk over the class hierarchy to find a match. - vixl::Label loop, success; + vixl::Label loop; __ Bind(&loop); __ Cmp(temp, cls); - __ B(eq, &success); + __ B(eq, &done); __ Ldr(temp, HeapOperand(temp, super_offset)); GetAssembler()->MaybeUnpoisonHeapReference(temp); __ Cbnz(temp, &loop); // Jump to the slow path to throw the exception. 
__ B(slow_path->GetEntryLabel()); - __ Bind(&success); break; } case TypeCheckKind::kArrayObjectCheck: { - // Just need to check that the object's class is a non primitive array. + // Do an exact check. + __ Cmp(temp, cls); + __ B(eq, &done); + // Otherwise, we need to check that the object's class is a non primitive array. __ Ldr(temp, HeapOperand(temp, component_offset)); GetAssembler()->MaybeUnpoisonHeapReference(temp); __ Cbz(temp, slow_path->GetEntryLabel()); @@ -3533,6 +3549,38 @@ void InstructionCodeGeneratorARM64::VisitFakeString(HFakeString* instruction ATT // Will be generated at use site. } +// Simple implementation of packed switch - generate cascaded compare/jumps. +void LocationsBuilderARM64::VisitPackedSwitch(HPackedSwitch* switch_instr) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); +} + +void InstructionCodeGeneratorARM64::VisitPackedSwitch(HPackedSwitch* switch_instr) { + int32_t lower_bound = switch_instr->GetStartValue(); + int32_t num_entries = switch_instr->GetNumEntries(); + Register value_reg = InputRegisterAt(switch_instr, 0); + HBasicBlock* default_block = switch_instr->GetDefaultBlock(); + + // Create a series of compare/jumps. + const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors(); + for (int32_t i = 0; i < num_entries; i++) { + int32_t case_value = lower_bound + i; + vixl::Label* succ = codegen_->GetLabelOf(successors.at(i)); + if (case_value == 0) { + __ Cbz(value_reg, succ); + } else { + __ Cmp(value_reg, vixl::Operand(case_value)); + __ B(eq, succ); + } + } + + // And the default for any other value. + if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) { + __ B(codegen_->GetLabelOf(default_block)); + } +} + #undef __ #undef QUICK_ENTRY_POINT diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 89671088c7..7178081bf8 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -326,12 +326,7 @@ class CodeGeneratorARM64 : public CodeGenerator { } void Initialize() OVERRIDE { - HGraph* graph = GetGraph(); - int length = graph->GetBlocks().size(); - block_labels_ = graph->GetArena()->AllocArray<vixl::Label>(length); - for (int i = 0; i < length; ++i) { - new(block_labels_ + i) vixl::Label(); - } + block_labels_ = CommonInitializeLabels<vixl::Label>(); } void Finalize(CodeAllocator* allocator) OVERRIDE; @@ -400,7 +395,7 @@ class CodeGeneratorARM64 : public CodeGenerator { }; // Labels for each block that will be compiled. - vixl::Label* block_labels_; + vixl::Label* block_labels_; // Indexed by block id. 
vixl::Label frame_entry_label_; LocationsBuilderARM64 location_builder_; diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index 4722e42694..c9f849318c 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -20,7 +20,9 @@ #include "entrypoints/quick/quick_entrypoints_enum.h" #include "gc/accounting/card_table.h" #include "intrinsics.h" +#include "intrinsics_mips64.h" #include "art_method.h" +#include "code_generator_utils.h" #include "mirror/array-inl.h" #include "mirror/class-inl.h" #include "offsets.h" @@ -36,7 +38,6 @@ static constexpr int kCurrentMethodStackOffset = 0; static constexpr GpuRegister kMethodRegisterArgument = A0; // We need extra temporary/scratch registers (in addition to AT) in some cases. -static constexpr GpuRegister TMP = T8; static constexpr FpuRegister FTMP = F8; // ART Thread Register. @@ -430,7 +431,7 @@ CodeGeneratorMIPS64::CodeGeneratorMIPS64(HGraph* graph, arraysize(kFpuCalleeSaves)), compiler_options, stats), - block_labels_(graph->GetArena(), 0), + block_labels_(nullptr), location_builder_(graph, this), instruction_visitor_(graph, this), move_resolver_(graph->GetArena(), this), @@ -452,12 +453,14 @@ Mips64Assembler* ParallelMoveResolverMIPS64::GetAssembler() const { } void ParallelMoveResolverMIPS64::EmitMove(size_t index) { - MoveOperands* move = moves_.Get(index); + DCHECK_LT(index, moves_.size()); + MoveOperands* move = moves_[index]; codegen_->MoveLocation(move->GetDestination(), move->GetSource(), move->GetType()); } void ParallelMoveResolverMIPS64::EmitSwap(size_t index) { - MoveOperands* move = moves_.Get(index); + DCHECK_LT(index, moves_.size()); + MoveOperands* move = moves_[index]; codegen_->SwapLocations(move->GetDestination(), move->GetSource(), move->GetType()); } @@ -971,11 +974,11 @@ size_t CodeGeneratorMIPS64::RestoreFloatingPointRegister(size_t stack_index, uin } void CodeGeneratorMIPS64::DumpCoreRegister(std::ostream& stream, int reg) const { - stream << Mips64ManagedRegister::FromGpuRegister(GpuRegister(reg)); + stream << GpuRegister(reg); } void CodeGeneratorMIPS64::DumpFloatingPointRegister(std::ostream& stream, int reg) const { - stream << Mips64ManagedRegister::FromFpuRegister(FpuRegister(reg)); + stream << FpuRegister(reg); } void CodeGeneratorMIPS64::InvokeRuntime(QuickEntrypointEnum entrypoint, @@ -1444,12 +1447,11 @@ void InstructionCodeGeneratorMIPS64::VisitArrayLength(HArrayLength* instruction) } void LocationsBuilderMIPS64::VisitArraySet(HArraySet* instruction) { - Primitive::Type value_type = instruction->GetComponentType(); - bool is_object = value_type == Primitive::kPrimNot; + bool needs_runtime_call = instruction->NeedsTypeCheck(); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( instruction, - is_object ? LocationSummary::kCall : LocationSummary::kNoCall); - if (is_object) { + needs_runtime_call ? 
LocationSummary::kCall : LocationSummary::kNoCall); + if (needs_runtime_call) { InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); @@ -2396,7 +2398,11 @@ void InstructionCodeGeneratorMIPS64::VisitInvokeInterface(HInvokeInterface* invo } void LocationsBuilderMIPS64::VisitInvokeVirtual(HInvokeVirtual* invoke) { - // TODO intrinsic function + IntrinsicLocationsBuilderMIPS64 intrinsic(codegen_); + if (intrinsic.TryDispatch(invoke)) { + return; + } + HandleInvoke(invoke); } @@ -2405,7 +2411,11 @@ void LocationsBuilderMIPS64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* in // invokes must have been pruned by art::PrepareForRegisterAllocation. DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck()); - // TODO - intrinsic function + IntrinsicLocationsBuilderMIPS64 intrinsic(codegen_); + if (intrinsic.TryDispatch(invoke)) { + return; + } + HandleInvoke(invoke); // While SetupBlockedRegisters() blocks registers S2-S8 due to their @@ -2420,10 +2430,10 @@ void LocationsBuilderMIPS64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* in } } -static bool TryGenerateIntrinsicCode(HInvoke* invoke, - CodeGeneratorMIPS64* codegen ATTRIBUTE_UNUSED) { +static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorMIPS64* codegen) { if (invoke->GetLocations()->Intrinsified()) { - // TODO - intrinsic function + IntrinsicCodeGeneratorMIPS64 intrinsic(codegen); + intrinsic.Dispatch(invoke); return true; } return false; @@ -2532,7 +2542,10 @@ void InstructionCodeGeneratorMIPS64::VisitInvokeStaticOrDirect(HInvokeStaticOrDi } void InstructionCodeGeneratorMIPS64::VisitInvokeVirtual(HInvokeVirtual* invoke) { - // TODO: Try to generate intrinsics code. + if (TryGenerateIntrinsicCode(invoke, codegen_)) { + return; + } + LocationSummary* locations = invoke->GetLocations(); Location receiver = locations->InAt(0); GpuRegister temp = invoke->GetLocations()->GetTemp(0).AsRegister<GpuRegister>(); @@ -3365,5 +3378,38 @@ void InstructionCodeGeneratorMIPS64::VisitFakeString(HFakeString* instruction AT // Will be generated at use site. } +// Simple implementation of packed switch - generate cascaded compare/jumps. +void LocationsBuilderMIPS64::VisitPackedSwitch(HPackedSwitch* switch_instr) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); +} + +void InstructionCodeGeneratorMIPS64::VisitPackedSwitch(HPackedSwitch* switch_instr) { + int32_t lower_bound = switch_instr->GetStartValue(); + int32_t num_entries = switch_instr->GetNumEntries(); + LocationSummary* locations = switch_instr->GetLocations(); + GpuRegister value_reg = locations->InAt(0).AsRegister<GpuRegister>(); + HBasicBlock* default_block = switch_instr->GetDefaultBlock(); + + // Create a series of compare/jumps. + const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors(); + for (int32_t i = 0; i < num_entries; i++) { + int32_t case_value = lower_bound + i; + Label* succ = codegen_->GetLabelOf(successors.at(i)); + if (case_value == 0) { + __ Beqzc(value_reg, succ); + } else { + __ LoadConst32(TMP, case_value); + __ Beqc(value_reg, TMP, succ); + } + } + + // And the default for any other value. 
+ if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) { + __ B(codegen_->GetLabelOf(default_block)); + } +} + } // namespace mips64 } // namespace art diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h index f66ecb3711..16461d6c04 100644 --- a/compiler/optimizing/code_generator_mips64.h +++ b/compiler/optimizing/code_generator_mips64.h @@ -270,11 +270,11 @@ class CodeGeneratorMIPS64 : public CodeGenerator { } Label* GetLabelOf(HBasicBlock* block) const { - return CommonGetLabelOf<Label>(block_labels_.GetRawStorage(), block); + return CommonGetLabelOf<Label>(block_labels_, block); } void Initialize() OVERRIDE { - block_labels_.SetSize(GetGraph()->GetBlocks().size()); + block_labels_ = CommonInitializeLabels<Label>(); } void Finalize(CodeAllocator* allocator) OVERRIDE; @@ -315,7 +315,7 @@ class CodeGeneratorMIPS64 : public CodeGenerator { private: // Labels for each block that will be compiled. - GrowableArray<Label> block_labels_; + Label* block_labels_; // Indexed by block id. Label frame_entry_label_; LocationsBuilderMIPS64 location_builder_; InstructionCodeGeneratorMIPS64 instruction_visitor_; diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 3d03dd8146..a47a95e3be 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -470,13 +470,13 @@ CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, 0, compiler_options, stats), - block_labels_(graph->GetArena(), 0), + block_labels_(nullptr), location_builder_(graph, this), instruction_visitor_(graph, this), move_resolver_(graph->GetArena(), this), isa_features_(isa_features), - method_patches_(graph->GetArena()->Adapter()), - relative_call_patches_(graph->GetArena()->Adapter()) { + method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) { // Use a fake return address register to mimic Quick. AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister)); } @@ -1314,7 +1314,7 @@ void InstructionCodeGeneratorX86::VisitCondition(HCondition* cond) { default: { // Integer case. - // Clear output register: setcc only sets the low byte. + // Clear output register: setb only sets the low byte. __ xorl(reg, reg); if (rhs.IsRegister()) { @@ -4630,7 +4630,8 @@ void ParallelMoveResolverX86::MoveMemoryToMemory64(int dst, int src) { } void ParallelMoveResolverX86::EmitMove(size_t index) { - MoveOperands* move = moves_.Get(index); + DCHECK_LT(index, moves_.size()); + MoveOperands* move = moves_[index]; Location source = move->GetSource(); Location destination = move->GetDestination(); @@ -4782,7 +4783,8 @@ void ParallelMoveResolverX86::Exchange(int mem1, int mem2) { } void ParallelMoveResolverX86::EmitSwap(size_t index) { - MoveOperands* move = moves_.Get(index); + DCHECK_LT(index, moves_.size()); + MoveOperands* move = moves_[index]; Location source = move->GetSource(); Location destination = move->GetDestination(); @@ -5038,6 +5040,7 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { DCHECK(cls.IsStackSlot()) << cls; __ cmpl(out, Address(ESP, cls.GetStackIndex())); } + // Classes must be equal for the instanceof to succeed. 
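// As a sketch, the exact-check case reduces instanceof to a single class
// comparison, roughly:
//
//   out = (obj != null) && (obj->klass == cls);
//
// with the null receiver already routed to `zero` (the false result) earlier.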
__ j(kNotEqual, &zero); __ movl(out, Immediate(1)); @@ -5092,7 +5095,16 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { break; } case TypeCheckKind::kArrayObjectCheck: { - // Just need to check that the object's class is a non primitive array. + // Do an exact check. + NearLabel exact_check; + if (cls.IsRegister()) { + __ cmpl(out, cls.AsRegister<Register>()); + } else { + DCHECK(cls.IsStackSlot()) << cls; + __ cmpl(out, Address(ESP, cls.GetStackIndex())); + } + __ j(kEqual, &exact_check); + // Otherwise, we need to check that the object's class is a non primitive array. __ movl(out, Address(out, component_offset)); __ MaybeUnpoisonHeapReference(out); __ testl(out, out); @@ -5100,6 +5112,7 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { __ j(kEqual, &done); __ cmpw(Address(out, primitive_offset), Immediate(Primitive::kPrimNot)); __ j(kNotEqual, &zero); + __ Bind(&exact_check); __ movl(out, Immediate(1)); __ jmp(&done); break; @@ -5255,7 +5268,7 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { } case TypeCheckKind::kClassHierarchyCheck: { // Walk over the class hierarchy to find a match. - NearLabel loop, success; + NearLabel loop; __ Bind(&loop); if (cls.IsRegister()) { __ cmpl(temp, cls.AsRegister<Register>()); @@ -5263,18 +5276,25 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { DCHECK(cls.IsStackSlot()) << cls; __ cmpl(temp, Address(ESP, cls.GetStackIndex())); } - __ j(kEqual, &success); + __ j(kEqual, &done); __ movl(temp, Address(temp, super_offset)); __ MaybeUnpoisonHeapReference(temp); __ testl(temp, temp); __ j(kNotEqual, &loop); // Jump to the slow path to throw the exception. __ jmp(slow_path->GetEntryLabel()); - __ Bind(&success); break; } case TypeCheckKind::kArrayObjectCheck: { - // Just need to check that the object's class is a non primitive array. + // Do an exact check. + if (cls.IsRegister()) { + __ cmpl(temp, cls.AsRegister<Register>()); + } else { + DCHECK(cls.IsStackSlot()) << cls; + __ cmpl(temp, Address(ESP, cls.GetStackIndex())); + } + __ j(kEqual, &done); + // Otherwise, we need to check that the object's class is a non primitive array. __ movl(temp, Address(temp, component_offset)); __ MaybeUnpoisonHeapReference(temp); __ testl(temp, temp); @@ -5470,6 +5490,38 @@ void InstructionCodeGeneratorX86::VisitFakeString(HFakeString* instruction ATTRI // Will be generated at use site. } +// Simple implementation of packed switch - generate cascaded compare/jumps. +void LocationsBuilderX86::VisitPackedSwitch(HPackedSwitch* switch_instr) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); +} + +void InstructionCodeGeneratorX86::VisitPackedSwitch(HPackedSwitch* switch_instr) { + int32_t lower_bound = switch_instr->GetStartValue(); + int32_t num_entries = switch_instr->GetNumEntries(); + LocationSummary* locations = switch_instr->GetLocations(); + Register value_reg = locations->InAt(0).AsRegister<Register>(); + HBasicBlock* default_block = switch_instr->GetDefaultBlock(); + + // Create a series of compare/jumps. 
+ const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors(); + for (int i = 0; i < num_entries; i++) { + int32_t case_value = lower_bound + i; + if (case_value == 0) { + __ testl(value_reg, value_reg); + } else { + __ cmpl(value_reg, Immediate(case_value)); + } + __ j(kEqual, codegen_->GetLabelOf(successors.at(i))); + } + + // And the default for any other value. + if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) { + __ jmp(codegen_->GetLabelOf(default_block)); + } +} + void LocationsBuilderX86::VisitX86ComputeBaseMethodAddress( HX86ComputeBaseMethodAddress* insn) { LocationSummary* locations = @@ -5571,7 +5623,7 @@ void CodeGeneratorX86::Finalize(CodeAllocator* allocator) { /** * Class to handle late fixup of offsets into constant area. */ -class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocMisc> { +class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> { public: RIPFixup(const CodeGeneratorX86& codegen, int offset) : codegen_(codegen), offset_into_constant_area_(offset) {} diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index f38e1ea09c..2c2fc65444 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -316,11 +316,11 @@ class CodeGeneratorX86 : public CodeGenerator { bool value_can_be_null); Label* GetLabelOf(HBasicBlock* block) const { - return CommonGetLabelOf<Label>(block_labels_.GetRawStorage(), block); + return CommonGetLabelOf<Label>(block_labels_, block); } void Initialize() OVERRIDE { - block_labels_.SetSize(GetGraph()->GetBlocks().size()); + block_labels_ = CommonInitializeLabels<Label>(); } bool NeedsTwoRegisters(Primitive::Type type) const OVERRIDE { @@ -356,7 +356,7 @@ class CodeGeneratorX86 : public CodeGenerator { private: // Labels for each block that will be compiled. - GrowableArray<Label> block_labels_; + Label* block_labels_; // Indexed by block id. 
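// A minimal sketch of the lookup this plain array enables (block_labels_ now
// holds one Label per basic block, created by CommonInitializeLabels<Label>()):
//
//   Label* label = &block_labels_[block->GetBlockId()];
//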
Label frame_entry_label_; LocationsBuilderX86 location_builder_; InstructionCodeGeneratorX86 instruction_visitor_; diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 32a1db5475..b845a271d0 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -620,15 +620,15 @@ CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, arraysize(kFpuCalleeSaves)), compiler_options, stats), - block_labels_(graph->GetArena(), 0), + block_labels_(nullptr), location_builder_(graph, this), instruction_visitor_(graph, this), move_resolver_(graph->GetArena(), this), isa_features_(isa_features), constant_area_start_(0), - method_patches_(graph->GetArena()->Adapter()), - relative_call_patches_(graph->GetArena()->Adapter()), - pc_rel_dex_cache_patches_(graph->GetArena()->Adapter()) { + method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + pc_rel_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) { AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister)); } @@ -4373,7 +4373,8 @@ X86_64Assembler* ParallelMoveResolverX86_64::GetAssembler() const { } void ParallelMoveResolverX86_64::EmitMove(size_t index) { - MoveOperands* move = moves_.Get(index); + DCHECK_LT(index, moves_.size()); + MoveOperands* move = moves_[index]; Location source = move->GetSource(); Location destination = move->GetDestination(); @@ -4531,7 +4532,8 @@ void ParallelMoveResolverX86_64::Exchange64(XmmRegister reg, int mem) { } void ParallelMoveResolverX86_64::EmitSwap(size_t index) { - MoveOperands* move = moves_.Get(index); + DCHECK_LT(index, moves_.size()); + MoveOperands* move = moves_[index]; Location source = move->GetSource(); Location destination = move->GetDestination(); @@ -4766,10 +4768,16 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { DCHECK(cls.IsStackSlot()) << cls; __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex())); } - // Classes must be equal for the instanceof to succeed. - __ j(kNotEqual, &zero); - __ movl(out, Immediate(1)); - __ jmp(&done); + if (zero.IsLinked()) { + // Classes must be equal for the instanceof to succeed. + __ j(kNotEqual, &zero); + __ movl(out, Immediate(1)); + __ jmp(&done); + } else { + __ setcc(kEqual, out); + // setcc only sets the low byte. + __ andl(out, Immediate(1)); + } break; } case TypeCheckKind::kAbstractClassCheck: { @@ -4820,7 +4828,16 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { break; } case TypeCheckKind::kArrayObjectCheck: { - // Just need to check that the object's class is a non primitive array. + // Do an exact check. + NearLabel exact_check; + if (cls.IsRegister()) { + __ cmpl(out, cls.AsRegister<CpuRegister>()); + } else { + DCHECK(cls.IsStackSlot()) << cls; + __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex())); + } + __ j(kEqual, &exact_check); + // Otherwise, we need to check that the object's class is a non primitive array. 
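// Roughly, the whole kArrayObjectCheck case computes (a sketch; the field
// names are only illustrative for the component/primitive offsets used below):
//
//   out = (obj->klass == cls) ||
//         (obj->klass->component_type != null &&
//          !obj->klass->component_type->IsPrimitive());
//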
__ movl(out, Address(out, component_offset)); __ MaybeUnpoisonHeapReference(out); __ testl(out, out); @@ -4828,6 +4845,7 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { __ j(kEqual, &done); __ cmpw(Address(out, primitive_offset), Immediate(Primitive::kPrimNot)); __ j(kNotEqual, &zero); + __ Bind(&exact_check); __ movl(out, Immediate(1)); __ jmp(&done); break; @@ -4983,7 +5001,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { } case TypeCheckKind::kClassHierarchyCheck: { // Walk over the class hierarchy to find a match. - NearLabel loop, success; + NearLabel loop; __ Bind(&loop); if (cls.IsRegister()) { __ cmpl(temp, cls.AsRegister<CpuRegister>()); @@ -4991,18 +5009,25 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { DCHECK(cls.IsStackSlot()) << cls; __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex())); } - __ j(kEqual, &success); + __ j(kEqual, &done); __ movl(temp, Address(temp, super_offset)); __ MaybeUnpoisonHeapReference(temp); __ testl(temp, temp); __ j(kNotEqual, &loop); // Jump to the slow path to throw the exception. __ jmp(slow_path->GetEntryLabel()); - __ Bind(&success); break; } case TypeCheckKind::kArrayObjectCheck: { - // Just need to check that the object's class is a non primitive array. + // Do an exact check. + if (cls.IsRegister()) { + __ cmpl(temp, cls.AsRegister<CpuRegister>()); + } else { + DCHECK(cls.IsStackSlot()) << cls; + __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex())); + } + __ j(kEqual, &done); + // Otherwise, we need to check that the object's class is a non primitive array. __ movl(temp, Address(temp, component_offset)); __ MaybeUnpoisonHeapReference(temp); __ testl(temp, temp); @@ -5180,6 +5205,38 @@ void InstructionCodeGeneratorX86_64::VisitFakeString(HFakeString* instruction AT // Will be generated at use site. } +// Simple implementation of packed switch - generate cascaded compare/jumps. +void LocationsBuilderX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); +} + +void InstructionCodeGeneratorX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) { + int32_t lower_bound = switch_instr->GetStartValue(); + int32_t num_entries = switch_instr->GetNumEntries(); + LocationSummary* locations = switch_instr->GetLocations(); + CpuRegister value_reg = locations->InAt(0).AsRegister<CpuRegister>(); + HBasicBlock* default_block = switch_instr->GetDefaultBlock(); + + // Create a series of compare/jumps. + const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors(); + for (int i = 0; i < num_entries; i++) { + int32_t case_value = lower_bound + i; + if (case_value == 0) { + __ testl(value_reg, value_reg); + } else { + __ cmpl(value_reg, Immediate(case_value)); + } + __ j(kEqual, codegen_->GetLabelOf(successors.at(i))); + } + + // And the default for any other value. + if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) { + __ jmp(codegen_->GetLabelOf(default_block)); + } +} + void CodeGeneratorX86_64::Load64BitValue(CpuRegister dest, int64_t value) { if (value == 0) { __ xorl(dest, dest); @@ -5222,7 +5279,7 @@ void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) { /** * Class to handle late fixup of offsets into constant area. 
*/ -class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocMisc> { +class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> { public: RIPFixup(const CodeGeneratorX86_64& codegen, int offset) : codegen_(codegen), offset_into_constant_area_(offset) {} diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 1ec3580040..197ce63847 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -286,11 +286,11 @@ class CodeGeneratorX86_64 : public CodeGenerator { void Move(Location destination, Location source); Label* GetLabelOf(HBasicBlock* block) const { - return CommonGetLabelOf<Label>(block_labels_.GetRawStorage(), block); + return CommonGetLabelOf<Label>(block_labels_, block); } void Initialize() OVERRIDE { - block_labels_.SetSize(GetGraph()->GetBlocks().size()); + block_labels_ = CommonInitializeLabels<Label>(); } bool NeedsTwoRegisters(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE { @@ -334,7 +334,7 @@ class CodeGeneratorX86_64 : public CodeGenerator { }; // Labels for each block that will be compiled. - GrowableArray<Label> block_labels_; + Label* block_labels_; // Indexed by block id. Label frame_entry_label_; LocationsBuilderX86_64 location_builder_; InstructionCodeGeneratorX86_64 instruction_visitor_; diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc index 7d509a22a6..007d0e3332 100644 --- a/compiler/optimizing/dead_code_elimination.cc +++ b/compiler/optimizing/dead_code_elimination.cc @@ -16,34 +16,67 @@ #include "dead_code_elimination.h" +#include "utils/array_ref.h" #include "base/bit_vector-inl.h" #include "ssa_phi_elimination.h" namespace art { -static void MarkReachableBlocks(HBasicBlock* block, ArenaBitVector* visited) { - int block_id = block->GetBlockId(); - if (visited->IsBitSet(block_id)) { - return; - } - visited->SetBit(block_id); - - HInstruction* last_instruction = block->GetLastInstruction(); - if (last_instruction->IsIf()) { - HIf* if_instruction = last_instruction->AsIf(); - HInstruction* condition = if_instruction->InputAt(0); - if (!condition->IsIntConstant()) { - MarkReachableBlocks(if_instruction->IfTrueSuccessor(), visited); - MarkReachableBlocks(if_instruction->IfFalseSuccessor(), visited); - } else if (condition->AsIntConstant()->IsOne()) { - MarkReachableBlocks(if_instruction->IfTrueSuccessor(), visited); - } else { - DCHECK(condition->AsIntConstant()->IsZero()); - MarkReachableBlocks(if_instruction->IfFalseSuccessor(), visited); +static void MarkReachableBlocks(HGraph* graph, ArenaBitVector* visited) { + ArenaVector<HBasicBlock*> worklist(graph->GetArena()->Adapter()); + constexpr size_t kDefaultWorlistSize = 8; + worklist.reserve(kDefaultWorlistSize); + visited->SetBit(graph->GetEntryBlock()->GetBlockId()); + worklist.push_back(graph->GetEntryBlock()); + + while (!worklist.empty()) { + HBasicBlock* block = worklist.back(); + worklist.pop_back(); + int block_id = block->GetBlockId(); + DCHECK(visited->IsBitSet(block_id)); + + ArrayRef<HBasicBlock* const> live_successors(block->GetSuccessors()); + HInstruction* last_instruction = block->GetLastInstruction(); + if (last_instruction->IsIf()) { + HIf* if_instruction = last_instruction->AsIf(); + HInstruction* condition = if_instruction->InputAt(0); + if (condition->IsIntConstant()) { + if (condition->AsIntConstant()->IsOne()) { + live_successors = live_successors.SubArray(0u, 1u); + 
DCHECK_EQ(live_successors[0], if_instruction->IfTrueSuccessor()); + } else { + DCHECK(condition->AsIntConstant()->IsZero()); + live_successors = live_successors.SubArray(1u, 1u); + DCHECK_EQ(live_successors[0], if_instruction->IfFalseSuccessor()); + } + } + } else if (last_instruction->IsPackedSwitch()) { + HPackedSwitch* switch_instruction = last_instruction->AsPackedSwitch(); + HInstruction* switch_input = switch_instruction->InputAt(0); + if (switch_input->IsIntConstant()) { + int32_t switch_value = switch_input->AsIntConstant()->GetValue(); + int32_t start_value = switch_instruction->GetStartValue(); + // Note: Though the spec forbids packed-switch values to wrap around, we leave + // that task to the verifier and use unsigned arithmetic with it's "modulo 2^32" + // semantics to check if the value is in range, wrapped or not. + uint32_t switch_index = + static_cast<uint32_t>(switch_value) - static_cast<uint32_t>(start_value); + if (switch_index < switch_instruction->GetNumEntries()) { + live_successors = live_successors.SubArray(switch_index, 1u); + DCHECK_EQ(live_successors[0], block->GetSuccessor(switch_index)); + } else { + live_successors = live_successors.SubArray(switch_instruction->GetNumEntries(), 1u); + DCHECK_EQ(live_successors[0], switch_instruction->GetDefaultBlock()); + } + } } - } else { - for (HBasicBlock* successor : block->GetSuccessors()) { - MarkReachableBlocks(successor, visited); + + for (HBasicBlock* successor : live_successors) { + // Add only those successors that have not been visited yet. + if (!visited->IsBitSet(successor->GetBlockId())) { + visited->SetBit(successor->GetBlockId()); + worklist.push_back(successor); + } } } } @@ -67,7 +100,7 @@ void HDeadCodeElimination::RemoveDeadBlocks() { ArenaBitVector live_blocks(allocator, graph_->GetBlocks().size(), false); ArenaBitVector affected_loops(allocator, graph_->GetBlocks().size(), false); - MarkReachableBlocks(graph_->GetEntryBlock(), &live_blocks); + MarkReachableBlocks(graph_, &live_blocks); bool removed_one_or_more_blocks = false; // Remove all dead blocks. Iterate in post order because removal needs the diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc index 583da30438..4e1cafee66 100644 --- a/compiler/optimizing/graph_checker.cc +++ b/compiler/optimizing/graph_checker.cc @@ -743,6 +743,22 @@ void SSAChecker::HandleBooleanInput(HInstruction* instruction, size_t input_inde } } +void SSAChecker::VisitPackedSwitch(HPackedSwitch* instruction) { + VisitInstruction(instruction); + // Check that the number of block successors matches the switch count plus + // one for the default block. 
+ HBasicBlock* block = instruction->GetBlock(); + if (instruction->GetNumEntries() + 1u != block->GetSuccessors().size()) { + AddError(StringPrintf( + "%s instruction %d in block %d expects %u successors to the block, but found: %zu.", + instruction->DebugName(), + instruction->GetId(), + block->GetBlockId(), + instruction->GetNumEntries() + 1u, + block->GetSuccessors().size())); + } +} + void SSAChecker::VisitIf(HIf* instruction) { VisitInstruction(instruction); HandleBooleanInput(instruction, 0); diff --git a/compiler/optimizing/graph_checker.h b/compiler/optimizing/graph_checker.h index 0e270dbe18..7ddffc136a 100644 --- a/compiler/optimizing/graph_checker.h +++ b/compiler/optimizing/graph_checker.h @@ -125,6 +125,7 @@ class SSAChecker : public GraphChecker { void VisitBinaryOperation(HBinaryOperation* op) OVERRIDE; void VisitCondition(HCondition* op) OVERRIDE; void VisitIf(HIf* instruction) OVERRIDE; + void VisitPackedSwitch(HPackedSwitch* instruction) OVERRIDE; void VisitBooleanNot(HBooleanNot* instruction) OVERRIDE; void VisitConstant(HConstant* instruction) OVERRIDE; diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index d05c514912..2c6c3b726a 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -374,6 +374,11 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { << instance_of->MustDoNullCheck() << std::noboolalpha; } + void VisitArraySet(HArraySet* array_set) OVERRIDE { + StartAttributeStream("value_can_be_null") << std::boolalpha + << array_set->GetValueCanBeNull() << std::noboolalpha; + } + void VisitInvoke(HInvoke* invoke) OVERRIDE { StartAttributeStream("dex_file_index") << invoke->GetDexMethodIndex(); StartAttributeStream("method_name") << PrettyMethod( diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc index 1ee8648533..7cf061773f 100644 --- a/compiler/optimizing/gvn.cc +++ b/compiler/optimizing/gvn.cc @@ -15,11 +15,12 @@ */ #include "gvn.h" + +#include "base/arena_containers.h" +#include "base/bit_vector-inl.h" #include "side_effects_analysis.h" #include "utils.h" - #include "utils/arena_bit_vector.h" -#include "base/bit_vector-inl.h" namespace art { @@ -32,13 +33,13 @@ namespace art { * if there is one in the set. In GVN, we would say those instructions have the * same "number". */ -class ValueSet : public ArenaObject<kArenaAllocMisc> { +class ValueSet : public ArenaObject<kArenaAllocGvn> { public: // Constructs an empty ValueSet which owns all its buckets. explicit ValueSet(ArenaAllocator* allocator) : allocator_(allocator), num_buckets_(kMinimumNumberOfBuckets), - buckets_(allocator->AllocArray<Node*>(num_buckets_)), + buckets_(allocator->AllocArray<Node*>(num_buckets_, kArenaAllocGvn)), buckets_owned_(allocator, num_buckets_, false), num_entries_(0) { // ArenaAllocator returns zeroed memory, so no need to set buckets to null. 
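// The same tagging idiom (a minimal sketch) is what attributes these
// allocations to GVN in the arena statistics:
//
//   ArenaVector<ValueSet*> sets(allocator->Adapter(kArenaAllocGvn));
//   Node** buckets = allocator->AllocArray<Node*>(num_buckets, kArenaAllocGvn);
//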
@@ -51,7 +52,7 @@ class ValueSet : public ArenaObject<kArenaAllocMisc> { ValueSet(ArenaAllocator* allocator, const ValueSet& to_copy) : allocator_(allocator), num_buckets_(to_copy.IdealBucketCount()), - buckets_(allocator->AllocArray<Node*>(num_buckets_)), + buckets_(allocator->AllocArray<Node*>(num_buckets_, kArenaAllocGvn)), buckets_owned_(allocator, num_buckets_, false), num_entries_(to_copy.num_entries_) { // ArenaAllocator returns zeroed memory, so entries of buckets_ and @@ -143,7 +144,7 @@ class ValueSet : public ArenaObject<kArenaAllocMisc> { size_t GetNumberOfEntries() const { return num_entries_; } private: - class Node : public ArenaObject<kArenaAllocMisc> { + class Node : public ArenaObject<kArenaAllocGvn> { public: Node(HInstruction* instruction, size_t hash_code, Node* next) : instruction_(instruction), hash_code_(hash_code), next_(next) {} @@ -306,7 +307,7 @@ class GlobalValueNumberer : public ValueObject { : graph_(graph), allocator_(allocator), side_effects_(side_effects), - sets_(allocator, graph->GetBlocks().size(), nullptr) {} + sets_(graph->GetBlocks().size(), nullptr, allocator->Adapter(kArenaAllocGvn)) {} void Run(); @@ -322,14 +323,14 @@ class GlobalValueNumberer : public ValueObject { // ValueSet for blocks. Initially null, but for an individual block they // are allocated and populated by the dominator, and updated by all blocks // in the path from the dominator to the block. - GrowableArray<ValueSet*> sets_; + ArenaVector<ValueSet*> sets_; DISALLOW_COPY_AND_ASSIGN(GlobalValueNumberer); }; void GlobalValueNumberer::Run() { DCHECK(side_effects_.HasRun()); - sets_.Put(graph_->GetEntryBlock()->GetBlockId(), new (allocator_) ValueSet(allocator_)); + sets_[graph_->GetEntryBlock()->GetBlockId()] = new (allocator_) ValueSet(allocator_); // Use the reverse post order to ensure the non back-edge predecessors of a block are // visited before the block itself. @@ -348,7 +349,7 @@ void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) { set = new (allocator_) ValueSet(allocator_); } else { HBasicBlock* dominator = block->GetDominator(); - ValueSet* dominator_set = sets_.Get(dominator->GetBlockId()); + ValueSet* dominator_set = sets_[dominator->GetBlockId()]; if (dominator->GetSuccessors().size() == 1) { DCHECK_EQ(dominator->GetSuccessor(0), block); set = dominator_set; @@ -363,7 +364,7 @@ void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) { set->Kill(side_effects_.GetLoopEffects(block)); } else if (predecessors.size() > 1) { for (HBasicBlock* predecessor : predecessors) { - set->IntersectWith(sets_.Get(predecessor->GetBlockId())); + set->IntersectWith(sets_[predecessor->GetBlockId()]); if (set->IsEmpty()) { break; } @@ -372,7 +373,7 @@ void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) { } } - sets_.Put(block->GetBlockId(), set); + sets_[block->GetBlockId()] = set; HInstruction* current = block->GetFirstInstruction(); while (current != nullptr) { diff --git a/compiler/optimizing/induction_var_analysis.cc b/compiler/optimizing/induction_var_analysis.cc index 92c732c0c3..e5123deed6 100644 --- a/compiler/optimizing/induction_var_analysis.cc +++ b/compiler/optimizing/induction_var_analysis.cc @@ -33,17 +33,6 @@ static bool IsLoopInvariant(HLoopInformation* loop, HInstruction* instruction) { } /** - * Returns true if instruction is proper entry-phi-operation for given loop - * (referred to as mu-operation in Gerlek's paper). 
- */ -static bool IsEntryPhi(HLoopInformation* loop, HInstruction* instruction) { - return - instruction->IsPhi() && - instruction->InputCount() == 2 && - instruction->GetBlock() == loop->GetHeader(); -} - -/** * Since graph traversal may enter a SCC at any position, an initial representation may be rotated, * along dependences, viz. any of (a, b, c, d), (d, a, b, c) (c, d, a, b), (b, c, d, a) assuming * a chain of dependences (mutual independent items may occur in arbitrary order). For proper @@ -58,8 +47,9 @@ static void RotateEntryPhiFirst(HLoopInformation* loop, size_t phi_pos = -1; const size_t size = scc->size(); for (size_t i = 0; i < size; i++) { - if (IsEntryPhi(loop, scc->at(i)) && (phi == nullptr || phis.FoundBefore(scc->at(i), phi))) { - phi = scc->at(i); + HInstruction* other = scc->at(i); + if (other->IsLoopHeaderPhi() && (phi == nullptr || phis.FoundBefore(other, phi))) { + phi = other; phi_pos = i; } } @@ -84,11 +74,14 @@ static void RotateEntryPhiFirst(HLoopInformation* loop, HInductionVarAnalysis::HInductionVarAnalysis(HGraph* graph) : HOptimization(graph, kInductionPassName), global_depth_(0), - stack_(graph->GetArena()->Adapter()), - scc_(graph->GetArena()->Adapter()), - map_(std::less<HInstruction*>(), graph->GetArena()->Adapter()), - cycle_(std::less<HInstruction*>(), graph->GetArena()->Adapter()), - induction_(std::less<HLoopInformation*>(), graph->GetArena()->Adapter()) { + stack_(graph->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)), + scc_(graph->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)), + map_(std::less<HInstruction*>(), + graph->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)), + cycle_(std::less<HInstruction*>(), + graph->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)), + induction_(std::less<HLoopInformation*>(), + graph->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)) { } void HInductionVarAnalysis::Run() { @@ -168,7 +161,7 @@ void HInductionVarAnalysis::VisitNode(HLoopInformation* loop, HInstruction* inst } // Classify the SCC. - if (scc_.size() == 1 && !IsEntryPhi(loop, scc_[0])) { + if (scc_.size() == 1 && !scc_[0]->IsLoopHeaderPhi()) { ClassifyTrivial(loop, scc_[0]); } else { ClassifyNonTrivial(loop); @@ -200,10 +193,7 @@ uint32_t HInductionVarAnalysis::VisitDescendant(HLoopInformation* loop, HInstruc void HInductionVarAnalysis::ClassifyTrivial(HLoopInformation* loop, HInstruction* instruction) { InductionInfo* info = nullptr; if (instruction->IsPhi()) { - for (size_t i = 1, count = instruction->InputCount(); i < count; i++) { - info = TransferPhi(LookupInfo(loop, instruction->InputAt(0)), - LookupInfo(loop, instruction->InputAt(i))); - } + info = TransferPhi(loop, instruction, /* input_index */ 0); } else if (instruction->IsAdd()) { info = TransferAddSub(LookupInfo(loop, instruction->InputAt(0)), LookupInfo(loop, instruction->InputAt(1)), kAdd); @@ -241,25 +231,25 @@ void HInductionVarAnalysis::ClassifyNonTrivial(HLoopInformation* loop) { // Rotate proper entry-phi to front. if (size > 1) { - ArenaVector<HInstruction*> other(graph_->GetArena()->Adapter()); + ArenaVector<HInstruction*> other(graph_->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)); RotateEntryPhiFirst(loop, &scc_, &other); } - // Analyze from phi onwards. + // Analyze from entry-phi onwards. 
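// As an example (sketch): for a loop of the form
//
//   k = initial;
//   while (...) { k = k + 1; }
//
// the entry-phi is k = Phi(initial, k + 1); input 0 is the external,
// loop-invariant link and the remaining inputs come from within the cycle.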
HInstruction* phi = scc_[0]; - if (!IsEntryPhi(loop, phi)) { + if (!phi->IsLoopHeaderPhi()) { return; } - HInstruction* external = phi->InputAt(0); - HInstruction* internal = phi->InputAt(1); - InductionInfo* initial = LookupInfo(loop, external); + + // External link should be loop invariant. + InductionInfo* initial = LookupInfo(loop, phi->InputAt(0)); if (initial == nullptr || initial->induction_class != kInvariant) { return; } - // Singleton entry-phi-operation may be a wrap-around induction. + // Singleton is wrap-around induction if all internal links have the same meaning. if (size == 1) { - InductionInfo* update = LookupInfo(loop, internal); + InductionInfo* update = TransferPhi(loop, phi, /* input_index */ 1); if (update != nullptr) { AssignInfo(loop, phi, CreateInduction(kWrapAround, initial, update)); } @@ -272,7 +262,7 @@ void HInductionVarAnalysis::ClassifyNonTrivial(HLoopInformation* loop) { HInstruction* instruction = scc_[i]; InductionInfo* update = nullptr; if (instruction->IsPhi()) { - update = SolvePhi(loop, phi, instruction); + update = SolvePhiAllInputs(loop, phi, instruction); } else if (instruction->IsAdd()) { update = SolveAddSub( loop, phi, instruction, instruction->InputAt(0), instruction->InputAt(1), kAdd, true); @@ -286,10 +276,9 @@ void HInductionVarAnalysis::ClassifyNonTrivial(HLoopInformation* loop) { cycle_.Put(instruction, update); } - // Success if the internal link received a meaning. - auto it = cycle_.find(internal); - if (it != cycle_.end()) { - InductionInfo* induction = it->second; + // Success if all internal links received the same temporary meaning. + InductionInfo* induction = SolvePhi(phi, /* input_index */ 1); + if (induction != nullptr) { switch (induction->induction_class) { case kInvariant: // Classify first phi and then the rest of the cycle "on-demand". @@ -329,13 +318,20 @@ HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::RotatePeriodicInduc return CreateInduction(kPeriodic, induction->op_a, RotatePeriodicInduction(induction->op_b, last)); } -HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::TransferPhi(InductionInfo* a, - InductionInfo* b) { - // Transfer over a phi: if both inputs are identical, result is input. - if (InductionEqual(a, b)) { - return a; - } - return nullptr; +HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::TransferPhi(HLoopInformation* loop, + HInstruction* phi, + size_t input_index) { + // Match all phi inputs from input_index onwards exactly. + const size_t count = phi->InputCount(); + DCHECK_LT(input_index, count); + InductionInfo* a = LookupInfo(loop, phi->InputAt(input_index)); + for (size_t i = input_index + 1; i < count; i++) { + InductionInfo* b = LookupInfo(loop, phi->InputAt(i)); + if (!InductionEqual(a, b)) { + return nullptr; + } + } + return a; } HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::TransferAddSub(InductionInfo* a, @@ -421,47 +417,56 @@ HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::TransferNeg(Inducti return nullptr; } -HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::SolvePhi(HLoopInformation* loop, - HInstruction* phi, - HInstruction* instruction) { - // Solve within a cycle over a phi: identical inputs are combined into that input as result. 
- const size_t count = instruction->InputCount(); - DCHECK_GT(count, 0u); - auto ita = cycle_.find(instruction->InputAt(0)); +HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::SolvePhi(HInstruction* phi, + size_t input_index) { + // Match all phi inputs from input_index onwards exactly. + const size_t count = phi->InputCount(); + DCHECK_LT(input_index, count); + auto ita = cycle_.find(phi->InputAt(input_index)); if (ita != cycle_.end()) { - InductionInfo* a = ita->second; - for (size_t i = 1; i < count; i++) { - auto itb = cycle_.find(instruction->InputAt(i)); - if (itb == cycle_.end() || !HInductionVarAnalysis::InductionEqual(a, itb->second)) { + for (size_t i = input_index + 1; i < count; i++) { + auto itb = cycle_.find(phi->InputAt(i)); + if (itb == cycle_.end() || + !HInductionVarAnalysis::InductionEqual(ita->second, itb->second)) { return nullptr; } } - return a; + return ita->second; } + return nullptr; +} - // Solve within a cycle over another entry-phi: add invariants into a periodic. - if (IsEntryPhi(loop, instruction)) { - InductionInfo* a = LookupInfo(loop, instruction->InputAt(0)); +HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::SolvePhiAllInputs( + HLoopInformation* loop, + HInstruction* entry_phi, + HInstruction* phi) { + // Match all phi inputs. + InductionInfo* match = SolvePhi(phi, /* input_index */ 0); + if (match != nullptr) { + return match; + } + + // Otherwise, try to solve for a periodic seeded from phi onward. + // Only tight multi-statement cycles are considered in order to + // simplify rotating the periodic during the final classification. + if (phi->IsLoopHeaderPhi() && phi->InputCount() == 2) { + InductionInfo* a = LookupInfo(loop, phi->InputAt(0)); if (a != nullptr && a->induction_class == kInvariant) { - if (instruction->InputAt(1) == phi) { - InductionInfo* initial = LookupInfo(loop, phi->InputAt(0)); + if (phi->InputAt(1) == entry_phi) { + InductionInfo* initial = LookupInfo(loop, entry_phi->InputAt(0)); return CreateInduction(kPeriodic, a, initial); } - auto it = cycle_.find(instruction->InputAt(1)); - if (it != cycle_.end()) { - InductionInfo* b = it->second; - if (b->induction_class == kPeriodic) { - return CreateInduction(kPeriodic, a, b); - } + InductionInfo* b = SolvePhi(phi, /* input_index */ 1); + if (b != nullptr && b->induction_class == kPeriodic) { + return CreateInduction(kPeriodic, a, b); } } } - return nullptr; } HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::SolveAddSub(HLoopInformation* loop, - HInstruction* phi, + HInstruction* entry_phi, HInstruction* instruction, HInstruction* x, HInstruction* y, @@ -471,7 +476,7 @@ HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::SolveAddSub(HLoopIn // invariant value, seeded from phi, keeps adding to the stride of the induction. InductionInfo* b = LookupInfo(loop, y); if (b != nullptr && b->induction_class == kInvariant) { - if (x == phi) { + if (x == entry_phi) { return (op == kAdd) ? b : CreateInvariantOp(kNeg, nullptr, b); } auto it = cycle_.find(x); @@ -487,14 +492,15 @@ HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::SolveAddSub(HLoopIn if (op == kAdd) { // Try the other way around for an addition if considered for first time. 
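// For example (sketch): an update written as k = 1 + k carries the invariant
// in the left operand, so the operands are swapped once and the same solver
// is re-run as if the update were k = k + 1.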
if (is_first_call) { - return SolveAddSub(loop, phi, instruction, y, x, op, false); + return SolveAddSub(loop, entry_phi, instruction, y, x, op, false); } } else if (op == kSub) { - // Solve within a tight cycle for a periodic idiom k = c - k; - if (y == phi && instruction == phi->InputAt(1)) { + // Solve within a tight cycle that is formed by exactly two instructions, + // one phi and one update, for a periodic idiom of the form k = c - k; + if (y == entry_phi && entry_phi->InputCount() == 2 && instruction == entry_phi->InputAt(1)) { InductionInfo* a = LookupInfo(loop, x); if (a != nullptr && a->induction_class == kInvariant) { - InductionInfo* initial = LookupInfo(loop, phi->InputAt(0)); + InductionInfo* initial = LookupInfo(loop, entry_phi->InputAt(0)); return CreateInduction(kPeriodic, CreateInvariantOp(kSub, a, initial), initial); } } @@ -539,42 +545,46 @@ void HInductionVarAnalysis::VisitCondition(HLoopInformation* loop, Primitive::Type type, IfCondition cmp) { if (a->induction_class == kInvariant && b->induction_class == kLinear) { - // Swap conditions (e.g. U > i is same as i < U). + // Swap condition if induction is at right-hand-side (e.g. U > i is same as i < U). switch (cmp) { case kCondLT: VisitCondition(loop, b, a, type, kCondGT); break; case kCondLE: VisitCondition(loop, b, a, type, kCondGE); break; case kCondGT: VisitCondition(loop, b, a, type, kCondLT); break; case kCondGE: VisitCondition(loop, b, a, type, kCondLE); break; + case kCondNE: VisitCondition(loop, b, a, type, kCondNE); break; default: break; } } else if (a->induction_class == kLinear && b->induction_class == kInvariant) { - // Normalize a linear loop control with a constant, nonzero stride: + // Analyze condition with induction at left-hand-side (e.g. i < U). + InductionInfo* lower_expr = a->op_b; + InductionInfo* upper_expr = b; + InductionInfo* stride = a->op_a; + int64_t stride_value = 0; + if (!IsIntAndGet(stride, &stride_value)) { + return; + } + // Rewrite condition i != U into i < U or i > U if end condition is reached exactly. + if (cmp == kCondNE && ((stride_value == +1 && IsTaken(lower_expr, upper_expr, kCondLT)) || + (stride_value == -1 && IsTaken(lower_expr, upper_expr, kCondGT)))) { + cmp = stride_value > 0 ? kCondLT : kCondGT; + } + // Normalize a linear loop control with a nonzero stride: // stride > 0, either i < U or i <= U // stride < 0, either i > U or i >= U - InductionInfo* stride = a->op_a; - InductionInfo* lo_val = a->op_b; - InductionInfo* hi_val = b; - // Analyze the stride thoroughly, since its representation may be compound at this point. 
- InductionVarRange::Value v1 = InductionVarRange::GetMin(stride, nullptr); - InductionVarRange::Value v2 = InductionVarRange::GetMax(stride, nullptr); - if (v1.a_constant == 0 && v2.a_constant == 0 && v1.b_constant == v2.b_constant) { - const int32_t stride_value = v1.b_constant; - if ((stride_value > 0 && (cmp == kCondLT || cmp == kCondLE)) || - (stride_value < 0 && (cmp == kCondGT || cmp == kCondGE))) { - bool is_strict = cmp == kCondLT || cmp == kCondGT; - VisitTripCount(loop, lo_val, hi_val, stride, stride_value, type, is_strict); - } + if ((stride_value > 0 && (cmp == kCondLT || cmp == kCondLE)) || + (stride_value < 0 && (cmp == kCondGT || cmp == kCondGE))) { + VisitTripCount(loop, lower_expr, upper_expr, stride, stride_value, type, cmp); } } } void HInductionVarAnalysis::VisitTripCount(HLoopInformation* loop, - InductionInfo* lo_val, - InductionInfo* hi_val, + InductionInfo* lower_expr, + InductionInfo* upper_expr, InductionInfo* stride, - int32_t stride_value, + int64_t stride_value, Primitive::Type type, - bool is_strict) { + IfCondition cmp) { // Any loop of the general form: // // for (i = L; i <= U; i += S) // S > 0 @@ -586,29 +596,95 @@ void HInductionVarAnalysis::VisitTripCount(HLoopInformation* loop, // for (n = 0; n < TC; n++) // where TC = (U + S - L) / S // .. L + S * n .. // - // NOTE: The TC (trip-count) expression is only valid if the top-test path is taken at - // least once. Otherwise TC is 0. Also, the expression assumes the loop does not - // have any early-exits. Otherwise, TC is an upper bound. + // taking the following into consideration: // - bool cancels = is_strict && std::abs(stride_value) == 1; // compensation cancels conversion? + // (1) Using the same precision, the TC (trip-count) expression should be interpreted as + // an unsigned entity, for example, as in the following loop that uses the full range: + // for (int i = INT_MIN; i < INT_MAX; i++) // TC = UINT_MAX + // (2) The TC is only valid if the loop is taken, otherwise TC = 0, as in: + // for (int i = 12; i < U; i++) // TC = 0 when U >= 12 + // If this cannot be determined at compile-time, the TC is only valid within the + // loop-body proper, not the loop-header unless enforced with an explicit condition. + // (3) The TC is only valid if the loop is finite, otherwise TC has no value, as in: + // for (int i = 0; i <= U; i++) // TC = Inf when U = INT_MAX + // If this cannot be determined at compile-time, the TC is only valid when enforced + // with an explicit condition. + // (4) For loops which early-exits, the TC forms an upper bound, as in: + // for (int i = 0; i < 10 && ....; i++) // TC <= 10 + const bool is_taken = IsTaken(lower_expr, upper_expr, cmp); + const bool is_finite = IsFinite(upper_expr, stride_value, type, cmp); + const bool cancels = (cmp == kCondLT || cmp == kCondGT) && std::abs(stride_value) == 1; if (!cancels) { // Convert exclusive integral inequality into inclusive integral inequality, // viz. condition i < U is i <= U - 1 and condition i > U is i >= U + 1. - if (is_strict) { - const InductionOp op = stride_value > 0 ? kSub : kAdd; - hi_val = CreateInvariantOp(op, hi_val, CreateConstant(1, type)); + if (cmp == kCondLT) { + upper_expr = CreateInvariantOp(kSub, upper_expr, CreateConstant(1, type)); + } else if (cmp == kCondGT) { + upper_expr = CreateInvariantOp(kAdd, upper_expr, CreateConstant(1, type)); } // Compensate for stride. 
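// Worked example (a sketch): for (i = 0; i < 10; i += 3) gives
//   inclusive bound: 10 - 1 = 9, compensated for the stride: 9 + 3 = 12,
//   TC = (12 - 0) / 3 = 4, matching the iterations i = 0, 3, 6, 9.
// When |stride| == 1 and the comparison is strict, the -1 and +stride
// adjustments cancel, so TC is simply U - L.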
- hi_val = CreateInvariantOp(kAdd, hi_val, stride); + upper_expr = CreateInvariantOp(kAdd, upper_expr, stride); } - + InductionInfo* trip_count + = CreateInvariantOp(kDiv, CreateInvariantOp(kSub, upper_expr, lower_expr), stride); // Assign the trip-count expression to the loop control. Clients that use the information - // should be aware that due to the top-test assumption, the expression is only valid in the - // loop-body proper, and not yet in the loop-header. If the loop has any early exits, the - // trip-count forms a conservative upper bound on the number of loop iterations. - InductionInfo* trip_count = - CreateInvariantOp(kDiv, CreateInvariantOp(kSub, hi_val, lo_val), stride); - AssignInfo(loop, loop->GetHeader()->GetLastInstruction(), trip_count); + // should be aware that the expression is only valid under the conditions listed above. + InductionOp tcKind = kTripCountInBodyUnsafe; + if (is_taken && is_finite) { + tcKind = kTripCountInLoop; + } else if (is_finite) { + tcKind = kTripCountInBody; + } else if (is_taken) { + tcKind = kTripCountInLoopUnsafe; + } + AssignInfo(loop, loop->GetHeader()->GetLastInstruction(), CreateTripCount(tcKind, trip_count)); +} + +bool HInductionVarAnalysis::IsTaken(InductionInfo* lower_expr, + InductionInfo* upper_expr, + IfCondition cmp) { + int64_t lower_value; + int64_t upper_value; + if (IsIntAndGet(lower_expr, &lower_value) && IsIntAndGet(upper_expr, &upper_value)) { + switch (cmp) { + case kCondLT: return lower_value < upper_value; + case kCondLE: return lower_value <= upper_value; + case kCondGT: return lower_value > upper_value; + case kCondGE: return lower_value >= upper_value; + case kCondEQ: + case kCondNE: LOG(FATAL) << "CONDITION UNREACHABLE"; + } + } + return false; // not certain, may be untaken +} + +bool HInductionVarAnalysis::IsFinite(InductionInfo* upper_expr, + int64_t stride_value, + Primitive::Type type, + IfCondition cmp) { + const int64_t min = type == Primitive::kPrimInt + ? std::numeric_limits<int32_t>::min() + : std::numeric_limits<int64_t>::min(); + const int64_t max = type == Primitive::kPrimInt + ? std::numeric_limits<int32_t>::max() + : std::numeric_limits<int64_t>::max(); + // Some rules under which it is certain at compile-time that the loop is finite. 
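// For instance (a sketch, for 32-bit int): i < U with stride 2 is only known
// to be finite when U <= INT_MAX - 1; with U == INT_MAX the last in-range
// value could be INT_MAX - 1 and the following increment would overflow
// before the comparison ever fails.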
+ int64_t value; + switch (cmp) { + case kCondLT: + return stride_value == 1 || + (IsIntAndGet(upper_expr, &value) && value <= (max - stride_value + 1)); + case kCondLE: + return (IsIntAndGet(upper_expr, &value) && value <= (max - stride_value)); + case kCondGT: + return stride_value == -1 || + (IsIntAndGet(upper_expr, &value) && value >= (min - stride_value - 1)); + case kCondGE: + return (IsIntAndGet(upper_expr, &value) && value >= (min - stride_value)); + case kCondEQ: + case kCondNE: LOG(FATAL) << "CONDITION UNREACHABLE"; + } + return false; // not certain, may be infinite } void HInductionVarAnalysis::AssignInfo(HLoopInformation* loop, @@ -618,7 +694,8 @@ void HInductionVarAnalysis::AssignInfo(HLoopInformation* loop, if (it == induction_.end()) { it = induction_.Put(loop, ArenaSafeMap<HInstruction*, InductionInfo*>( - std::less<HInstruction*>(), graph_->GetArena()->Adapter())); + std::less<HInstruction*>(), + graph_->GetArena()->Adapter(kArenaAllocInductionVarAnalysis))); } it->second.Put(instruction, info); } @@ -725,13 +802,22 @@ bool HInductionVarAnalysis::InductionEqual(InductionInfo* info1, } bool HInductionVarAnalysis::IsIntAndGet(InductionInfo* info, int64_t* value) { - if (info != nullptr && info->induction_class == kInvariant && info->operation == kFetch) { - DCHECK(info->fetch); - if (info->fetch->IsIntConstant()) { - *value = info->fetch->AsIntConstant()->GetValue(); - return true; - } else if (info->fetch->IsLongConstant()) { - *value = info->fetch->AsLongConstant()->GetValue(); + if (info != nullptr && info->induction_class == kInvariant) { + // A direct constant fetch. + if (info->operation == kFetch) { + DCHECK(info->fetch); + if (info->fetch->IsIntConstant()) { + *value = info->fetch->AsIntConstant()->GetValue(); + return true; + } else if (info->fetch->IsLongConstant()) { + *value = info->fetch->AsLongConstant()->GetValue(); + return true; + } + } + // Use range analysis to resolve compound values. + int32_t range_value; + if (InductionVarRange::GetConstant(info, &range_value)) { + *value = range_value; return true; } } @@ -759,6 +845,10 @@ std::string HInductionVarAnalysis::InductionToString(InductionInfo* info) { inv += std::to_string(info->fetch->GetId()) + ":" + info->fetch->DebugName(); } break; + case kTripCountInLoop: inv += "TC-loop:"; break; + case kTripCountInBody: inv += "TC-body:"; break; + case kTripCountInLoopUnsafe: inv += "TC-loop-unsafe:"; break; + case kTripCountInBodyUnsafe: inv += "TC-body-unsafe:"; break; } inv += InductionToString(info->op_b); return inv + ")"; diff --git a/compiler/optimizing/induction_var_analysis.h b/compiler/optimizing/induction_var_analysis.h index 8eccf925c1..7ab80cd676 100644 --- a/compiler/optimizing/induction_var_analysis.h +++ b/compiler/optimizing/induction_var_analysis.h @@ -56,13 +56,20 @@ class HInductionVarAnalysis : public HOptimization { }; enum InductionOp { - kNop, // no-operation: a true induction + // No-operation: a true induction. + kNop, + // Various invariant operations. kAdd, kSub, kNeg, kMul, kDiv, - kFetch + kFetch, + // Trip counts (valid in full loop or only body proper; unsafe implies loop may be infinite). 
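A sketch of how the IsTaken/IsFinite rules above classify a few int loops (assuming the induction analysis itself succeeds; n stands for a non-constant value):

    // for (int i = 0; i < 100; i++)   taken (0 < 100) and finite               -> kTripCountInLoop
    // for (int i = 0; i < n; i++)     finite (stride 1) but not provably taken -> kTripCountInBody
    // for (int i = 0; i <= n; i++)    possibly infinite (n may be INT_MAX),
    //                                 not provably taken                       -> kTripCountInBodyUnsafe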
+ kTripCountInLoop, + kTripCountInBody, + kTripCountInLoopUnsafe, + kTripCountInBodyUnsafe }; /** @@ -77,8 +84,10 @@ class HInductionVarAnalysis : public HOptimization { * nop: a, then defined by b * (4) periodic * nop: a, then defined by b (repeated when exhausted) + * (5) trip-count: + * tc: defined by b */ - struct InductionInfo : public ArenaObject<kArenaAllocMisc> { + struct InductionInfo : public ArenaObject<kArenaAllocInductionVarAnalysis> { InductionInfo(InductionClass ic, InductionOp op, InductionInfo* a, @@ -110,6 +119,10 @@ class HInductionVarAnalysis : public HOptimization { return new (graph_->GetArena()) InductionInfo(kInvariant, kFetch, nullptr, nullptr, f); } + InductionInfo* CreateTripCount(InductionOp op, InductionInfo* b) { + return new (graph_->GetArena()) InductionInfo(kInvariant, op, nullptr, b, nullptr); + } + InductionInfo* CreateInduction(InductionClass ic, InductionInfo* a, InductionInfo* b) { DCHECK(a != nullptr && b != nullptr); return new (graph_->GetArena()) InductionInfo(ic, kNop, a, b, nullptr); @@ -121,26 +134,27 @@ class HInductionVarAnalysis : public HOptimization { uint32_t VisitDescendant(HLoopInformation* loop, HInstruction* instruction); void ClassifyTrivial(HLoopInformation* loop, HInstruction* instruction); void ClassifyNonTrivial(HLoopInformation* loop); + InductionInfo* RotatePeriodicInduction(InductionInfo* induction, InductionInfo* last); // Transfer operations. - InductionInfo* TransferPhi(InductionInfo* a, InductionInfo* b); + InductionInfo* TransferPhi(HLoopInformation* loop, HInstruction* phi, size_t input_index); InductionInfo* TransferAddSub(InductionInfo* a, InductionInfo* b, InductionOp op); InductionInfo* TransferMul(InductionInfo* a, InductionInfo* b); InductionInfo* TransferShl(InductionInfo* a, InductionInfo* b, Primitive::Type type); InductionInfo* TransferNeg(InductionInfo* a); // Solvers. - InductionInfo* SolvePhi(HLoopInformation* loop, - HInstruction* phi, - HInstruction* instruction); + InductionInfo* SolvePhi(HInstruction* phi, size_t input_index); + InductionInfo* SolvePhiAllInputs(HLoopInformation* loop, + HInstruction* entry_phi, + HInstruction* phi); InductionInfo* SolveAddSub(HLoopInformation* loop, - HInstruction* phi, + HInstruction* entry_phi, HInstruction* instruction, HInstruction* x, HInstruction* y, InductionOp op, bool is_first_call); - InductionInfo* RotatePeriodicInduction(InductionInfo* induction, InductionInfo* last); // Trip count information. void VisitControl(HLoopInformation* loop); @@ -150,12 +164,17 @@ class HInductionVarAnalysis : public HOptimization { Primitive::Type type, IfCondition cmp); void VisitTripCount(HLoopInformation* loop, - InductionInfo* lo_val, - InductionInfo* hi_val, + InductionInfo* lower_expr, + InductionInfo* upper_expr, InductionInfo* stride, - int32_t stride_value, + int64_t stride_value, Primitive::Type type, - bool is_strict); + IfCondition cmp); + bool IsTaken(InductionInfo* lower_expr, InductionInfo* upper_expr, IfCondition cmp); + bool IsFinite(InductionInfo* upper_expr, + int64_t stride_value, + Primitive::Type type, + IfCondition cmp); // Assign and lookup. 
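For reference, a trip-count is stored as an invariant node whose op_b holds the count expression; a minimal sketch of the 100-iteration example, using the constructors declared above (the printed form is the one the analysis tests assert):

    // InductionInfo* tc = CreateTripCount(kTripCountInLoop, CreateConstant(100, Primitive::kPrimInt));
    // InductionToString(tc)  -->  "(TC-loop:(100))"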
void AssignInfo(HLoopInformation* loop, HInstruction* instruction, InductionInfo* info); diff --git a/compiler/optimizing/induction_var_analysis_test.cc b/compiler/optimizing/induction_var_analysis_test.cc index fca1ca55e5..20492e7152 100644 --- a/compiler/optimizing/induction_var_analysis_test.cc +++ b/compiler/optimizing/induction_var_analysis_test.cc @@ -20,6 +20,7 @@ #include "builder.h" #include "gtest/gtest.h" #include "induction_var_analysis.h" +#include "induction_var_range.h" #include "nodes.h" #include "optimizing_unit_test.h" @@ -233,7 +234,8 @@ TEST_F(InductionVarAnalysisTest, FindBasicInduction) { EXPECT_STREQ("((1) * i + (1))", GetInductionInfo(increment_[0], 0).c_str()); // Trip-count. - EXPECT_STREQ("(100)", GetInductionInfo(loop_header_[0]->GetLastInstruction(), 0).c_str()); + EXPECT_STREQ("(TC-loop:(100))", + GetInductionInfo(loop_header_[0]->GetLastInstruction(), 0).c_str()); } TEST_F(InductionVarAnalysisTest, FindDerivedInduction) { @@ -388,7 +390,7 @@ TEST_F(InductionVarAnalysisTest, FindSecondOrderWrapAroundInduction) { HInstruction* store = InsertArrayStore(induc_, 0); InsertLocalStore(induc_, InsertLocalLoad(tmp_, 0), 0); HInstruction *sub = InsertInstruction( - new (&allocator_) HSub(Primitive::kPrimInt, constant100_, InsertLocalLoad(basic_[0], 0)), 0); + new (&allocator_) HSub(Primitive::kPrimInt, constant100_, InsertLocalLoad(basic_[0], 0)), 0); InsertLocalStore(tmp_, sub, 0); PerformInductionVarAnalysis(); @@ -412,16 +414,16 @@ TEST_F(InductionVarAnalysisTest, FindWrapAroundDerivedInduction) { new (&allocator_) HAdd(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant100_), 0); InsertLocalStore(tmp_, add, 0); HInstruction *sub = InsertInstruction( - new (&allocator_) HSub(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant100_), 0); + new (&allocator_) HSub(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant100_), 0); InsertLocalStore(tmp_, sub, 0); HInstruction *mul = InsertInstruction( - new (&allocator_) HMul(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant100_), 0); + new (&allocator_) HMul(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant100_), 0); InsertLocalStore(tmp_, mul, 0); HInstruction *shl = InsertInstruction( - new (&allocator_) HShl(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant1_), 0); + new (&allocator_) HShl(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant1_), 0); InsertLocalStore(tmp_, shl, 0); HInstruction *neg = InsertInstruction( - new (&allocator_) HNeg(Primitive::kPrimInt, InsertLocalLoad(induc_, 0)), 0); + new (&allocator_) HNeg(Primitive::kPrimInt, InsertLocalLoad(induc_, 0)), 0); InsertLocalStore(tmp_, neg, 0); InsertLocalStore( induc_, @@ -471,7 +473,7 @@ TEST_F(InductionVarAnalysisTest, FindIdiomaticPeriodicInduction) { BuildLoopNest(1); HInstruction* store = InsertArrayStore(induc_, 0); HInstruction *sub = InsertInstruction( - new (&allocator_) HSub(Primitive::kPrimInt, constant1_, InsertLocalLoad(induc_, 0)), 0); + new (&allocator_) HSub(Primitive::kPrimInt, constant1_, InsertLocalLoad(induc_, 0)), 0); InsertLocalStore(induc_, sub, 0); PerformInductionVarAnalysis(); @@ -497,19 +499,19 @@ TEST_F(InductionVarAnalysisTest, FindDerivedPeriodicInduction) { HSub(Primitive::kPrimInt, constant1_, InsertLocalLoad(induc_, 0)), 0), 0); // Derived expressions. 
HInstruction *add = InsertInstruction( - new (&allocator_) HAdd(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant100_), 0); + new (&allocator_) HAdd(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant100_), 0); InsertLocalStore(tmp_, add, 0); HInstruction *sub = InsertInstruction( - new (&allocator_) HSub(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant100_), 0); + new (&allocator_) HSub(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant100_), 0); InsertLocalStore(tmp_, sub, 0); HInstruction *mul = InsertInstruction( - new (&allocator_) HMul(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant100_), 0); + new (&allocator_) HMul(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant100_), 0); InsertLocalStore(tmp_, mul, 0); HInstruction *shl = InsertInstruction( - new (&allocator_) HShl(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant1_), 0); + new (&allocator_) HShl(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant1_), 0); InsertLocalStore(tmp_, shl, 0); HInstruction *neg = InsertInstruction( - new (&allocator_) HNeg(Primitive::kPrimInt, InsertLocalLoad(induc_, 0)), 0); + new (&allocator_) HNeg(Primitive::kPrimInt, InsertLocalLoad(induc_, 0)), 0); InsertLocalStore(tmp_, neg, 0); PerformInductionVarAnalysis(); @@ -520,6 +522,36 @@ TEST_F(InductionVarAnalysisTest, FindDerivedPeriodicInduction) { EXPECT_STREQ("periodic(( - (1)), (0))", GetInductionInfo(neg, 0).c_str()); } +TEST_F(InductionVarAnalysisTest, FindRange) { + // Setup: + // for (int i = 0; i < 100; i++) { + // k = i << 1; + // k = k + 1; + // a[k] = 0; + // } + BuildLoopNest(1); + HInstruction *shl = InsertInstruction( + new (&allocator_) HShl(Primitive::kPrimInt, InsertLocalLoad(basic_[0], 0), constant1_), 0); + InsertLocalStore(induc_, shl, 0); + HInstruction *add = InsertInstruction( + new (&allocator_) HAdd(Primitive::kPrimInt, InsertLocalLoad(induc_, 0), constant1_), 0); + InsertLocalStore(induc_, add, 0); + HInstruction* store = InsertArrayStore(induc_, 0); + PerformInductionVarAnalysis(); + + EXPECT_STREQ("((2) * i + (1))", GetInductionInfo(store->InputAt(1), 0).c_str()); + + InductionVarRange range(iva_); + InductionVarRange::Value v_min = range.GetMinInduction(store, store->InputAt(1)); + InductionVarRange::Value v_max = range.GetMaxInduction(store, store->InputAt(1)); + ASSERT_TRUE(v_min.is_known); + EXPECT_EQ(0, v_min.a_constant); + EXPECT_EQ(1, v_min.b_constant); + ASSERT_TRUE(v_max.is_known); + EXPECT_EQ(0, v_max.a_constant); + EXPECT_EQ(199, v_max.b_constant); +} + TEST_F(InductionVarAnalysisTest, FindDeepLoopInduction) { // Setup: // k = 0; @@ -550,7 +582,8 @@ TEST_F(InductionVarAnalysisTest, FindDeepLoopInduction) { } EXPECT_STREQ("((1) * i + (1))", GetInductionInfo(increment_[d], d).c_str()); // Trip-count. - EXPECT_STREQ("(100)", GetInductionInfo(loop_header_[d]->GetLastInstruction(), d).c_str()); + EXPECT_STREQ("(TC-loop:(100))", + GetInductionInfo(loop_header_[d]->GetLastInstruction(), d).c_str()); } } diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc index 486e904bd1..db12819060 100644 --- a/compiler/optimizing/induction_var_range.cc +++ b/compiler/optimizing/induction_var_range.cc @@ -14,120 +14,109 @@ * limitations under the License. 
*/ -#include <limits.h> - #include "induction_var_range.h" -namespace art { +#include <limits> -static bool IsValidConstant32(int32_t c) { - return INT_MIN < c && c < INT_MAX; -} +namespace art { -static bool IsValidConstant64(int64_t c) { - return INT_MIN < c && c < INT_MAX; +/** Returns true if 64-bit constant fits in 32-bit constant. */ +static bool CanLongValueFitIntoInt(int64_t c) { + return std::numeric_limits<int32_t>::min() <= c && c <= std::numeric_limits<int32_t>::max(); } -/** Returns true if 32-bit addition can be done safely (and is not an unknown range). */ +/** Returns true if 32-bit addition can be done safely. */ static bool IsSafeAdd(int32_t c1, int32_t c2) { - if (IsValidConstant32(c1) && IsValidConstant32(c2)) { - return IsValidConstant64(static_cast<int64_t>(c1) + static_cast<int64_t>(c2)); - } - return false; + return CanLongValueFitIntoInt(static_cast<int64_t>(c1) + static_cast<int64_t>(c2)); } -/** Returns true if 32-bit subtraction can be done safely (and is not an unknown range). */ +/** Returns true if 32-bit subtraction can be done safely. */ static bool IsSafeSub(int32_t c1, int32_t c2) { - if (IsValidConstant32(c1) && IsValidConstant32(c2)) { - return IsValidConstant64(static_cast<int64_t>(c1) - static_cast<int64_t>(c2)); - } - return false; + return CanLongValueFitIntoInt(static_cast<int64_t>(c1) - static_cast<int64_t>(c2)); } -/** Returns true if 32-bit multiplication can be done safely (and is not an unknown range). */ +/** Returns true if 32-bit multiplication can be done safely. */ static bool IsSafeMul(int32_t c1, int32_t c2) { - if (IsValidConstant32(c1) && IsValidConstant32(c2)) { - return IsValidConstant64(static_cast<int64_t>(c1) * static_cast<int64_t>(c2)); - } - return false; + return CanLongValueFitIntoInt(static_cast<int64_t>(c1) * static_cast<int64_t>(c2)); } -/** Returns true if 32-bit division can be done safely (and is not an unknown range). */ +/** Returns true if 32-bit division can be done safely. */ static bool IsSafeDiv(int32_t c1, int32_t c2) { - if (IsValidConstant32(c1) && IsValidConstant32(c2) && c2 != 0) { - return IsValidConstant64(static_cast<int64_t>(c1) / static_cast<int64_t>(c2)); - } - return false; + return c2 != 0 && CanLongValueFitIntoInt(static_cast<int64_t>(c1) / static_cast<int64_t>(c2)); } -/** Returns true for 32/64-bit integral constant within known range. */ +/** Returns true for 32/64-bit integral constant. */ static bool IsIntAndGet(HInstruction* instruction, int32_t* value) { if (instruction->IsIntConstant()) { - const int32_t c = instruction->AsIntConstant()->GetValue(); - if (IsValidConstant32(c)) { - *value = c; - return true; - } + *value = instruction->AsIntConstant()->GetValue(); + return true; } else if (instruction->IsLongConstant()) { const int64_t c = instruction->AsLongConstant()->GetValue(); - if (IsValidConstant64(c)) { - *value = c; + if (CanLongValueFitIntoInt(c)) { + *value = static_cast<int32_t>(c); return true; } } return false; } +/** + * An upper bound a * (length / a) + b, where a > 0, can be conservatively rewritten as length + b + * because length >= 0 is true. This makes it more likely the bound is useful to clients. 
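The guards above simply perform the operation in 64-bit arithmetic and check that the result still fits in an int32_t; for instance (mirroring the AddValue/DivValue tests further down):

    // IsSafeAdd(INT32_MAX - 5, 5)   -> true   (result is exactly INT32_MAX)
    // IsSafeAdd(INT32_MAX - 5, 6)   -> false  (would overflow; the range becomes unknown)
    // IsSafeDiv(1, 0)               -> false  (division by zero is rejected up front)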
+ */ +static InductionVarRange::Value SimplifyMax(InductionVarRange::Value v) { + int32_t value; + if (v.a_constant > 1 && + v.instruction->IsDiv() && + v.instruction->InputAt(0)->IsArrayLength() && + IsIntAndGet(v.instruction->InputAt(1), &value) && v.a_constant == value) { + return InductionVarRange::Value(v.instruction->InputAt(0), 1, v.b_constant); + } + return v; +} + // // Public class methods. // InductionVarRange::InductionVarRange(HInductionVarAnalysis* induction_analysis) : induction_analysis_(induction_analysis) { + DCHECK(induction_analysis != nullptr); } InductionVarRange::Value InductionVarRange::GetMinInduction(HInstruction* context, HInstruction* instruction) { - HLoopInformation* loop = context->GetBlock()->GetLoopInformation(); - if (loop != nullptr && induction_analysis_ != nullptr) { - return GetMin(induction_analysis_->LookupInfo(loop, instruction), GetTripCount(loop, context)); - } - return Value(INT_MIN); + return GetInduction(context, instruction, /* is_min */ true); } InductionVarRange::Value InductionVarRange::GetMaxInduction(HInstruction* context, HInstruction* instruction) { - HLoopInformation* loop = context->GetBlock()->GetLoopInformation(); - if (loop != nullptr && induction_analysis_ != nullptr) { - return GetMax(induction_analysis_->LookupInfo(loop, instruction), GetTripCount(loop, context)); - } - return Value(INT_MAX); + return SimplifyMax(GetInduction(context, instruction, /* is_min */ false)); } // // Private class methods. // -HInductionVarAnalysis::InductionInfo* InductionVarRange::GetTripCount(HLoopInformation* loop, - HInstruction* context) { - // The trip-count expression is only valid when the top-test is taken at least once, - // that means, when the analyzed context appears outside the loop header itself. - // Early-exit loops are okay, since in those cases, the trip-count is conservative. - if (context->GetBlock() != loop->GetHeader()) { - HInductionVarAnalysis::InductionInfo* trip = - induction_analysis_->LookupInfo(loop, loop->GetHeader()->GetLastInstruction()); - if (trip != nullptr) { - // Wrap the trip-count representation in its own unusual NOP node, so that range analysis - // is able to determine the [0, TC - 1] interval without having to construct constants. - return induction_analysis_->CreateInvariantOp(HInductionVarAnalysis::kNop, trip, trip); - } +InductionVarRange::Value InductionVarRange::GetInduction(HInstruction* context, + HInstruction* instruction, + bool is_min) { + HLoopInformation* loop = context->GetBlock()->GetLoopInformation(); // closest enveloping loop + if (loop != nullptr) { + HBasicBlock* header = loop->GetHeader(); + bool in_body = context->GetBlock() != header; + return GetVal(induction_analysis_->LookupInfo(loop, instruction), + induction_analysis_->LookupInfo(loop, header->GetLastInstruction()), + in_body, + is_min); } - return nullptr; + return Value(); } InductionVarRange::Value InductionVarRange::GetFetch(HInstruction* instruction, HInductionVarAnalysis::InductionInfo* trip, - int32_t fail_value) { + bool in_body, + bool is_min) { // Detect constants and chase the fetch a bit deeper into the HIR tree, so that it becomes // more likely range analysis will compare the same instructions as terminal nodes. 
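To make the SimplifyMax rewrite above concrete (a sketch only; `length` stands for an HArrayLength instruction and `div` for the HDiv computing length / 4): since length >= 0 and integer division rounds toward zero, 4 * (length / 4) + b never exceeds length + b, so the symbolic bound can be loosened to a form clients can compare against the array length directly:

    // Value(div, 4, 3)       4 * (length / 4) + 3
    //   --SimplifyMax-->
    // Value(length, 1, 3)    length + 3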
int32_t value; @@ -135,157 +124,147 @@ InductionVarRange::Value InductionVarRange::GetFetch(HInstruction* instruction, return Value(value); } else if (instruction->IsAdd()) { if (IsIntAndGet(instruction->InputAt(0), &value)) { - return AddValue(Value(value), - GetFetch(instruction->InputAt(1), trip, fail_value), fail_value); + return AddValue(Value(value), GetFetch(instruction->InputAt(1), trip, in_body, is_min)); } else if (IsIntAndGet(instruction->InputAt(1), &value)) { - return AddValue(GetFetch(instruction->InputAt(0), trip, fail_value), - Value(value), fail_value); + return AddValue(GetFetch(instruction->InputAt(0), trip, in_body, is_min), Value(value)); } - } else if (fail_value < 0) { - // Special case: within the loop-body, minimum of trip-count is 1. - if (trip != nullptr && instruction == trip->op_b->fetch) { + } else if (is_min) { + // Special case for finding minimum: minimum of trip-count in loop-body is 1. + if (trip != nullptr && in_body && instruction == trip->op_b->fetch) { return Value(1); } } return Value(instruction, 1, 0); } -InductionVarRange::Value InductionVarRange::GetMin(HInductionVarAnalysis::InductionInfo* info, - HInductionVarAnalysis::InductionInfo* trip) { - if (info != nullptr) { - switch (info->induction_class) { - case HInductionVarAnalysis::kInvariant: - // Invariants. - switch (info->operation) { - case HInductionVarAnalysis::kNop: // normalized: 0 - DCHECK_EQ(info->op_a, info->op_b); - return Value(0); - case HInductionVarAnalysis::kAdd: - return AddValue(GetMin(info->op_a, trip), GetMin(info->op_b, trip), INT_MIN); - case HInductionVarAnalysis::kSub: // second max! - return SubValue(GetMin(info->op_a, trip), GetMax(info->op_b, trip), INT_MIN); - case HInductionVarAnalysis::kNeg: // second max! - return SubValue(Value(0), GetMax(info->op_b, trip), INT_MIN); - case HInductionVarAnalysis::kMul: - return GetMul(info->op_a, info->op_b, trip, INT_MIN); - case HInductionVarAnalysis::kDiv: - return GetDiv(info->op_a, info->op_b, trip, INT_MIN); - case HInductionVarAnalysis::kFetch: - return GetFetch(info->fetch, trip, INT_MIN); - } - break; - case HInductionVarAnalysis::kLinear: - // Minimum over linear induction a * i + b, for normalized 0 <= i < TC. - return AddValue(GetMul(info->op_a, trip, trip, INT_MIN), - GetMin(info->op_b, trip), INT_MIN); - case HInductionVarAnalysis::kWrapAround: - case HInductionVarAnalysis::kPeriodic: - // Minimum over all values in the wrap-around/periodic. - return MinValue(GetMin(info->op_a, trip), GetMin(info->op_b, trip)); - } - } - return Value(INT_MIN); -} - -InductionVarRange::Value InductionVarRange::GetMax(HInductionVarAnalysis::InductionInfo* info, - HInductionVarAnalysis::InductionInfo* trip) { +InductionVarRange::Value InductionVarRange::GetVal(HInductionVarAnalysis::InductionInfo* info, + HInductionVarAnalysis::InductionInfo* trip, + bool in_body, + bool is_min) { if (info != nullptr) { switch (info->induction_class) { case HInductionVarAnalysis::kInvariant: // Invariants. switch (info->operation) { - case HInductionVarAnalysis::kNop: // normalized: TC - 1 - DCHECK_EQ(info->op_a, info->op_b); - return SubValue(GetMax(info->op_b, trip), Value(1), INT_MAX); case HInductionVarAnalysis::kAdd: - return AddValue(GetMax(info->op_a, trip), GetMax(info->op_b, trip), INT_MAX); - case HInductionVarAnalysis::kSub: // second min! - return SubValue(GetMax(info->op_a, trip), GetMin(info->op_b, trip), INT_MAX); - case HInductionVarAnalysis::kNeg: // second min! 
- return SubValue(Value(0), GetMin(info->op_b, trip), INT_MAX); + return AddValue(GetVal(info->op_a, trip, in_body, is_min), + GetVal(info->op_b, trip, in_body, is_min)); + case HInductionVarAnalysis::kSub: // second reversed! + return SubValue(GetVal(info->op_a, trip, in_body, is_min), + GetVal(info->op_b, trip, in_body, !is_min)); + case HInductionVarAnalysis::kNeg: // second reversed! + return SubValue(Value(0), + GetVal(info->op_b, trip, in_body, !is_min)); case HInductionVarAnalysis::kMul: - return GetMul(info->op_a, info->op_b, trip, INT_MAX); + return GetMul(info->op_a, info->op_b, trip, in_body, is_min); case HInductionVarAnalysis::kDiv: - return GetDiv(info->op_a, info->op_b, trip, INT_MAX); + return GetDiv(info->op_a, info->op_b, trip, in_body, is_min); case HInductionVarAnalysis::kFetch: - return GetFetch(info->fetch, trip, INT_MAX); + return GetFetch(info->fetch, trip, in_body, is_min); + case HInductionVarAnalysis::kTripCountInLoop: + if (!in_body) { + return is_min ? Value(0) + : GetVal(info->op_b, trip, in_body, is_min); // one extra! + } + FALLTHROUGH_INTENDED; + case HInductionVarAnalysis::kTripCountInBody: + if (in_body) { + return is_min ? Value(0) + : SubValue(GetVal(info->op_b, trip, in_body, is_min), Value(1)); + } + break; + default: + break; } break; case HInductionVarAnalysis::kLinear: - // Maximum over linear induction a * i + b, for normalized 0 <= i < TC. - return AddValue(GetMul(info->op_a, trip, trip, INT_MAX), - GetMax(info->op_b, trip), INT_MAX); + // Linear induction a * i + b, for normalized 0 <= i < TC. + return AddValue(GetMul(info->op_a, trip, trip, in_body, is_min), + GetVal(info->op_b, trip, in_body, is_min)); case HInductionVarAnalysis::kWrapAround: case HInductionVarAnalysis::kPeriodic: - // Maximum over all values in the wrap-around/periodic. - return MaxValue(GetMax(info->op_a, trip), GetMax(info->op_b, trip)); + // Merge values in the wrap-around/periodic. + return MergeVal(GetVal(info->op_a, trip, in_body, is_min), + GetVal(info->op_b, trip, in_body, is_min), is_min); } } - return Value(INT_MAX); + return Value(); } InductionVarRange::Value InductionVarRange::GetMul(HInductionVarAnalysis::InductionInfo* info1, HInductionVarAnalysis::InductionInfo* info2, HInductionVarAnalysis::InductionInfo* trip, - int32_t fail_value) { - Value v1_min = GetMin(info1, trip); - Value v1_max = GetMax(info1, trip); - Value v2_min = GetMin(info2, trip); - Value v2_max = GetMax(info2, trip); - if (v1_min.a_constant == 0 && v1_min.b_constant >= 0) { + bool in_body, + bool is_min) { + Value v1_min = GetVal(info1, trip, in_body, /* is_min */ true); + Value v1_max = GetVal(info1, trip, in_body, /* is_min */ false); + Value v2_min = GetVal(info2, trip, in_body, /* is_min */ true); + Value v2_max = GetVal(info2, trip, in_body, /* is_min */ false); + if (v1_min.is_known && v1_min.a_constant == 0 && v1_min.b_constant >= 0) { // Positive range vs. positive or negative range. - if (v2_min.a_constant == 0 && v2_min.b_constant >= 0) { - return (fail_value < 0) ? MulValue(v1_min, v2_min, fail_value) - : MulValue(v1_max, v2_max, fail_value); - } else if (v2_max.a_constant == 0 && v2_max.b_constant <= 0) { - return (fail_value < 0) ? MulValue(v1_max, v2_min, fail_value) - : MulValue(v1_min, v2_max, fail_value); + if (v2_min.is_known && v2_min.a_constant == 0 && v2_min.b_constant >= 0) { + return is_min ? MulValue(v1_min, v2_min) + : MulValue(v1_max, v2_max); + } else if (v2_max.is_known && v2_max.a_constant == 0 && v2_max.b_constant <= 0) { + return is_min ? 
MulValue(v1_max, v2_min) + : MulValue(v1_min, v2_max); } - } else if (v1_min.a_constant == 0 && v1_min.b_constant <= 0) { + } else if (v1_min.is_known && v1_min.a_constant == 0 && v1_min.b_constant <= 0) { // Negative range vs. positive or negative range. - if (v2_min.a_constant == 0 && v2_min.b_constant >= 0) { - return (fail_value < 0) ? MulValue(v1_min, v2_max, fail_value) - : MulValue(v1_max, v2_min, fail_value); - } else if (v2_max.a_constant == 0 && v2_max.b_constant <= 0) { - return (fail_value < 0) ? MulValue(v1_max, v2_max, fail_value) - : MulValue(v1_min, v2_min, fail_value); + if (v2_min.is_known && v2_min.a_constant == 0 && v2_min.b_constant >= 0) { + return is_min ? MulValue(v1_min, v2_max) + : MulValue(v1_max, v2_min); + } else if (v2_max.is_known && v2_max.a_constant == 0 && v2_max.b_constant <= 0) { + return is_min ? MulValue(v1_max, v2_max) + : MulValue(v1_min, v2_min); } } - return Value(fail_value); + return Value(); } InductionVarRange::Value InductionVarRange::GetDiv(HInductionVarAnalysis::InductionInfo* info1, HInductionVarAnalysis::InductionInfo* info2, HInductionVarAnalysis::InductionInfo* trip, - int32_t fail_value) { - Value v1_min = GetMin(info1, trip); - Value v1_max = GetMax(info1, trip); - Value v2_min = GetMin(info2, trip); - Value v2_max = GetMax(info2, trip); - if (v1_min.a_constant == 0 && v1_min.b_constant >= 0) { + bool in_body, + bool is_min) { + Value v1_min = GetVal(info1, trip, in_body, /* is_min */ true); + Value v1_max = GetVal(info1, trip, in_body, /* is_min */ false); + Value v2_min = GetVal(info2, trip, in_body, /* is_min */ true); + Value v2_max = GetVal(info2, trip, in_body, /* is_min */ false); + if (v1_min.is_known && v1_min.a_constant == 0 && v1_min.b_constant >= 0) { // Positive range vs. positive or negative range. - if (v2_min.a_constant == 0 && v2_min.b_constant >= 0) { - return (fail_value < 0) ? DivValue(v1_min, v2_max, fail_value) - : DivValue(v1_max, v2_min, fail_value); - } else if (v2_max.a_constant == 0 && v2_max.b_constant <= 0) { - return (fail_value < 0) ? DivValue(v1_max, v2_max, fail_value) - : DivValue(v1_min, v2_min, fail_value); + if (v2_min.is_known && v2_min.a_constant == 0 && v2_min.b_constant >= 0) { + return is_min ? DivValue(v1_min, v2_max) + : DivValue(v1_max, v2_min); + } else if (v2_max.is_known && v2_max.a_constant == 0 && v2_max.b_constant <= 0) { + return is_min ? DivValue(v1_max, v2_max) + : DivValue(v1_min, v2_min); } - } else if (v1_min.a_constant == 0 && v1_min.b_constant <= 0) { + } else if (v1_min.is_known && v1_min.a_constant == 0 && v1_min.b_constant <= 0) { // Negative range vs. positive or negative range. - if (v2_min.a_constant == 0 && v2_min.b_constant >= 0) { - return (fail_value < 0) ? DivValue(v1_min, v2_min, fail_value) - : DivValue(v1_max, v2_max, fail_value); - } else if (v2_max.a_constant == 0 && v2_max.b_constant <= 0) { - return (fail_value < 0) ? DivValue(v1_max, v2_min, fail_value) - : DivValue(v1_min, v2_max, fail_value); + if (v2_min.is_known && v2_min.a_constant == 0 && v2_min.b_constant >= 0) { + return is_min ? DivValue(v1_min, v2_min) + : DivValue(v1_max, v2_max); + } else if (v2_max.is_known && v2_max.a_constant == 0 && v2_max.b_constant <= 0) { + return is_min ? 
DivValue(v1_max, v2_min) + : DivValue(v1_min, v2_max); } } - return Value(fail_value); + return Value(); } -InductionVarRange::Value InductionVarRange::AddValue(Value v1, Value v2, int32_t fail_value) { - if (IsSafeAdd(v1.b_constant, v2.b_constant)) { +bool InductionVarRange::GetConstant(HInductionVarAnalysis::InductionInfo* info, int32_t *value) { + Value v_min = GetVal(info, nullptr, false, /* is_min */ true); + Value v_max = GetVal(info, nullptr, false, /* is_min */ false); + if (v_min.a_constant == 0 && v_max.a_constant == 0 && v_min.b_constant == v_max.b_constant) { + *value = v_min.b_constant; + return true; + } + return false; +} + +InductionVarRange::Value InductionVarRange::AddValue(Value v1, Value v2) { + if (v1.is_known && v2.is_known && IsSafeAdd(v1.b_constant, v2.b_constant)) { const int32_t b = v1.b_constant + v2.b_constant; if (v1.a_constant == 0) { return Value(v2.instruction, v2.a_constant, b); @@ -295,11 +274,11 @@ InductionVarRange::Value InductionVarRange::AddValue(Value v1, Value v2, int32_t return Value(v1.instruction, v1.a_constant + v2.a_constant, b); } } - return Value(fail_value); + return Value(); } -InductionVarRange::Value InductionVarRange::SubValue(Value v1, Value v2, int32_t fail_value) { - if (IsSafeSub(v1.b_constant, v2.b_constant)) { +InductionVarRange::Value InductionVarRange::SubValue(Value v1, Value v2) { + if (v1.is_known && v2.is_known && IsSafeSub(v1.b_constant, v2.b_constant)) { const int32_t b = v1.b_constant - v2.b_constant; if (v1.a_constant == 0 && IsSafeSub(0, v2.a_constant)) { return Value(v2.instruction, -v2.a_constant, b); @@ -309,43 +288,42 @@ InductionVarRange::Value InductionVarRange::SubValue(Value v1, Value v2, int32_t return Value(v1.instruction, v1.a_constant - v2.a_constant, b); } } - return Value(fail_value); + return Value(); } -InductionVarRange::Value InductionVarRange::MulValue(Value v1, Value v2, int32_t fail_value) { - if (v1.a_constant == 0) { - if (IsSafeMul(v1.b_constant, v2.a_constant) && IsSafeMul(v1.b_constant, v2.b_constant)) { - return Value(v2.instruction, v1.b_constant * v2.a_constant, v1.b_constant * v2.b_constant); - } - } else if (v2.a_constant == 0) { - if (IsSafeMul(v1.a_constant, v2.b_constant) && IsSafeMul(v1.b_constant, v2.b_constant)) { - return Value(v1.instruction, v1.a_constant * v2.b_constant, v1.b_constant * v2.b_constant); +InductionVarRange::Value InductionVarRange::MulValue(Value v1, Value v2) { + if (v1.is_known && v2.is_known) { + if (v1.a_constant == 0) { + if (IsSafeMul(v1.b_constant, v2.a_constant) && IsSafeMul(v1.b_constant, v2.b_constant)) { + return Value(v2.instruction, v1.b_constant * v2.a_constant, v1.b_constant * v2.b_constant); + } + } else if (v2.a_constant == 0) { + if (IsSafeMul(v1.a_constant, v2.b_constant) && IsSafeMul(v1.b_constant, v2.b_constant)) { + return Value(v1.instruction, v1.a_constant * v2.b_constant, v1.b_constant * v2.b_constant); + } } } - return Value(fail_value); + return Value(); } -InductionVarRange::Value InductionVarRange::DivValue(Value v1, Value v2, int32_t fail_value) { - if (v1.a_constant == 0 && v2.a_constant == 0) { +InductionVarRange::Value InductionVarRange::DivValue(Value v1, Value v2) { + if (v1.is_known && v2.is_known && v1.a_constant == 0 && v2.a_constant == 0) { if (IsSafeDiv(v1.b_constant, v2.b_constant)) { return Value(v1.b_constant / v2.b_constant); } } - return Value(fail_value); -} - -InductionVarRange::Value InductionVarRange::MinValue(Value v1, Value v2) { - if (v1.instruction == v2.instruction && v1.a_constant == v2.a_constant) { - return 
Value(v1.instruction, v1.a_constant, std::min(v1.b_constant, v2.b_constant)); - } - return Value(INT_MIN); + return Value(); } -InductionVarRange::Value InductionVarRange::MaxValue(Value v1, Value v2) { - if (v1.instruction == v2.instruction && v1.a_constant == v2.a_constant) { - return Value(v1.instruction, v1.a_constant, std::max(v1.b_constant, v2.b_constant)); +InductionVarRange::Value InductionVarRange::MergeVal(Value v1, Value v2, bool is_min) { + if (v1.is_known && v2.is_known) { + if (v1.instruction == v2.instruction && v1.a_constant == v2.a_constant) { + return Value(v1.instruction, v1.a_constant, + is_min ? std::min(v1.b_constant, v2.b_constant) + : std::max(v1.b_constant, v2.b_constant)); + } } - return Value(INT_MAX); + return Value(); } } // namespace art diff --git a/compiler/optimizing/induction_var_range.h b/compiler/optimizing/induction_var_range.h index e002e5ff6c..dbdd2eedac 100644 --- a/compiler/optimizing/induction_var_range.h +++ b/compiler/optimizing/induction_var_range.h @@ -22,30 +22,36 @@ namespace art { /** - * This class implements induction variable based range analysis on expressions within loops. - * It takes the results of induction variable analysis in the constructor and provides a public - * API to obtain a conservative lower and upper bound value on each instruction in the HIR. + * This class implements range analysis on expressions within loops. It takes the results + * of induction variable analysis in the constructor and provides a public API to obtain + * a conservative lower and upper bound value on each instruction in the HIR. * - * For example, given a linear induction 2 * i + x where 0 <= i <= 10, range analysis yields lower - * bound value x and upper bound value x + 20 for the expression, thus, the range [x, x + 20]. + * The range analysis is done with a combination of symbolic and partial integral evaluation + * of expressions. The analysis avoids complications with wrap-around arithmetic on the integral + * parts but all clients should be aware that wrap-around may occur on any of the symbolic parts. + * For example, given a known range for [0,100] for i, the evaluation yields range [-100,100] + * for expression -2*i+100, which is exact, and range [x,x+100] for expression i+x, which may + * wrap-around anywhere in the range depending on the actual value of x. */ class InductionVarRange { public: /* * A value that can be represented as "a * instruction + b" for 32-bit constants, where - * Value(INT_MIN) and Value(INT_MAX) denote an unknown lower and upper bound, respectively. - * Although range analysis could yield more complex values, the format is sufficiently powerful - * to represent useful cases and feeds directly into optimizations like bounds check elimination. + * Value() denotes an unknown lower and upper bound. Although range analysis could yield + * more complex values, the format is sufficiently powerful to represent useful cases + * and feeds directly into optimizations like bounds check elimination. */ struct Value { + Value() : instruction(nullptr), a_constant(0), b_constant(0), is_known(false) {} Value(HInstruction* i, int32_t a, int32_t b) - : instruction(a != 0 ? i : nullptr), - a_constant(a), - b_constant(b) {} + : instruction(a != 0 ? i : nullptr), a_constant(a), b_constant(b), is_known(true) {} explicit Value(int32_t b) : Value(nullptr, 0, b) {} + // Representation as: a_constant x instruction + b_constant. HInstruction* instruction; int32_t a_constant; int32_t b_constant; + // If true, represented by prior fields. 
Otherwise unknown value. + bool is_known; }; explicit InductionVarRange(HInductionVarAnalysis* induction); @@ -67,32 +73,34 @@ class InductionVarRange { // Private helper methods. // - HInductionVarAnalysis::InductionInfo* GetTripCount(HLoopInformation* loop, - HInstruction* context); + Value GetInduction(HInstruction* context, HInstruction* instruction, bool is_min); static Value GetFetch(HInstruction* instruction, HInductionVarAnalysis::InductionInfo* trip, - int32_t fail_value); - - static Value GetMin(HInductionVarAnalysis::InductionInfo* info, - HInductionVarAnalysis::InductionInfo* trip); - static Value GetMax(HInductionVarAnalysis::InductionInfo* info, - HInductionVarAnalysis::InductionInfo* trip); + bool in_body, + bool is_min); + static Value GetVal(HInductionVarAnalysis::InductionInfo* info, + HInductionVarAnalysis::InductionInfo* trip, + bool in_body, + bool is_min); static Value GetMul(HInductionVarAnalysis::InductionInfo* info1, HInductionVarAnalysis::InductionInfo* info2, HInductionVarAnalysis::InductionInfo* trip, - int32_t fail_value); + bool in_body, + bool is_min); static Value GetDiv(HInductionVarAnalysis::InductionInfo* info1, HInductionVarAnalysis::InductionInfo* info2, HInductionVarAnalysis::InductionInfo* trip, - int32_t fail_value); - - static Value AddValue(Value v1, Value v2, int32_t fail_value); - static Value SubValue(Value v1, Value v2, int32_t fail_value); - static Value MulValue(Value v1, Value v2, int32_t fail_value); - static Value DivValue(Value v1, Value v2, int32_t fail_value); - static Value MinValue(Value v1, Value v2); - static Value MaxValue(Value v1, Value v2); + bool in_body, + bool is_min); + + static bool GetConstant(HInductionVarAnalysis::InductionInfo* info, int32_t *value); + + static Value AddValue(Value v1, Value v2); + static Value SubValue(Value v1, Value v2); + static Value MulValue(Value v1, Value v2); + static Value DivValue(Value v1, Value v2); + static Value MergeVal(Value v1, Value v2, bool is_min); /** Results of prior induction variable analysis. */ HInductionVarAnalysis *induction_analysis_; diff --git a/compiler/optimizing/induction_var_range_test.cc b/compiler/optimizing/induction_var_range_test.cc index d3c3518193..4497a884d9 100644 --- a/compiler/optimizing/induction_var_range_test.cc +++ b/compiler/optimizing/induction_var_range_test.cc @@ -14,8 +14,6 @@ * limitations under the License. */ -#include <limits.h> - #include "base/arena_allocator.h" #include "builder.h" #include "gtest/gtest.h" @@ -45,6 +43,7 @@ class InductionVarRangeTest : public testing::Test { EXPECT_EQ(v1.instruction, v2.instruction); EXPECT_EQ(v1.a_constant, v2.a_constant); EXPECT_EQ(v1.b_constant, v2.b_constant); + EXPECT_EQ(v1.is_known, v2.is_known); } /** Constructs bare minimum graph. */ @@ -86,8 +85,7 @@ class InductionVarRangeTest : public testing::Test { /** Constructs a trip-count. */ HInductionVarAnalysis::InductionInfo* CreateTripCount(int32_t tc) { - HInductionVarAnalysis::InductionInfo* trip = CreateConst(tc); - return CreateInvariant('@', trip, trip); + return iva_->CreateTripCount(HInductionVarAnalysis::kTripCountInLoop, CreateConst(tc)); } /** Constructs a linear a * i + b induction. 
*/ @@ -113,30 +111,36 @@ class InductionVarRangeTest : public testing::Test { Value GetMin(HInductionVarAnalysis::InductionInfo* info, HInductionVarAnalysis::InductionInfo* induc) { - return InductionVarRange::GetMin(info, induc); + return InductionVarRange::GetVal(info, induc, /* in_body */ true, /* is_min */ true); } Value GetMax(HInductionVarAnalysis::InductionInfo* info, HInductionVarAnalysis::InductionInfo* induc) { - return InductionVarRange::GetMax(info, induc); + return InductionVarRange::GetVal(info, induc, /* in_body */ true, /* is_min */ false); } Value GetMul(HInductionVarAnalysis::InductionInfo* info1, - HInductionVarAnalysis::InductionInfo* info2, int32_t fail_value) { - return InductionVarRange::GetMul(info1, info2, nullptr, fail_value); + HInductionVarAnalysis::InductionInfo* info2, + bool is_min) { + return InductionVarRange::GetMul(info1, info2, nullptr, /* in_body */ true, is_min); } Value GetDiv(HInductionVarAnalysis::InductionInfo* info1, - HInductionVarAnalysis::InductionInfo* info2, int32_t fail_value) { - return InductionVarRange::GetDiv(info1, info2, nullptr, fail_value); + HInductionVarAnalysis::InductionInfo* info2, + bool is_min) { + return InductionVarRange::GetDiv(info1, info2, nullptr, /* in_body */ true, is_min); + } + + bool GetConstant(HInductionVarAnalysis::InductionInfo* info, int32_t* value) { + return InductionVarRange::GetConstant(info, value); } - Value AddValue(Value v1, Value v2) { return InductionVarRange::AddValue(v1, v2, INT_MIN); } - Value SubValue(Value v1, Value v2) { return InductionVarRange::SubValue(v1, v2, INT_MIN); } - Value MulValue(Value v1, Value v2) { return InductionVarRange::MulValue(v1, v2, INT_MIN); } - Value DivValue(Value v1, Value v2) { return InductionVarRange::DivValue(v1, v2, INT_MIN); } - Value MinValue(Value v1, Value v2) { return InductionVarRange::MinValue(v1, v2); } - Value MaxValue(Value v1, Value v2) { return InductionVarRange::MaxValue(v1, v2); } + Value AddValue(Value v1, Value v2) { return InductionVarRange::AddValue(v1, v2); } + Value SubValue(Value v1, Value v2) { return InductionVarRange::SubValue(v1, v2); } + Value MulValue(Value v1, Value v2) { return InductionVarRange::MulValue(v1, v2); } + Value DivValue(Value v1, Value v2) { return InductionVarRange::DivValue(v1, v2); } + Value MinValue(Value v1, Value v2) { return InductionVarRange::MergeVal(v1, v2, true); } + Value MaxValue(Value v1, Value v2) { return InductionVarRange::MergeVal(v1, v2, false); } // General building fields. 
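All of these helpers manipulate the symbolic form a_constant * instruction + b_constant, with a default-constructed Value() meaning "unknown"; a couple of illustrative values using the x_ and y_ fixtures of this test (results as asserted in the tests below):

    // Value(10)            the constant 10                      (a = 0, b = 10)
    // Value(&x_, 2, -4)    2 * x - 4
    // AddValue(Value(&x_, 2, -4), Value(&x_, 1, -1))  -->  Value(&x_, 3, -5)
    // AddValue(Value(&x_, 1, 5), Value(&y_, 1, -7))   -->  Value()   (mixed symbols: unknown)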
ArenaPool pool_; @@ -154,8 +158,8 @@ class InductionVarRangeTest : public testing::Test { // TEST_F(InductionVarRangeTest, GetMinMaxNull) { - ExpectEqual(Value(INT_MIN), GetMin(nullptr, nullptr)); - ExpectEqual(Value(INT_MAX), GetMax(nullptr, nullptr)); + ExpectEqual(Value(), GetMin(nullptr, nullptr)); + ExpectEqual(Value(), GetMax(nullptr, nullptr)); } TEST_F(InductionVarRangeTest, GetMinMaxAdd) { @@ -251,91 +255,98 @@ TEST_F(InductionVarRangeTest, GetMinMaxPeriodic) { } TEST_F(InductionVarRangeTest, GetMulMin) { - ExpectEqual(Value(6), GetMul(CreateRange(2, 10), CreateRange(3, 5), INT_MIN)); - ExpectEqual(Value(-50), GetMul(CreateRange(2, 10), CreateRange(-5, -3), INT_MIN)); - ExpectEqual(Value(-50), GetMul(CreateRange(-10, -2), CreateRange(3, 5), INT_MIN)); - ExpectEqual(Value(6), GetMul(CreateRange(-10, -2), CreateRange(-5, -3), INT_MIN)); + ExpectEqual(Value(6), GetMul(CreateRange(2, 10), CreateRange(3, 5), true)); + ExpectEqual(Value(-50), GetMul(CreateRange(2, 10), CreateRange(-5, -3), true)); + ExpectEqual(Value(-50), GetMul(CreateRange(-10, -2), CreateRange(3, 5), true)); + ExpectEqual(Value(6), GetMul(CreateRange(-10, -2), CreateRange(-5, -3), true)); } TEST_F(InductionVarRangeTest, GetMulMax) { - ExpectEqual(Value(50), GetMul(CreateRange(2, 10), CreateRange(3, 5), INT_MAX)); - ExpectEqual(Value(-6), GetMul(CreateRange(2, 10), CreateRange(-5, -3), INT_MAX)); - ExpectEqual(Value(-6), GetMul(CreateRange(-10, -2), CreateRange(3, 5), INT_MAX)); - ExpectEqual(Value(50), GetMul(CreateRange(-10, -2), CreateRange(-5, -3), INT_MAX)); + ExpectEqual(Value(50), GetMul(CreateRange(2, 10), CreateRange(3, 5), false)); + ExpectEqual(Value(-6), GetMul(CreateRange(2, 10), CreateRange(-5, -3), false)); + ExpectEqual(Value(-6), GetMul(CreateRange(-10, -2), CreateRange(3, 5), false)); + ExpectEqual(Value(50), GetMul(CreateRange(-10, -2), CreateRange(-5, -3), false)); } TEST_F(InductionVarRangeTest, GetDivMin) { - ExpectEqual(Value(10), GetDiv(CreateRange(40, 1000), CreateRange(2, 4), INT_MIN)); - ExpectEqual(Value(-500), GetDiv(CreateRange(40, 1000), CreateRange(-4, -2), INT_MIN)); - ExpectEqual(Value(-500), GetDiv(CreateRange(-1000, -40), CreateRange(2, 4), INT_MIN)); - ExpectEqual(Value(10), GetDiv(CreateRange(-1000, -40), CreateRange(-4, -2), INT_MIN)); + ExpectEqual(Value(10), GetDiv(CreateRange(40, 1000), CreateRange(2, 4), true)); + ExpectEqual(Value(-500), GetDiv(CreateRange(40, 1000), CreateRange(-4, -2), true)); + ExpectEqual(Value(-500), GetDiv(CreateRange(-1000, -40), CreateRange(2, 4), true)); + ExpectEqual(Value(10), GetDiv(CreateRange(-1000, -40), CreateRange(-4, -2), true)); } TEST_F(InductionVarRangeTest, GetDivMax) { - ExpectEqual(Value(500), GetDiv(CreateRange(40, 1000), CreateRange(2, 4), INT_MAX)); - ExpectEqual(Value(-10), GetDiv(CreateRange(40, 1000), CreateRange(-4, -2), INT_MAX)); - ExpectEqual(Value(-10), GetDiv(CreateRange(-1000, -40), CreateRange(2, 4), INT_MAX)); - ExpectEqual(Value(500), GetDiv(CreateRange(-1000, -40), CreateRange(-4, -2), INT_MAX)); + ExpectEqual(Value(500), GetDiv(CreateRange(40, 1000), CreateRange(2, 4), false)); + ExpectEqual(Value(-10), GetDiv(CreateRange(40, 1000), CreateRange(-4, -2), false)); + ExpectEqual(Value(-10), GetDiv(CreateRange(-1000, -40), CreateRange(2, 4), false)); + ExpectEqual(Value(500), GetDiv(CreateRange(-1000, -40), CreateRange(-4, -2), false)); +} + +TEST_F(InductionVarRangeTest, GetConstant) { + int32_t value; + ASSERT_TRUE(GetConstant(CreateConst(12345), &value)); + EXPECT_EQ(12345, value); + 
EXPECT_FALSE(GetConstant(CreateRange(1, 2), &value)); } TEST_F(InductionVarRangeTest, AddValue) { ExpectEqual(Value(110), AddValue(Value(10), Value(100))); ExpectEqual(Value(-5), AddValue(Value(&x_, 1, -4), Value(&x_, -1, -1))); ExpectEqual(Value(&x_, 3, -5), AddValue(Value(&x_, 2, -4), Value(&x_, 1, -1))); - ExpectEqual(Value(INT_MIN), AddValue(Value(&x_, 1, 5), Value(&y_, 1, -7))); + ExpectEqual(Value(), AddValue(Value(&x_, 1, 5), Value(&y_, 1, -7))); ExpectEqual(Value(&x_, 1, 23), AddValue(Value(&x_, 1, 20), Value(3))); ExpectEqual(Value(&y_, 1, 5), AddValue(Value(55), Value(&y_, 1, -50))); - // Unsafe. - ExpectEqual(Value(INT_MIN), AddValue(Value(INT_MAX - 5), Value(6))); + const int32_t max_value = std::numeric_limits<int32_t>::max(); + ExpectEqual(Value(max_value), AddValue(Value(max_value - 5), Value(5))); + ExpectEqual(Value(), AddValue(Value(max_value - 5), Value(6))); // unsafe } TEST_F(InductionVarRangeTest, SubValue) { ExpectEqual(Value(-90), SubValue(Value(10), Value(100))); ExpectEqual(Value(-3), SubValue(Value(&x_, 1, -4), Value(&x_, 1, -1))); ExpectEqual(Value(&x_, 2, -3), SubValue(Value(&x_, 3, -4), Value(&x_, 1, -1))); - ExpectEqual(Value(INT_MIN), SubValue(Value(&x_, 1, 5), Value(&y_, 1, -7))); + ExpectEqual(Value(), SubValue(Value(&x_, 1, 5), Value(&y_, 1, -7))); ExpectEqual(Value(&x_, 1, 17), SubValue(Value(&x_, 1, 20), Value(3))); ExpectEqual(Value(&y_, -4, 105), SubValue(Value(55), Value(&y_, 4, -50))); - // Unsafe. - ExpectEqual(Value(INT_MIN), SubValue(Value(INT_MIN + 5), Value(6))); + const int32_t min_value = std::numeric_limits<int32_t>::min(); + ExpectEqual(Value(min_value), SubValue(Value(min_value + 5), Value(5))); + ExpectEqual(Value(), SubValue(Value(min_value + 5), Value(6))); // unsafe } TEST_F(InductionVarRangeTest, MulValue) { ExpectEqual(Value(1000), MulValue(Value(10), Value(100))); - ExpectEqual(Value(INT_MIN), MulValue(Value(&x_, 1, -4), Value(&x_, 1, -1))); - ExpectEqual(Value(INT_MIN), MulValue(Value(&x_, 1, 5), Value(&y_, 1, -7))); + ExpectEqual(Value(), MulValue(Value(&x_, 1, -4), Value(&x_, 1, -1))); + ExpectEqual(Value(), MulValue(Value(&x_, 1, 5), Value(&y_, 1, -7))); ExpectEqual(Value(&x_, 9, 60), MulValue(Value(&x_, 3, 20), Value(3))); ExpectEqual(Value(&y_, 55, -110), MulValue(Value(55), Value(&y_, 1, -2))); - // Unsafe. - ExpectEqual(Value(INT_MIN), MulValue(Value(90000), Value(-90000))); + ExpectEqual(Value(), MulValue(Value(90000), Value(-90000))); // unsafe } TEST_F(InductionVarRangeTest, DivValue) { ExpectEqual(Value(25), DivValue(Value(100), Value(4))); - ExpectEqual(Value(INT_MIN), DivValue(Value(&x_, 1, -4), Value(&x_, 1, -1))); - ExpectEqual(Value(INT_MIN), DivValue(Value(&x_, 1, 5), Value(&y_, 1, -7))); - ExpectEqual(Value(INT_MIN), DivValue(Value(&x_, 12, 24), Value(3))); - ExpectEqual(Value(INT_MIN), DivValue(Value(55), Value(&y_, 1, -50))); - // Unsafe. 
- ExpectEqual(Value(INT_MIN), DivValue(Value(1), Value(0))); + ExpectEqual(Value(), DivValue(Value(&x_, 1, -4), Value(&x_, 1, -1))); + ExpectEqual(Value(), DivValue(Value(&x_, 1, 5), Value(&y_, 1, -7))); + ExpectEqual(Value(), DivValue(Value(&x_, 12, 24), Value(3))); + ExpectEqual(Value(), DivValue(Value(55), Value(&y_, 1, -50))); + ExpectEqual(Value(), DivValue(Value(1), Value(0))); // unsafe } TEST_F(InductionVarRangeTest, MinValue) { ExpectEqual(Value(10), MinValue(Value(10), Value(100))); ExpectEqual(Value(&x_, 1, -4), MinValue(Value(&x_, 1, -4), Value(&x_, 1, -1))); ExpectEqual(Value(&x_, 4, -4), MinValue(Value(&x_, 4, -4), Value(&x_, 4, -1))); - ExpectEqual(Value(INT_MIN), MinValue(Value(&x_, 1, 5), Value(&y_, 1, -7))); - ExpectEqual(Value(INT_MIN), MinValue(Value(&x_, 1, 20), Value(3))); - ExpectEqual(Value(INT_MIN), MinValue(Value(55), Value(&y_, 1, -50))); + ExpectEqual(Value(), MinValue(Value(&x_, 1, 5), Value(&y_, 1, -7))); + ExpectEqual(Value(), MinValue(Value(&x_, 1, 20), Value(3))); + ExpectEqual(Value(), MinValue(Value(55), Value(&y_, 1, -50))); } TEST_F(InductionVarRangeTest, MaxValue) { ExpectEqual(Value(100), MaxValue(Value(10), Value(100))); ExpectEqual(Value(&x_, 1, -1), MaxValue(Value(&x_, 1, -4), Value(&x_, 1, -1))); ExpectEqual(Value(&x_, 4, -1), MaxValue(Value(&x_, 4, -4), Value(&x_, 4, -1))); - ExpectEqual(Value(INT_MAX), MaxValue(Value(&x_, 1, 5), Value(&y_, 1, -7))); - ExpectEqual(Value(INT_MAX), MaxValue(Value(&x_, 1, 20), Value(3))); - ExpectEqual(Value(INT_MAX), MaxValue(Value(55), Value(&y_, 1, -50))); + ExpectEqual(Value(), MaxValue(Value(&x_, 1, 5), Value(&y_, 1, -7))); + ExpectEqual(Value(), MaxValue(Value(&x_, 1, 20), Value(3))); + ExpectEqual(Value(), MaxValue(Value(55), Value(&y_, 1, -50))); } } // namespace art diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index 0ac26de674..22bca2f111 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -71,7 +71,8 @@ class InstructionSimplifierVisitor : public HGraphVisitor { void VisitXor(HXor* instruction) OVERRIDE; void VisitInstanceOf(HInstanceOf* instruction) OVERRIDE; void VisitFakeString(HFakeString* fake_string) OVERRIDE; - bool IsDominatedByInputNullCheck(HInstruction* instr); + + bool CanEnsureNotNullAt(HInstruction* instr, HInstruction* at) const; OptimizingCompilerStats* stats_; bool simplification_occurred_ = false; @@ -187,14 +188,18 @@ void InstructionSimplifierVisitor::VisitNullCheck(HNullCheck* null_check) { } } -bool InstructionSimplifierVisitor::IsDominatedByInputNullCheck(HInstruction* instr) { - HInstruction* input = instr->InputAt(0); +bool InstructionSimplifierVisitor::CanEnsureNotNullAt(HInstruction* input, HInstruction* at) const { + if (!input->CanBeNull()) { + return true; + } + for (HUseIterator<HInstruction*> it(input->GetUses()); !it.Done(); it.Advance()) { HInstruction* use = it.Current()->GetUser(); - if (use->IsNullCheck() && use->StrictlyDominates(instr)) { + if (use->IsNullCheck() && use->StrictlyDominates(at)) { return true; } } + return false; } @@ -231,7 +236,7 @@ static bool TypeCheckHasKnownOutcome(HLoadClass* klass, HInstruction* object, bo void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) { HInstruction* object = check_cast->InputAt(0); - if (!object->CanBeNull() || IsDominatedByInputNullCheck(check_cast)) { + if (CanEnsureNotNullAt(object, check_cast)) { check_cast->ClearMustDoNullCheck(); } @@ -267,7 +272,7 @@ void 
InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) { void InstructionSimplifierVisitor::VisitInstanceOf(HInstanceOf* instruction) { HInstruction* object = instruction->InputAt(0); bool can_be_null = true; - if (!object->CanBeNull() || IsDominatedByInputNullCheck(instruction)) { + if (CanEnsureNotNullAt(object, instruction)) { can_be_null = false; instruction->ClearMustDoNullCheck(); } @@ -305,14 +310,14 @@ void InstructionSimplifierVisitor::VisitInstanceOf(HInstanceOf* instruction) { void InstructionSimplifierVisitor::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { if ((instruction->GetValue()->GetType() == Primitive::kPrimNot) - && !instruction->GetValue()->CanBeNull()) { + && CanEnsureNotNullAt(instruction->GetValue(), instruction)) { instruction->ClearValueCanBeNull(); } } void InstructionSimplifierVisitor::VisitStaticFieldSet(HStaticFieldSet* instruction) { if ((instruction->GetValue()->GetType() == Primitive::kPrimNot) - && !instruction->GetValue()->CanBeNull()) { + && CanEnsureNotNullAt(instruction->GetValue(), instruction)) { instruction->ClearValueCanBeNull(); } } @@ -437,7 +442,7 @@ void InstructionSimplifierVisitor::VisitArraySet(HArraySet* instruction) { instruction->ClearNeedsTypeCheck(); } - if (!value->CanBeNull()) { + if (CanEnsureNotNullAt(value, instruction)) { instruction->ClearValueCanBeNull(); } } @@ -502,14 +507,45 @@ void InstructionSimplifierVisitor::VisitAnd(HAnd* instruction) { HConstant* input_cst = instruction->GetConstantRight(); HInstruction* input_other = instruction->GetLeastConstantLeft(); - if ((input_cst != nullptr) && AreAllBitsSet(input_cst)) { - // Replace code looking like - // AND dst, src, 0xFFF...FF - // with - // src - instruction->ReplaceWith(input_other); - instruction->GetBlock()->RemoveInstruction(instruction); - return; + if (input_cst != nullptr) { + int64_t value = Int64FromConstant(input_cst); + if (value == -1) { + // Replace code looking like + // AND dst, src, 0xFFF...FF + // with + // src + instruction->ReplaceWith(input_other); + instruction->GetBlock()->RemoveInstruction(instruction); + RecordSimplification(); + return; + } + // Eliminate And from UShr+And if the And-mask contains all the bits that + // can be non-zero after UShr. Transform Shr+And to UShr if the And-mask + // precisely clears the shifted-in sign bits. + if ((input_other->IsUShr() || input_other->IsShr()) && input_other->InputAt(1)->IsConstant()) { + size_t reg_bits = (instruction->GetResultType() == Primitive::kPrimLong) ? 64 : 32; + size_t shift = Int64FromConstant(input_other->InputAt(1)->AsConstant()) & (reg_bits - 1); + size_t num_tail_bits_set = CTZ(value + 1); + if ((num_tail_bits_set >= reg_bits - shift) && input_other->IsUShr()) { + // This AND clears only bits known to be clear, for example "(x >>> 24) & 0xff". + instruction->ReplaceWith(input_other); + instruction->GetBlock()->RemoveInstruction(instruction); + RecordSimplification(); + return; + } else if ((num_tail_bits_set == reg_bits - shift) && IsPowerOfTwo(value + 1) && + input_other->HasOnlyOneNonEnvironmentUse()) { + DCHECK(input_other->IsShr()); // For UShr, we would have taken the branch above. + // Replace SHR+AND with USHR, for example "(x >> 24) & 0xff" -> "x >>> 24". 
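Concretely, for the 32-bit cases named in the comments above: after x >>> 24 only the low eight bits can be non-zero, so masking with 0xff is a no-op, and after the arithmetic shift x >> 24 the high 24 bits are copies of the sign bit, which the 0xff mask clears, leaving exactly the result of the unsigned shift:

    // (x >>> 24) & 0xff  ==  x >>> 24     mask covers every bit that can still be set
    // (x  >> 24) & 0xff  ==  x >>> 24     mask clears exactly the shifted-in sign bits
    // e.g. x = 0x80000000:  x >> 24 = 0xffffff80,  (x >> 24) & 0xff = 0x80 = x >>> 24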
+ HUShr* ushr = new (GetGraph()->GetArena()) HUShr(instruction->GetType(), + input_other->InputAt(0), + input_other->InputAt(1), + input_other->GetDexPc()); + instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, ushr); + input_other->GetBlock()->RemoveInstruction(input_other); + RecordSimplification(); + return; + } + } } // We assume that GVN has run before, so we only perform a pointer comparison. diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc index b71fdb8f1d..95646222ef 100644 --- a/compiler/optimizing/intrinsics.cc +++ b/compiler/optimizing/intrinsics.cc @@ -90,7 +90,7 @@ static Primitive::Type GetType(uint64_t data, bool is_op_size) { } static Intrinsics GetIntrinsic(InlineMethod method, InstructionSet instruction_set) { - if (instruction_set == kMips || instruction_set == kMips64) { + if (instruction_set == kMips) { return Intrinsics::kNone; } switch (method.opcode) { diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc new file mode 100644 index 0000000000..52e2cbec34 --- /dev/null +++ b/compiler/optimizing/intrinsics_mips64.cc @@ -0,0 +1,782 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "intrinsics_mips64.h" + +#include "arch/mips64/instruction_set_features_mips64.h" +#include "art_method.h" +#include "code_generator_mips64.h" +#include "entrypoints/quick/quick_entrypoints.h" +#include "intrinsics.h" +#include "mirror/array-inl.h" +#include "mirror/string.h" +#include "thread.h" +#include "utils/mips64/assembler_mips64.h" +#include "utils/mips64/constants_mips64.h" + +namespace art { + +namespace mips64 { + +IntrinsicLocationsBuilderMIPS64::IntrinsicLocationsBuilderMIPS64(CodeGeneratorMIPS64* codegen) + : arena_(codegen->GetGraph()->GetArena()) { +} + +Mips64Assembler* IntrinsicCodeGeneratorMIPS64::GetAssembler() { + return reinterpret_cast<Mips64Assembler*>(codegen_->GetAssembler()); +} + +ArenaAllocator* IntrinsicCodeGeneratorMIPS64::GetAllocator() { + return codegen_->GetGraph()->GetArena(); +} + +bool IntrinsicLocationsBuilderMIPS64::TryDispatch(HInvoke* invoke) { + Dispatch(invoke); + LocationSummary* res = invoke->GetLocations(); + return res != nullptr && res->Intrinsified(); +} + +#define __ assembler-> + +static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresRegister()); +} + +static void MoveFPToInt(LocationSummary* locations, bool is64bit, Mips64Assembler* assembler) { + FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>(); + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + + if (is64bit) { + __ Dmfc1(out, in); + } else { + __ Mfc1(out, in); + } +} + +// long java.lang.Double.doubleToRawLongBits(double) +void IntrinsicLocationsBuilderMIPS64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { + CreateFPToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { + MoveFPToInt(invoke->GetLocations(), true, GetAssembler()); +} + +// int java.lang.Float.floatToRawIntBits(float) +void IntrinsicLocationsBuilderMIPS64::VisitFloatFloatToRawIntBits(HInvoke* invoke) { + CreateFPToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitFloatFloatToRawIntBits(HInvoke* invoke) { + MoveFPToInt(invoke->GetLocations(), false, GetAssembler()); +} + +static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresFpuRegister()); +} + +static void MoveIntToFP(LocationSummary* locations, bool is64bit, Mips64Assembler* assembler) { + GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>(); + FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); + + if (is64bit) { + __ Dmtc1(in, out); + } else { + __ Mtc1(in, out); + } +} + +// double java.lang.Double.longBitsToDouble(long) +void IntrinsicLocationsBuilderMIPS64::VisitDoubleLongBitsToDouble(HInvoke* invoke) { + CreateIntToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitDoubleLongBitsToDouble(HInvoke* invoke) { + MoveIntToFP(invoke->GetLocations(), true, GetAssembler()); +} + +// float java.lang.Float.intBitsToFloat(int) +void IntrinsicLocationsBuilderMIPS64::VisitFloatIntBitsToFloat(HInvoke* invoke) { + CreateIntToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitFloatIntBitsToFloat(HInvoke* invoke) { 
+ MoveIntToFP(invoke->GetLocations(), false, GetAssembler()); +} + +static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +static void GenReverseBytes(LocationSummary* locations, + Primitive::Type type, + Mips64Assembler* assembler) { + GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>(); + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + + switch (type) { + case Primitive::kPrimShort: + __ Dsbh(out, in); + __ Seh(out, out); + break; + case Primitive::kPrimInt: + __ Rotr(out, in, 16); + __ Wsbh(out, out); + break; + case Primitive::kPrimLong: + __ Dsbh(out, in); + __ Dshd(out, out); + break; + default: + LOG(FATAL) << "Unexpected size for reverse-bytes: " << type; + UNREACHABLE(); + } +} + +// int java.lang.Integer.reverseBytes(int) +void IntrinsicLocationsBuilderMIPS64::VisitIntegerReverseBytes(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitIntegerReverseBytes(HInvoke* invoke) { + GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler()); +} + +// long java.lang.Long.reverseBytes(long) +void IntrinsicLocationsBuilderMIPS64::VisitLongReverseBytes(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitLongReverseBytes(HInvoke* invoke) { + GenReverseBytes(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler()); +} + +// short java.lang.Short.reverseBytes(short) +void IntrinsicLocationsBuilderMIPS64::VisitShortReverseBytes(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitShortReverseBytes(HInvoke* invoke) { + GenReverseBytes(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler()); +} + +static void GenCountZeroes(LocationSummary* locations, bool is64bit, Mips64Assembler* assembler) { + GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>(); + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + + if (is64bit) { + __ Dclz(out, in); + } else { + __ Clz(out, in); + } +} + +// int java.lang.Integer.numberOfLeadingZeros(int i) +void IntrinsicLocationsBuilderMIPS64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { + GenCountZeroes(invoke->GetLocations(), false, GetAssembler()); +} + +// int java.lang.Long.numberOfLeadingZeros(long i) +void IntrinsicLocationsBuilderMIPS64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { + GenCountZeroes(invoke->GetLocations(), true, GetAssembler()); +} + +static void GenReverse(LocationSummary* locations, + Primitive::Type type, + Mips64Assembler* assembler) { + DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong); + + GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>(); + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + + if (type == Primitive::kPrimInt) { + __ Rotr(out, in, 16); + __ Wsbh(out, out); + __ Bitswap(out, out); + } else { + __ Dsbh(out, in); + __ Dshd(out, out); + __ Dbitswap(out, out); + } +} + +// int 
java.lang.Integer.reverse(int) +void IntrinsicLocationsBuilderMIPS64::VisitIntegerReverse(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitIntegerReverse(HInvoke* invoke) { + GenReverse(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler()); +} + +// long java.lang.Long.reverse(long) +void IntrinsicLocationsBuilderMIPS64::VisitLongReverse(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitLongReverse(HInvoke* invoke) { + GenReverse(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler()); +} + +static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); +} + +static void MathAbsFP(LocationSummary* locations, bool is64bit, Mips64Assembler* assembler) { + FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>(); + FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); + + if (is64bit) { + __ AbsD(out, in); + } else { + __ AbsS(out, in); + } +} + +// double java.lang.Math.abs(double) +void IntrinsicLocationsBuilderMIPS64::VisitMathAbsDouble(HInvoke* invoke) { + CreateFPToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMathAbsDouble(HInvoke* invoke) { + MathAbsFP(invoke->GetLocations(), true, GetAssembler()); +} + +// float java.lang.Math.abs(float) +void IntrinsicLocationsBuilderMIPS64::VisitMathAbsFloat(HInvoke* invoke) { + CreateFPToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMathAbsFloat(HInvoke* invoke) { + MathAbsFP(invoke->GetLocations(), false, GetAssembler()); +} + +static void CreateIntToInt(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +static void GenAbsInteger(LocationSummary* locations, bool is64bit, Mips64Assembler* assembler) { + GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>(); + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + + if (is64bit) { + __ Dsra32(AT, in, 31); + __ Xor(out, in, AT); + __ Dsubu(out, out, AT); + } else { + __ Sra(AT, in, 31); + __ Xor(out, in, AT); + __ Subu(out, out, AT); + } +} + +// int java.lang.Math.abs(int) +void IntrinsicLocationsBuilderMIPS64::VisitMathAbsInt(HInvoke* invoke) { + CreateIntToInt(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMathAbsInt(HInvoke* invoke) { + GenAbsInteger(invoke->GetLocations(), false, GetAssembler()); +} + +// long java.lang.Math.abs(long) +void IntrinsicLocationsBuilderMIPS64::VisitMathAbsLong(HInvoke* invoke) { + CreateIntToInt(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMathAbsLong(HInvoke* invoke) { + GenAbsInteger(invoke->GetLocations(), true, GetAssembler()); +} + +static void GenMinMaxFP(LocationSummary* locations, + bool is_min, + bool is_double, + Mips64Assembler* assembler) { + FpuRegister lhs = locations->InAt(0).AsFpuRegister<FpuRegister>(); + FpuRegister rhs = locations->InAt(1).AsFpuRegister<FpuRegister>(); + FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); + + if (is_double) { + if (is_min) { + __ MinD(out, lhs, 
rhs); + } else { + __ MaxD(out, lhs, rhs); + } + } else { + if (is_min) { + __ MinS(out, lhs, rhs); + } else { + __ MaxS(out, lhs, rhs); + } + } +} + +static void CreateFPFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); +} + +// double java.lang.Math.min(double, double) +void IntrinsicLocationsBuilderMIPS64::VisitMathMinDoubleDouble(HInvoke* invoke) { + CreateFPFPToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMathMinDoubleDouble(HInvoke* invoke) { + GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler()); +} + +// float java.lang.Math.min(float, float) +void IntrinsicLocationsBuilderMIPS64::VisitMathMinFloatFloat(HInvoke* invoke) { + CreateFPFPToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMathMinFloatFloat(HInvoke* invoke) { + GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler()); +} + +// double java.lang.Math.max(double, double) +void IntrinsicLocationsBuilderMIPS64::VisitMathMaxDoubleDouble(HInvoke* invoke) { + CreateFPFPToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMathMaxDoubleDouble(HInvoke* invoke) { + GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler()); +} + +// float java.lang.Math.max(float, float) +void IntrinsicLocationsBuilderMIPS64::VisitMathMaxFloatFloat(HInvoke* invoke) { + CreateFPFPToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMathMaxFloatFloat(HInvoke* invoke) { + GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler()); +} + +static void GenMinMax(LocationSummary* locations, + bool is_min, + Mips64Assembler* assembler) { + GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>(); + GpuRegister rhs = locations->InAt(1).AsRegister<GpuRegister>(); + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + + if (out == lhs) { + __ Slt(AT, rhs, lhs); + if (is_min) { + __ Seleqz(out, lhs, AT); + __ Selnez(AT, rhs, AT); + } else { + __ Selnez(out, lhs, AT); + __ Seleqz(AT, rhs, AT); + } + } else { + __ Slt(AT, lhs, rhs); + if (is_min) { + __ Seleqz(out, rhs, AT); + __ Selnez(AT, lhs, AT); + } else { + __ Selnez(out, rhs, AT); + __ Seleqz(AT, lhs, AT); + } + } + __ Or(out, out, AT); +} + +static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +// int java.lang.Math.min(int, int) +void IntrinsicLocationsBuilderMIPS64::VisitMathMinIntInt(HInvoke* invoke) { + CreateIntIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMathMinIntInt(HInvoke* invoke) { + GenMinMax(invoke->GetLocations(), true, GetAssembler()); +} + +// long java.lang.Math.min(long, long) +void IntrinsicLocationsBuilderMIPS64::VisitMathMinLongLong(HInvoke* invoke) { + CreateIntIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMathMinLongLong(HInvoke* invoke) { + GenMinMax(invoke->GetLocations(), true, GetAssembler()); +} + +// int 
java.lang.Math.max(int, int) +void IntrinsicLocationsBuilderMIPS64::VisitMathMaxIntInt(HInvoke* invoke) { + CreateIntIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMathMaxIntInt(HInvoke* invoke) { + GenMinMax(invoke->GetLocations(), false, GetAssembler()); +} + +// long java.lang.Math.max(long, long) +void IntrinsicLocationsBuilderMIPS64::VisitMathMaxLongLong(HInvoke* invoke) { + CreateIntIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMathMaxLongLong(HInvoke* invoke) { + GenMinMax(invoke->GetLocations(), false, GetAssembler()); +} + +// double java.lang.Math.sqrt(double) +void IntrinsicLocationsBuilderMIPS64::VisitMathSqrt(HInvoke* invoke) { + CreateFPToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMathSqrt(HInvoke* invoke) { + LocationSummary* locations = invoke->GetLocations(); + Mips64Assembler* assembler = GetAssembler(); + FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>(); + FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); + + __ SqrtD(out, in); +} + +static void CreateFPToFP(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); +} + +// double java.lang.Math.rint(double) +void IntrinsicLocationsBuilderMIPS64::VisitMathRint(HInvoke* invoke) { + CreateFPToFP(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMathRint(HInvoke* invoke) { + LocationSummary* locations = invoke->GetLocations(); + Mips64Assembler* assembler = GetAssembler(); + FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>(); + FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); + + __ RintD(out, in); +} + +// double java.lang.Math.floor(double) +void IntrinsicLocationsBuilderMIPS64::VisitMathFloor(HInvoke* invoke) { + CreateFPToFP(arena_, invoke); +} + +// 0x200 - +zero +// 0x040 - +infinity +// 0x020 - -zero +// 0x004 - -infinity +// 0x002 - quiet NaN +// 0x001 - signaling NaN +const constexpr uint16_t CLASS_MASK = 0x267; + +void IntrinsicCodeGeneratorMIPS64::VisitMathFloor(HInvoke* invoke) { + LocationSummary* locations = invoke->GetLocations(); + Mips64Assembler* assembler = GetAssembler(); + FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>(); + FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); + + Label done; + + // double floor(double in) { + // if in.isNaN || in.isInfinite || in.isZero { + // return in; + // } + __ ClassD(out, in); + __ Dmfc1(AT, out); + __ Andi(AT, AT, CLASS_MASK); // +0.0 | +Inf | -0.0 | -Inf | qNaN | sNaN + __ MovD(out, in); + __ Bnezc(AT, &done); + + // Long outLong = floor(in); + // if outLong == Long.MAX_VALUE { + // // floor() has almost certainly returned a value which + // // can't be successfully represented as a signed 64-bit + // // number. Java expects that the input value will be + // // returned in these cases. + // // There is also a small probability that floor(in) + // // correctly truncates the input value to Long.MAX_VALUE. In + // // that case, this exception handling code still does the + // // correct thing. 
+ // return in; + // } + __ FloorLD(out, in); + __ Dmfc1(AT, out); + __ MovD(out, in); + __ LoadConst64(TMP, kPrimLongMax); + __ Beqc(AT, TMP, &done); + + // double out = outLong; + // return out; + __ Dmtc1(AT, out); + __ Cvtdl(out, out); + __ Bind(&done); + // } +} + +// double java.lang.Math.ceil(double) +void IntrinsicLocationsBuilderMIPS64::VisitMathCeil(HInvoke* invoke) { + CreateFPToFP(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMathCeil(HInvoke* invoke) { + LocationSummary* locations = invoke->GetLocations(); + Mips64Assembler* assembler = GetAssembler(); + FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>(); + FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); + + Label done; + + // double ceil(double in) { + // if in.isNaN || in.isInfinite || in.isZero { + // return in; + // } + __ ClassD(out, in); + __ Dmfc1(AT, out); + __ Andi(AT, AT, CLASS_MASK); // +0.0 | +Inf | -0.0 | -Inf | qNaN | sNaN + __ MovD(out, in); + __ Bnezc(AT, &done); + + // Long outLong = ceil(in); + // if outLong == Long.MAX_VALUE { + // // ceil() has almost certainly returned a value which + // // can't be successfully represented as a signed 64-bit + // // number. Java expects that the input value will be + // // returned in these cases. + // // There is also a small probability that ceil(in) + // // correctly rounds up the input value to Long.MAX_VALUE. In + // // that case, this exception handling code still does the + // // correct thing. + // return in; + // } + __ CeilLD(out, in); + __ Dmfc1(AT, out); + __ MovD(out, in); + __ LoadConst64(TMP, kPrimLongMax); + __ Beqc(AT, TMP, &done); + + // double out = outLong; + // return out; + __ Dmtc1(AT, out); + __ Cvtdl(out, out); + __ Bind(&done); + // } +} + +// byte libcore.io.Memory.peekByte(long address) +void IntrinsicLocationsBuilderMIPS64::VisitMemoryPeekByte(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMemoryPeekByte(HInvoke* invoke) { + Mips64Assembler* assembler = GetAssembler(); + GpuRegister adr = invoke->GetLocations()->InAt(0).AsRegister<GpuRegister>(); + GpuRegister out = invoke->GetLocations()->Out().AsRegister<GpuRegister>(); + + __ Lb(out, adr, 0); +} + +// short libcore.io.Memory.peekShort(long address) +void IntrinsicLocationsBuilderMIPS64::VisitMemoryPeekShortNative(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMemoryPeekShortNative(HInvoke* invoke) { + Mips64Assembler* assembler = GetAssembler(); + GpuRegister adr = invoke->GetLocations()->InAt(0).AsRegister<GpuRegister>(); + GpuRegister out = invoke->GetLocations()->Out().AsRegister<GpuRegister>(); + + __ Lh(out, adr, 0); +} + +// int libcore.io.Memory.peekInt(long address) +void IntrinsicLocationsBuilderMIPS64::VisitMemoryPeekIntNative(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMemoryPeekIntNative(HInvoke* invoke) { + Mips64Assembler* assembler = GetAssembler(); + GpuRegister adr = invoke->GetLocations()->InAt(0).AsRegister<GpuRegister>(); + GpuRegister out = invoke->GetLocations()->Out().AsRegister<GpuRegister>(); + + __ Lw(out, adr, 0); +} + +// long libcore.io.Memory.peekLong(long address) +void IntrinsicLocationsBuilderMIPS64::VisitMemoryPeekLongNative(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMemoryPeekLongNative(HInvoke* invoke) { + Mips64Assembler* assembler = GetAssembler(); + 
GpuRegister adr = invoke->GetLocations()->InAt(0).AsRegister<GpuRegister>(); + GpuRegister out = invoke->GetLocations()->Out().AsRegister<GpuRegister>(); + + __ Ld(out, adr, 0); +} + +static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); +} + +// void libcore.io.Memory.pokeByte(long address, byte value) +void IntrinsicLocationsBuilderMIPS64::VisitMemoryPokeByte(HInvoke* invoke) { + CreateIntIntToVoidLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMemoryPokeByte(HInvoke* invoke) { + Mips64Assembler* assembler = GetAssembler(); + GpuRegister adr = invoke->GetLocations()->InAt(0).AsRegister<GpuRegister>(); + GpuRegister val = invoke->GetLocations()->InAt(1).AsRegister<GpuRegister>(); + + __ Sb(val, adr, 0); +} + +// void libcore.io.Memory.pokeShort(long address, short value) +void IntrinsicLocationsBuilderMIPS64::VisitMemoryPokeShortNative(HInvoke* invoke) { + CreateIntIntToVoidLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMemoryPokeShortNative(HInvoke* invoke) { + Mips64Assembler* assembler = GetAssembler(); + GpuRegister adr = invoke->GetLocations()->InAt(0).AsRegister<GpuRegister>(); + GpuRegister val = invoke->GetLocations()->InAt(1).AsRegister<GpuRegister>(); + + __ Sh(val, adr, 0); +} + +// void libcore.io.Memory.pokeInt(long address, int value) +void IntrinsicLocationsBuilderMIPS64::VisitMemoryPokeIntNative(HInvoke* invoke) { + CreateIntIntToVoidLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMemoryPokeIntNative(HInvoke* invoke) { + Mips64Assembler* assembler = GetAssembler(); + GpuRegister adr = invoke->GetLocations()->InAt(0).AsRegister<GpuRegister>(); + GpuRegister val = invoke->GetLocations()->InAt(1).AsRegister<GpuRegister>(); + + __ Sw(val, adr, 0); +} + +// void libcore.io.Memory.pokeLong(long address, long value) +void IntrinsicLocationsBuilderMIPS64::VisitMemoryPokeLongNative(HInvoke* invoke) { + CreateIntIntToVoidLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMemoryPokeLongNative(HInvoke* invoke) { + Mips64Assembler* assembler = GetAssembler(); + GpuRegister adr = invoke->GetLocations()->InAt(0).AsRegister<GpuRegister>(); + GpuRegister val = invoke->GetLocations()->InAt(1).AsRegister<GpuRegister>(); + + __ Sd(val, adr, 0); +} + +// Unimplemented intrinsics.
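Editor's note: a hedged C++ model of the branchless selection that the GenMinMax helper above emits with Slt/Seleqz/Selnez/Or on MIPS64R6, shown here for the min case with the out == lhs operand order. Illustrative only, not ART code.

#include <cassert>
#include <cstdint>

// Mirrors: Slt AT, rhs, lhs; Seleqz out, lhs, AT; Selnez AT, rhs, AT; Or out, out, AT.
// Exactly one of the two selects keeps its operand (the other yields zero), so OR-ing
// them produces the chosen value without a branch.
int64_t BranchlessMin(int64_t lhs, int64_t rhs) {
  int64_t at = (rhs < lhs) ? 1 : 0;   // Slt AT, rhs, lhs
  int64_t out = (at == 0) ? lhs : 0;  // Seleqz out, lhs, AT
  at = (at != 0) ? rhs : 0;           // Selnez AT, rhs, AT
  return out | at;                    // Or out, out, AT
}

int main() {
  assert(BranchlessMin(3, 5) == 3);
  assert(BranchlessMin(5, 3) == 3);
  assert(BranchlessMin(-7, 2) == -7);
  assert(BranchlessMin(4, 4) == 4);
  return 0;
}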
+ +#define UNIMPLEMENTED_INTRINSIC(Name) \ +void IntrinsicLocationsBuilderMIPS64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \ +} \ +void IntrinsicCodeGeneratorMIPS64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \ +} + +UNIMPLEMENTED_INTRINSIC(MathRoundDouble) +UNIMPLEMENTED_INTRINSIC(MathRoundFloat) + +UNIMPLEMENTED_INTRINSIC(ThreadCurrentThread) +UNIMPLEMENTED_INTRINSIC(UnsafeGet) +UNIMPLEMENTED_INTRINSIC(UnsafeGetVolatile) +UNIMPLEMENTED_INTRINSIC(UnsafeGetLong) +UNIMPLEMENTED_INTRINSIC(UnsafeGetLongVolatile) +UNIMPLEMENTED_INTRINSIC(UnsafeGetObject) +UNIMPLEMENTED_INTRINSIC(UnsafeGetObjectVolatile) +UNIMPLEMENTED_INTRINSIC(UnsafePut) +UNIMPLEMENTED_INTRINSIC(UnsafePutOrdered) +UNIMPLEMENTED_INTRINSIC(UnsafePutVolatile) +UNIMPLEMENTED_INTRINSIC(UnsafePutObject) +UNIMPLEMENTED_INTRINSIC(UnsafePutObjectOrdered) +UNIMPLEMENTED_INTRINSIC(UnsafePutObjectVolatile) +UNIMPLEMENTED_INTRINSIC(UnsafePutLong) +UNIMPLEMENTED_INTRINSIC(UnsafePutLongOrdered) +UNIMPLEMENTED_INTRINSIC(UnsafePutLongVolatile) +UNIMPLEMENTED_INTRINSIC(UnsafeCASInt) +UNIMPLEMENTED_INTRINSIC(UnsafeCASLong) +UNIMPLEMENTED_INTRINSIC(UnsafeCASObject) +UNIMPLEMENTED_INTRINSIC(StringCharAt) +UNIMPLEMENTED_INTRINSIC(StringCompareTo) +UNIMPLEMENTED_INTRINSIC(StringEquals) +UNIMPLEMENTED_INTRINSIC(StringIndexOf) +UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter) +UNIMPLEMENTED_INTRINSIC(StringNewStringFromBytes) +UNIMPLEMENTED_INTRINSIC(StringNewStringFromChars) +UNIMPLEMENTED_INTRINSIC(StringNewStringFromString) +UNIMPLEMENTED_INTRINSIC(LongRotateLeft) +UNIMPLEMENTED_INTRINSIC(LongRotateRight) +UNIMPLEMENTED_INTRINSIC(LongNumberOfTrailingZeros) +UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft) +UNIMPLEMENTED_INTRINSIC(IntegerRotateRight) +UNIMPLEMENTED_INTRINSIC(IntegerNumberOfTrailingZeros) + +UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent) +UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck) +UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar) + +#undef UNIMPLEMENTED_INTRINSIC + +#undef __ + +} // namespace mips64 +} // namespace art diff --git a/compiler/optimizing/intrinsics_mips64.h b/compiler/optimizing/intrinsics_mips64.h new file mode 100644 index 0000000000..1481d24c9e --- /dev/null +++ b/compiler/optimizing/intrinsics_mips64.h @@ -0,0 +1,83 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_MIPS64_H_ +#define ART_COMPILER_OPTIMIZING_INTRINSICS_MIPS64_H_ + +#include "intrinsics.h" + +namespace art { + +class ArenaAllocator; +class HInvokeStaticOrDirect; +class HInvokeVirtual; + +namespace mips64 { + +class CodeGeneratorMIPS64; +class Mips64Assembler; + +class IntrinsicLocationsBuilderMIPS64 FINAL : public IntrinsicVisitor { + public: + explicit IntrinsicLocationsBuilderMIPS64(CodeGeneratorMIPS64* codegen); + + // Define visitor methods. 
+ +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache) \ + void Visit ## Name(HInvoke* invoke) OVERRIDE; +#include "intrinsics_list.h" +INTRINSICS_LIST(OPTIMIZING_INTRINSICS) +#undef INTRINSICS_LIST +#undef OPTIMIZING_INTRINSICS + + // Check whether an invoke is an intrinsic, and if so, create a location summary. Returns whether + // a corresponding LocationSummary with the intrinsified_ flag set was generated and attached to + // the invoke. + bool TryDispatch(HInvoke* invoke); + + private: + ArenaAllocator* arena_; + + DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderMIPS64); +}; + +class IntrinsicCodeGeneratorMIPS64 FINAL : public IntrinsicVisitor { + public: + explicit IntrinsicCodeGeneratorMIPS64(CodeGeneratorMIPS64* codegen) : codegen_(codegen) {} + + // Define visitor methods. + +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache) \ + void Visit ## Name(HInvoke* invoke) OVERRIDE; +#include "intrinsics_list.h" +INTRINSICS_LIST(OPTIMIZING_INTRINSICS) +#undef INTRINSICS_LIST +#undef OPTIMIZING_INTRINSICS + + private: + Mips64Assembler* GetAssembler(); + + ArenaAllocator* GetAllocator(); + + CodeGeneratorMIPS64* codegen_; + + DISALLOW_COPY_AND_ASSIGN(IntrinsicCodeGeneratorMIPS64); +}; + +} // namespace mips64 +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_INTRINSICS_MIPS64_H_ diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc index d14dfc190f..ebdf7a2f65 100644 --- a/compiler/optimizing/locations.cc +++ b/compiler/optimizing/locations.cc @@ -23,18 +23,15 @@ namespace art { LocationSummary::LocationSummary(HInstruction* instruction, CallKind call_kind, bool intrinsified) - : inputs_(instruction->GetBlock()->GetGraph()->GetArena(), instruction->InputCount()), - temps_(instruction->GetBlock()->GetGraph()->GetArena(), 0), + : inputs_(instruction->InputCount(), + instruction->GetBlock()->GetGraph()->GetArena()->Adapter(kArenaAllocLocationSummary)), + temps_(instruction->GetBlock()->GetGraph()->GetArena()->Adapter(kArenaAllocLocationSummary)), output_overlaps_(Location::kOutputOverlap), call_kind_(call_kind), stack_mask_(nullptr), register_mask_(0), live_registers_(), intrinsified_(intrinsified) { - inputs_.SetSize(instruction->InputCount()); - for (size_t i = 0; i < instruction->InputCount(); ++i) { - inputs_.Put(i, Location()); - } instruction->SetLocations(this); if (NeedsSafepoint()) { diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h index 2162ab928b..de4fb7e201 100644 --- a/compiler/optimizing/locations.h +++ b/compiler/optimizing/locations.h @@ -17,11 +17,11 @@ #ifndef ART_COMPILER_OPTIMIZING_LOCATIONS_H_ #define ART_COMPILER_OPTIMIZING_LOCATIONS_H_ +#include "base/arena_containers.h" #include "base/arena_object.h" #include "base/bit_field.h" #include "base/bit_vector.h" #include "base/value_object.h" -#include "utils/growable_array.h" namespace art { @@ -468,7 +468,7 @@ static constexpr bool kIntrinsified = true; * The intent is to have the code for generating the instruction independent of * register allocation. A register allocator just has to provide a LocationSummary. 
*/ -class LocationSummary : public ArenaObject<kArenaAllocMisc> { +class LocationSummary : public ArenaObject<kArenaAllocLocationSummary> { public: enum CallKind { kNoCall, @@ -481,15 +481,17 @@ class LocationSummary : public ArenaObject<kArenaAllocMisc> { bool intrinsified = false); void SetInAt(uint32_t at, Location location) { - inputs_.Put(at, location); + DCHECK_LT(at, GetInputCount()); + inputs_[at] = location; } Location InAt(uint32_t at) const { - return inputs_.Get(at); + DCHECK_LT(at, GetInputCount()); + return inputs_[at]; } size_t GetInputCount() const { - return inputs_.Size(); + return inputs_.size(); } void SetOut(Location location, Location::OutputOverlap overlaps = Location::kOutputOverlap) { @@ -508,23 +510,25 @@ class LocationSummary : public ArenaObject<kArenaAllocMisc> { } void AddTemp(Location location) { - temps_.Add(location); + temps_.push_back(location); } Location GetTemp(uint32_t at) const { - return temps_.Get(at); + DCHECK_LT(at, GetTempCount()); + return temps_[at]; } void SetTempAt(uint32_t at, Location location) { - DCHECK(temps_.Get(at).IsUnallocated() || temps_.Get(at).IsInvalid()); - temps_.Put(at, location); + DCHECK_LT(at, GetTempCount()); + DCHECK(temps_[at].IsUnallocated() || temps_[at].IsInvalid()); + temps_[at] = location; } size_t GetTempCount() const { - return temps_.Size(); + return temps_.size(); } - bool HasTemps() const { return !temps_.IsEmpty(); } + bool HasTemps() const { return !temps_.empty(); } Location Out() const { return output_; } @@ -576,7 +580,7 @@ class LocationSummary : public ArenaObject<kArenaAllocMisc> { } bool IsFixedInput(uint32_t input_index) const { - Location input = inputs_.Get(input_index); + Location input = inputs_[input_index]; return input.IsRegister() || input.IsFpuRegister() || input.IsPair() @@ -593,8 +597,8 @@ class LocationSummary : public ArenaObject<kArenaAllocMisc> { } private: - GrowableArray<Location> inputs_; - GrowableArray<Location> temps_; + ArenaVector<Location> inputs_; + ArenaVector<Location> temps_; // Whether the output overlaps with any of the inputs. If it overlaps, then it cannot // share the same register as the inputs. Location::OutputOverlap output_overlaps_; diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index b2407c520c..989970fb49 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -20,8 +20,8 @@ #include "ssa_builder.h" #include "base/bit_vector-inl.h" #include "base/bit_utils.h" +#include "base/stl_util.h" #include "mirror/class-inl.h" -#include "utils/growable_array.h" #include "scoped_thread_state_change.h" namespace art { @@ -32,8 +32,41 @@ void HGraph::AddBlock(HBasicBlock* block) { } void HGraph::FindBackEdges(ArenaBitVector* visited) { + // "visited" must be empty on entry, it's an output argument for all visited (i.e. live) blocks. + DCHECK_EQ(visited->GetHighestBitSet(), -1); + + // Nodes that we're currently visiting, indexed by block id. ArenaBitVector visiting(arena_, blocks_.size(), false); - VisitBlockForBackEdges(entry_block_, visited, &visiting); + // Number of successors visited from a given node, indexed by block id. + ArenaVector<size_t> successors_visited(blocks_.size(), 0u, arena_->Adapter()); + // Stack of nodes that we're currently visiting (same as marked in "visiting" above). 
+ ArenaVector<HBasicBlock*> worklist(arena_->Adapter()); + constexpr size_t kDefaultWorklistSize = 8; + worklist.reserve(kDefaultWorklistSize); + visited->SetBit(entry_block_->GetBlockId()); + visiting.SetBit(entry_block_->GetBlockId()); + worklist.push_back(entry_block_); + + while (!worklist.empty()) { + HBasicBlock* current = worklist.back(); + uint32_t current_id = current->GetBlockId(); + if (successors_visited[current_id] == current->GetSuccessors().size()) { + visiting.ClearBit(current_id); + worklist.pop_back(); + } else { + DCHECK_LT(successors_visited[current_id], current->GetSuccessors().size()); + HBasicBlock* successor = current->GetSuccessors()[successors_visited[current_id]++]; + uint32_t successor_id = successor->GetBlockId(); + if (visiting.IsBitSet(successor_id)) { + DCHECK(ContainsElement(worklist, successor)); + successor->AddBackEdge(current); + } else if (!visited->IsBitSet(successor_id)) { + visited->SetBit(successor_id); + visiting.SetBit(successor_id); + worklist.push_back(successor); + } + } + } } static void RemoveAsUser(HInstruction* instruction) { @@ -79,24 +112,6 @@ void HGraph::RemoveDeadBlocks(const ArenaBitVector& visited) { } } -void HGraph::VisitBlockForBackEdges(HBasicBlock* block, - ArenaBitVector* visited, - ArenaBitVector* visiting) { - int id = block->GetBlockId(); - if (visited->IsBitSet(id)) return; - - visited->SetBit(id); - visiting->SetBit(id); - for (HBasicBlock* successor : block->GetSuccessors()) { - if (visiting->IsBitSet(successor->GetBlockId())) { - successor->AddBackEdge(block); - } else { - VisitBlockForBackEdges(successor, visited, visiting); - } - } - visiting->ClearBit(id); -} - void HGraph::BuildDominatorTree() { // (1) Simplify the CFG so that catch blocks have only exceptional incoming // edges. This invariant simplifies building SSA form because Phis cannot @@ -141,10 +156,43 @@ void HBasicBlock::ClearDominanceInformation() { void HGraph::ComputeDominanceInformation() { DCHECK(reverse_post_order_.empty()); reverse_post_order_.reserve(blocks_.size()); - ArenaVector<size_t> visits(blocks_.size(), 0u, arena_->Adapter()); reverse_post_order_.push_back(entry_block_); - for (HBasicBlock* successor : entry_block_->GetSuccessors()) { - VisitBlockForDominatorTree(successor, entry_block_, &visits); + + // Number of visits of a given node, indexed by block id. + ArenaVector<size_t> visits(blocks_.size(), 0u, arena_->Adapter()); + // Number of successors visited from a given node, indexed by block id. + ArenaVector<size_t> successors_visited(blocks_.size(), 0u, arena_->Adapter()); + // Nodes for which we need to visit successors. + ArenaVector<HBasicBlock*> worklist(arena_->Adapter()); + constexpr size_t kDefaultWorklistSize = 8; + worklist.reserve(kDefaultWorklistSize); + worklist.push_back(entry_block_); + + while (!worklist.empty()) { + HBasicBlock* current = worklist.back(); + uint32_t current_id = current->GetBlockId(); + if (successors_visited[current_id] == current->GetSuccessors().size()) { + worklist.pop_back(); + } else { + DCHECK_LT(successors_visited[current_id], current->GetSuccessors().size()); + HBasicBlock* successor = current->GetSuccessors()[successors_visited[current_id]++]; + + if (successor->GetDominator() == nullptr) { + successor->SetDominator(current); + } else { + successor->SetDominator(FindCommonDominator(successor->GetDominator(), current)); + } + + // Once all the forward edges have been visited, we know the immediate + // dominator of the block. We can then start visiting its successors. 
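Editor's note: the two nodes.cc hunks above (FindBackEdges and ComputeDominanceInformation, the latter continuing just below) replace recursive CFG walks with an explicit worklist plus a per-block count of successors already visited, which keeps native stack usage bounded on deep graphs. The following is a minimal self-contained C++ sketch of that traversal pattern over a toy adjacency list; the toy graph representation and names are the editor's, not ART's.

#include <cassert>
#include <cstddef>
#include <vector>

// DFS without recursion: keep a stack of blocks plus, per block, the index of the next
// successor to visit. A block is popped only once all of its successors were handled,
// mirroring the shape of HGraph::FindBackEdges above.
std::vector<size_t> IterativeDfsPreorder(const std::vector<std::vector<size_t>>& succ,
                                         size_t entry) {
  std::vector<bool> visited(succ.size(), false);
  std::vector<size_t> successors_visited(succ.size(), 0u);
  std::vector<size_t> worklist;
  std::vector<size_t> preorder;
  visited[entry] = true;
  worklist.push_back(entry);
  preorder.push_back(entry);
  while (!worklist.empty()) {
    size_t current = worklist.back();
    if (successors_visited[current] == succ[current].size()) {
      worklist.pop_back();  // every successor handled, retire the block
    } else {
      size_t next = succ[current][successors_visited[current]++];
      if (!visited[next]) {
        visited[next] = true;
        preorder.push_back(next);
        worklist.push_back(next);
      }
    }
  }
  return preorder;
}

int main() {
  // 0 -> 1, 1 -> {2, 3}, 2 -> 0 (a back edge closing a loop), 3 -> nothing.
  std::vector<std::vector<size_t>> succ = {{1}, {2, 3}, {0}, {}};
  assert((IterativeDfsPreorder(succ, 0) == std::vector<size_t>{0, 1, 2, 3}));
  return 0;
}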
+ DCHECK_LT(successor->GetBlockId(), visits.size()); + if (++visits[successor->GetBlockId()] == + successor->GetPredecessors().size() - successor->NumberOfBackEdges()) { + successor->GetDominator()->AddDominatedBlock(successor); + reverse_post_order_.push_back(successor); + worklist.push_back(successor); + } + } } } @@ -166,28 +214,6 @@ HBasicBlock* HGraph::FindCommonDominator(HBasicBlock* first, HBasicBlock* second return nullptr; } -void HGraph::VisitBlockForDominatorTree(HBasicBlock* block, - HBasicBlock* predecessor, - ArenaVector<size_t>* visits) { - if (block->GetDominator() == nullptr) { - block->SetDominator(predecessor); - } else { - block->SetDominator(FindCommonDominator(block->GetDominator(), predecessor)); - } - - // Once all the forward edges have been visited, we know the immediate - // dominator of the block. We can then start visiting its successors. - DCHECK_LT(block->GetBlockId(), visits->size()); - if (++(*visits)[block->GetBlockId()] == - block->GetPredecessors().size() - block->NumberOfBackEdges()) { - block->GetDominator()->AddDominatedBlock(block); - reverse_post_order_.push_back(block); - for (HBasicBlock* successor : block->GetSuccessors()) { - VisitBlockForDominatorTree(successor, block, visits); - } - } -} - void HGraph::TransformToSsa() { DCHECK(!reverse_post_order_.empty()); SsaBuilder ssa_builder(this); @@ -1143,6 +1169,23 @@ HBasicBlock* HBasicBlock::SplitBefore(HInstruction* cursor) { return new_block; } +HBasicBlock* HBasicBlock::CreateImmediateDominator() { + DCHECK(!graph_->IsInSsaForm()) << "Support for SSA form not implemented"; + DCHECK(!IsCatchBlock()) << "Support for updating try/catch information not implemented."; + + HBasicBlock* new_block = new (GetGraph()->GetArena()) HBasicBlock(GetGraph(), GetDexPc()); + + for (HBasicBlock* predecessor : GetPredecessors()) { + new_block->predecessors_.push_back(predecessor); + predecessor->successors_[predecessor->GetSuccessorIndexOf(this)] = new_block; + } + predecessors_.clear(); + AddPredecessor(new_block); + + GetGraph()->AddBlock(new_block); + return new_block; +} + HBasicBlock* HBasicBlock::SplitAfter(HInstruction* cursor) { DCHECK(!cursor->IsControlFlow()); DCHECK_NE(instructions_.last_instruction_, cursor); @@ -1188,6 +1231,15 @@ const HTryBoundary* HBasicBlock::ComputeTryEntryOfSuccessors() const { } } +bool HBasicBlock::HasThrowingInstructions() const { + for (HInstructionIterator it(GetInstructions()); !it.Done(); it.Advance()) { + if (it.Current()->CanThrow()) { + return true; + } + } + return false; +} + static bool HasOnlyOneInstruction(const HBasicBlock& block) { return block.GetPhis().IsEmpty() && !block.GetInstructions().IsEmpty() @@ -1297,16 +1349,25 @@ void HBasicBlock::DisconnectAndDelete() { // instructions. for (HBasicBlock* predecessor : predecessors_) { HInstruction* last_instruction = predecessor->GetLastInstruction(); - predecessor->RemoveInstruction(last_instruction); predecessor->RemoveSuccessor(this); - if (predecessor->GetSuccessors().size() == 1u) { - DCHECK(last_instruction->IsIf()); + uint32_t num_pred_successors = predecessor->GetSuccessors().size(); + if (num_pred_successors == 1u) { + // If we have one successor after removing one, then we must have + // had an HIf or HPackedSwitch, as they have more than one successor. + // Replace those with a HGoto. 
+ DCHECK(last_instruction->IsIf() || last_instruction->IsPackedSwitch()); + predecessor->RemoveInstruction(last_instruction); predecessor->AddInstruction(new (graph_->GetArena()) HGoto(last_instruction->GetDexPc())); - } else { + } else if (num_pred_successors == 0u) { // The predecessor has no remaining successors and therefore must be dead. // We deliberately leave it without a control-flow instruction so that the // SSAChecker fails unless it is not removed during the pass too. - DCHECK_EQ(predecessor->GetSuccessors().size(), 0u); + predecessor->RemoveInstruction(last_instruction); + } else { + // There are multiple successors left. This must come from a HPackedSwitch + // and we are in the middle of removing the HPackedSwitch. Like above, leave + // this alone, and the SSAChecker will fail if it is not removed as well. + DCHECK(last_instruction->IsPackedSwitch()); } } predecessors_.clear(); diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 8dd31bef86..486968cf9e 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -35,7 +35,6 @@ #include "offsets.h" #include "primitive.h" #include "utils/arena_bit_vector.h" -#include "utils/growable_array.h" namespace art { @@ -370,13 +369,7 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { void SetHasTryCatch(bool value) { has_try_catch_ = value; } private: - void VisitBlockForDominatorTree(HBasicBlock* block, - HBasicBlock* predecessor, - ArenaVector<size_t>* visits); void FindBackEdges(ArenaBitVector* visited); - void VisitBlockForBackEdges(HBasicBlock* block, - ArenaBitVector* visited, - ArenaBitVector* visiting); void RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visited) const; void RemoveDeadBlocks(const ArenaBitVector& visited); @@ -825,11 +818,17 @@ class HBasicBlock : public ArenaObject<kArenaAllocBasicBlock> { return EndsWithTryBoundary() ? 1 : GetSuccessors().size(); } + // Create a new block between this block and its predecessors. The new block + // is added to the graph, all predecessor edges are relinked to it and an edge + // is created to `this`. Returns the new empty block. Reverse post order or + // loop and try/catch information are not updated. + HBasicBlock* CreateImmediateDominator(); + // Split the block into two blocks just before `cursor`. Returns the newly // created, latter block. Note that this method will add the block to the // graph, create a Goto at the end of the former block and will create an edge // between the blocks. It will not, however, update the reverse post order or - // loop information. + // loop and try/catch information. HBasicBlock* SplitBefore(HInstruction* cursor); // Split the block into two blocks just after `cursor`. Returns the newly @@ -940,6 +939,8 @@ class HBasicBlock : public ArenaObject<kArenaAllocBasicBlock> { // the appropriate try entry will be returned. const HTryBoundary* ComputeTryEntryOfSuccessors() const; + bool HasThrowingInstructions() const; + // Returns whether this block dominates the blocked passed as parameter. 
bool Dominates(HBasicBlock* block) const; @@ -949,7 +950,6 @@ class HBasicBlock : public ArenaObject<kArenaAllocBasicBlock> { void SetLifetimeStart(size_t start) { lifetime_start_ = start; } void SetLifetimeEnd(size_t end) { lifetime_end_ = end; } - bool EndsWithControlFlowInstruction() const; bool EndsWithIf() const; bool EndsWithTryBoundary() const; @@ -1056,6 +1056,7 @@ class HLoopInformationOutwardIterator : public ValueObject { M(NullConstant, Instruction) \ M(NullCheck, Instruction) \ M(Or, BinaryOperation) \ + M(PackedSwitch, Instruction) \ M(ParallelMove, Instruction) \ M(ParameterValue, Instruction) \ M(Phi, Instruction) \ @@ -2402,6 +2403,40 @@ class HCurrentMethod : public HExpression<0> { DISALLOW_COPY_AND_ASSIGN(HCurrentMethod); }; +// PackedSwitch (jump table). A block ending with a PackedSwitch instruction will +// have one successor for each entry in the switch table, and the final successor +// will be the block containing the next Dex opcode. +class HPackedSwitch : public HTemplateInstruction<1> { + public: + HPackedSwitch(int32_t start_value, + uint32_t num_entries, + HInstruction* input, + uint32_t dex_pc = kNoDexPc) + : HTemplateInstruction(SideEffects::None(), dex_pc), + start_value_(start_value), + num_entries_(num_entries) { + SetRawInputAt(0, input); + } + + bool IsControlFlow() const OVERRIDE { return true; } + + int32_t GetStartValue() const { return start_value_; } + + uint32_t GetNumEntries() const { return num_entries_; } + + HBasicBlock* GetDefaultBlock() const { + // Last entry is the default block. + return GetBlock()->GetSuccessor(num_entries_); + } + DECLARE_INSTRUCTION(PackedSwitch); + + private: + const int32_t start_value_; + const uint32_t num_entries_; + + DISALLOW_COPY_AND_ASSIGN(HPackedSwitch); +}; + class HUnaryOperation : public HExpression<1> { public: HUnaryOperation(Primitive::Type result_type, HInstruction* input, uint32_t dex_pc = kNoDexPc) @@ -5020,7 +5055,10 @@ static constexpr size_t kDefaultNumberOfMoves = 4; class HParallelMove : public HTemplateInstruction<0> { public: explicit HParallelMove(ArenaAllocator* arena, uint32_t dex_pc = kNoDexPc) - : HTemplateInstruction(SideEffects::None(), dex_pc), moves_(arena, kDefaultNumberOfMoves) {} + : HTemplateInstruction(SideEffects::None(), dex_pc), + moves_(arena->Adapter(kArenaAllocMoveOperands)) { + moves_.reserve(kDefaultNumberOfMoves); + } void AddMove(Location source, Location destination, @@ -5030,15 +5068,15 @@ class HParallelMove : public HTemplateInstruction<0> { DCHECK(destination.IsValid()); if (kIsDebugBuild) { if (instruction != nullptr) { - for (size_t i = 0, e = moves_.Size(); i < e; ++i) { - if (moves_.Get(i).GetInstruction() == instruction) { + for (const MoveOperands& move : moves_) { + if (move.GetInstruction() == instruction) { // Special case the situation where the move is for the spill slot // of the instruction. 
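Editor's note: to make the successor layout of the HPackedSwitch node added in the nodes.h hunk above concrete: successors 0 .. num_entries - 1 correspond to case values start_value .. start_value + num_entries - 1, and successor num_entries is the fall-through/default block that GetDefaultBlock() returns. A small hedged C++ sketch of that index computation, not ART code:

#include <cassert>
#include <cstdint>

// Index of the successor a packed switch with `num_entries` consecutive cases starting
// at `start_value` would branch to; index num_entries stands for the default successor.
uint32_t PackedSwitchTargetIndex(int32_t value, int32_t start_value, uint32_t num_entries) {
  int64_t offset = static_cast<int64_t>(value) - start_value;
  if (offset < 0 || offset >= static_cast<int64_t>(num_entries)) {
    return num_entries;  // outside the packed range -> default block (last successor)
  }
  return static_cast<uint32_t>(offset);
}

int main() {
  // Models: switch (v) { case 10: ... case 11: ... case 12: ... default: ... }
  assert(PackedSwitchTargetIndex(11, 10, 3) == 1);
  assert(PackedSwitchTargetIndex(9, 10, 3) == 3);   // below the range -> default
  assert(PackedSwitchTargetIndex(13, 10, 3) == 3);  // above the range -> default
  return 0;
}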
if ((GetPrevious() == instruction) || ((GetPrevious() == nullptr) && instruction->IsPhi() && instruction->GetBlock() == GetBlock())) { - DCHECK_NE(destination.GetKind(), moves_.Get(i).GetDestination().GetKind()) + DCHECK_NE(destination.GetKind(), move.GetDestination().GetKind()) << "Doing parallel moves for the same instruction."; } else { DCHECK(false) << "Doing parallel moves for the same instruction."; @@ -5046,26 +5084,27 @@ class HParallelMove : public HTemplateInstruction<0> { } } } - for (size_t i = 0, e = moves_.Size(); i < e; ++i) { - DCHECK(!destination.OverlapsWith(moves_.Get(i).GetDestination())) + for (const MoveOperands& move : moves_) { + DCHECK(!destination.OverlapsWith(move.GetDestination())) << "Overlapped destination for two moves in a parallel move: " - << moves_.Get(i).GetSource() << " ==> " << moves_.Get(i).GetDestination() << " and " + << move.GetSource() << " ==> " << move.GetDestination() << " and " << source << " ==> " << destination; } } - moves_.Add(MoveOperands(source, destination, type, instruction)); + moves_.emplace_back(source, destination, type, instruction); } - MoveOperands* MoveOperandsAt(size_t index) const { - return moves_.GetRawStorage() + index; + MoveOperands* MoveOperandsAt(size_t index) { + DCHECK_LT(index, moves_.size()); + return &moves_[index]; } - size_t NumMoves() const { return moves_.Size(); } + size_t NumMoves() const { return moves_.size(); } DECLARE_INSTRUCTION(ParallelMove); private: - GrowableArray<MoveOperands> moves_; + ArenaVector<MoveOperands> moves_; DISALLOW_COPY_AND_ASSIGN(HParallelMove); }; diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index a2b613194f..dbfbd96e39 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -321,8 +321,7 @@ static const int kMaximumCompilationTimeBeforeWarning = 100; /* ms */ OptimizingCompiler::OptimizingCompiler(CompilerDriver* driver) : Compiler(driver, kMaximumCompilationTimeBeforeWarning), run_optimizations_( - (driver->GetCompilerOptions().GetCompilerFilter() != CompilerOptions::kTime) - && !driver->GetCompilerOptions().GetDebuggable()), + driver->GetCompilerOptions().GetCompilerFilter() != CompilerOptions::kTime), delegate_(Create(driver, Compiler::Kind::kQuick)) {} void OptimizingCompiler::Init() { @@ -575,12 +574,6 @@ CompiledMethod* OptimizingCompiler::CompileOptimized(HGraph* graph, CompilerDriver* compiler_driver, const DexCompilationUnit& dex_compilation_unit, PassObserver* pass_observer) const { - if (graph->HasTryCatch() && graph->IsDebuggable()) { - // TODO: b/24054676, stop creating catch phis eagerly to avoid special cases like phis without - // inputs. 
- return nullptr; - } - ScopedObjectAccess soa(Thread::Current()); StackHandleScopeCollection handles(soa.Self()); soa.Self()->TransitionFromRunnableToSuspended(kNative); diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc index f9d812f6a6..fce776920d 100644 --- a/compiler/optimizing/parallel_move_resolver.cc +++ b/compiler/optimizing/parallel_move_resolver.cc @@ -16,6 +16,8 @@ #include <iostream> #include "parallel_move_resolver.h" + +#include "base/stl_util.h" #include "nodes.h" namespace art { @@ -28,19 +30,19 @@ void ParallelMoveResolver::BuildInitialMoveList(HParallelMove* parallel_move) { for (size_t i = 0; i < parallel_move->NumMoves(); ++i) { MoveOperands* move = parallel_move->MoveOperandsAt(i); if (!move->IsRedundant()) { - moves_.Add(move); + moves_.push_back(move); } } } void ParallelMoveResolverWithSwap::EmitNativeCode(HParallelMove* parallel_move) { - DCHECK(moves_.IsEmpty()); + DCHECK(moves_.empty()); // Build up a worklist of moves. BuildInitialMoveList(parallel_move); // Move stack/stack slot to take advantage of a free register on constrained machines. - for (size_t i = 0; i < moves_.Size(); ++i) { - const MoveOperands& move = *moves_.Get(i); + for (size_t i = 0; i < moves_.size(); ++i) { + const MoveOperands& move = *moves_[i]; // Ignore constants and moves already eliminated. if (move.IsEliminated() || move.GetSource().IsConstant()) { continue; @@ -52,8 +54,8 @@ void ParallelMoveResolverWithSwap::EmitNativeCode(HParallelMove* parallel_move) } } - for (size_t i = 0; i < moves_.Size(); ++i) { - const MoveOperands& move = *moves_.Get(i); + for (size_t i = 0; i < moves_.size(); ++i) { + const MoveOperands& move = *moves_[i]; // Skip constants to perform them last. They don't block other moves // and skipping such moves with register destinations keeps those // registers free for the whole algorithm. @@ -63,8 +65,8 @@ void ParallelMoveResolverWithSwap::EmitNativeCode(HParallelMove* parallel_move) } // Perform the moves with constant sources. - for (size_t i = 0; i < moves_.Size(); ++i) { - MoveOperands* move = moves_.Get(i); + for (size_t i = 0; i < moves_.size(); ++i) { + MoveOperands* move = moves_[i]; if (!move->IsEliminated()) { DCHECK(move->GetSource().IsConstant()); EmitMove(i); @@ -73,7 +75,7 @@ void ParallelMoveResolverWithSwap::EmitNativeCode(HParallelMove* parallel_move) } } - moves_.Reset(); + moves_.clear(); } Location LowOf(Location location) { @@ -123,7 +125,8 @@ MoveOperands* ParallelMoveResolverWithSwap::PerformMove(size_t index) { // which means that a call to PerformMove could change any source operand // in the move graph. - MoveOperands* move = moves_.Get(index); + DCHECK_LT(index, moves_.size()); + MoveOperands* move = moves_[index]; DCHECK(!move->IsPending()); if (move->IsRedundant()) { // Because we swap register pairs first, following, un-pending @@ -143,8 +146,8 @@ MoveOperands* ParallelMoveResolverWithSwap::PerformMove(size_t index) { // as this one's destination blocks this one so recursively perform all // such moves. 
MoveOperands* required_swap = nullptr; - for (size_t i = 0; i < moves_.Size(); ++i) { - const MoveOperands& other_move = *moves_.Get(i); + for (size_t i = 0; i < moves_.size(); ++i) { + const MoveOperands& other_move = *moves_[i]; if (other_move.Blocks(destination) && !other_move.IsPending()) { // Though PerformMove can change any source operand in the move graph, // calling `PerformMove` cannot create a blocking move via a swap @@ -163,7 +166,7 @@ MoveOperands* ParallelMoveResolverWithSwap::PerformMove(size_t index) { // at the next moves. Swapping is not blocked by anything, it just // updates other moves's source. break; - } else if (required_swap == moves_.Get(i)) { + } else if (required_swap == moves_[i]) { // If `other_move` was swapped, we iterate again to find a new // potential cycle. required_swap = nullptr; @@ -171,7 +174,7 @@ MoveOperands* ParallelMoveResolverWithSwap::PerformMove(size_t index) { } else if (required_swap != nullptr) { // A move is required to swap. We walk back the cycle to find the // move by just returning from this `PerforrmMove`. - moves_.Get(index)->ClearPending(destination); + moves_[index]->ClearPending(destination); return required_swap; } } @@ -197,14 +200,13 @@ MoveOperands* ParallelMoveResolverWithSwap::PerformMove(size_t index) { DCHECK_EQ(required_swap, move); do_swap = true; } else { - for (size_t i = 0; i < moves_.Size(); ++i) { - const MoveOperands& other_move = *moves_.Get(i); - if (other_move.Blocks(destination)) { - DCHECK(other_move.IsPending()); - if (!move->Is64BitMove() && other_move.Is64BitMove()) { + for (MoveOperands* other_move : moves_) { + if (other_move->Blocks(destination)) { + DCHECK(other_move->IsPending()); + if (!move->Is64BitMove() && other_move->Is64BitMove()) { // We swap 64bits moves before swapping 32bits moves. Go back from the // cycle by returning the move that must be swapped. 
- return moves_.Get(i); + return other_move; } do_swap = true; break; @@ -220,12 +222,11 @@ MoveOperands* ParallelMoveResolverWithSwap::PerformMove(size_t index) { Location source = move->GetSource(); Location swap_destination = move->GetDestination(); move->Eliminate(); - for (size_t i = 0; i < moves_.Size(); ++i) { - const MoveOperands& other_move = *moves_.Get(i); - if (other_move.Blocks(source)) { - UpdateSourceOf(moves_.Get(i), source, swap_destination); - } else if (other_move.Blocks(swap_destination)) { - UpdateSourceOf(moves_.Get(i), swap_destination, source); + for (MoveOperands* other_move : moves_) { + if (other_move->Blocks(source)) { + UpdateSourceOf(other_move, source, swap_destination); + } else if (other_move->Blocks(swap_destination)) { + UpdateSourceOf(other_move, swap_destination, source); } } // If the swap was required because of a 64bits move in the middle of a cycle, @@ -242,14 +243,14 @@ MoveOperands* ParallelMoveResolverWithSwap::PerformMove(size_t index) { } bool ParallelMoveResolverWithSwap::IsScratchLocation(Location loc) { - for (size_t i = 0; i < moves_.Size(); ++i) { - if (moves_.Get(i)->Blocks(loc)) { + for (MoveOperands* move : moves_) { + if (move->Blocks(loc)) { return false; } } - for (size_t i = 0; i < moves_.Size(); ++i) { - if (moves_.Get(i)->GetDestination().Equals(loc)) { + for (MoveOperands* move : moves_) { + if (move->GetDestination().Equals(loc)) { return true; } } @@ -302,8 +303,8 @@ ParallelMoveResolverWithSwap::ScratchRegisterScope::~ScratchRegisterScope() { void ParallelMoveResolverNoSwap::EmitNativeCode(HParallelMove* parallel_move) { DCHECK_EQ(GetNumberOfPendingMoves(), 0u); - DCHECK(moves_.IsEmpty()); - DCHECK(scratches_.IsEmpty()); + DCHECK(moves_.empty()); + DCHECK(scratches_.empty()); // Backend dependent initialization. PrepareForEmitNativeCode(); @@ -311,8 +312,8 @@ void ParallelMoveResolverNoSwap::EmitNativeCode(HParallelMove* parallel_move) { // Build up a worklist of moves. BuildInitialMoveList(parallel_move); - for (size_t i = 0; i < moves_.Size(); ++i) { - const MoveOperands& move = *moves_.Get(i); + for (size_t i = 0; i < moves_.size(); ++i) { + const MoveOperands& move = *moves_[i]; // Skip constants to perform them last. They don't block other moves and // skipping such moves with register destinations keeps those registers // free for the whole algorithm. @@ -324,8 +325,8 @@ void ParallelMoveResolverNoSwap::EmitNativeCode(HParallelMove* parallel_move) { // Perform the moves with constant sources and register destinations with UpdateMoveSource() // to reduce the number of literal loads. Stack destinations are skipped since we won't be benefit // from changing the constant sources to stack locations. - for (size_t i = 0; i < moves_.Size(); ++i) { - MoveOperands* move = moves_.Get(i); + for (size_t i = 0; i < moves_.size(); ++i) { + MoveOperands* move = moves_[i]; Location destination = move->GetDestination(); if (!move->IsEliminated() && !destination.IsStackSlot() && !destination.IsDoubleStackSlot()) { Location source = move->GetSource(); @@ -344,8 +345,8 @@ void ParallelMoveResolverNoSwap::EmitNativeCode(HParallelMove* parallel_move) { } // Perform the rest of the moves. - for (size_t i = 0; i < moves_.Size(); ++i) { - MoveOperands* move = moves_.Get(i); + for (size_t i = 0; i < moves_.size(); ++i) { + MoveOperands* move = moves_[i]; if (!move->IsEliminated()) { EmitMove(i); move->Eliminate(); @@ -358,19 +359,18 @@ void ParallelMoveResolverNoSwap::EmitNativeCode(HParallelMove* parallel_move) { // Backend dependent cleanup. 
FinishEmitNativeCode(); - moves_.Reset(); - scratches_.Reset(); + moves_.clear(); + scratches_.clear(); } Location ParallelMoveResolverNoSwap::GetScratchLocation(Location::Kind kind) { - for (size_t i = 0; i < scratches_.Size(); ++i) { - Location loc = scratches_.Get(i); + for (Location loc : scratches_) { if (loc.GetKind() == kind && !IsBlockedByMoves(loc)) { return loc; } } - for (size_t i = 0; i < moves_.Size(); ++i) { - Location loc = moves_.Get(i)->GetDestination(); + for (MoveOperands* move : moves_) { + Location loc = move->GetDestination(); if (loc.GetKind() == kind && !IsBlockedByMoves(loc)) { return loc; } @@ -380,18 +380,18 @@ Location ParallelMoveResolverNoSwap::GetScratchLocation(Location::Kind kind) { void ParallelMoveResolverNoSwap::AddScratchLocation(Location loc) { if (kIsDebugBuild) { - for (size_t i = 0; i < scratches_.Size(); ++i) { - DCHECK(!loc.Equals(scratches_.Get(i))); + for (Location scratch : scratches_) { + CHECK(!loc.Equals(scratch)); } } - scratches_.Add(loc); + scratches_.push_back(loc); } void ParallelMoveResolverNoSwap::RemoveScratchLocation(Location loc) { DCHECK(!IsBlockedByMoves(loc)); - for (size_t i = 0; i < scratches_.Size(); ++i) { - if (loc.Equals(scratches_.Get(i))) { - scratches_.DeleteAt(i); + for (auto it = scratches_.begin(), end = scratches_.end(); it != end; ++it) { + if (loc.Equals(*it)) { + scratches_.erase(it); break; } } @@ -406,7 +406,8 @@ void ParallelMoveResolverNoSwap::PerformMove(size_t index) { // we will update source operand in the move graph to reduce dependencies in // the graph. - MoveOperands* move = moves_.Get(index); + DCHECK_LT(index, moves_.size()); + MoveOperands* move = moves_[index]; DCHECK(!move->IsPending()); DCHECK(!move->IsEliminated()); if (move->IsRedundant()) { @@ -433,8 +434,8 @@ void ParallelMoveResolverNoSwap::PerformMove(size_t index) { // dependencies. Any unperformed, unpending move with a source the same // as this one's destination blocks this one so recursively perform all // such moves. - for (size_t i = 0; i < moves_.Size(); ++i) { - const MoveOperands& other_move = *moves_.Get(i); + for (size_t i = 0; i < moves_.size(); ++i) { + const MoveOperands& other_move = *moves_[i]; if (other_move.Blocks(destination) && !other_move.IsPending()) { PerformMove(i); } @@ -490,8 +491,11 @@ void ParallelMoveResolverNoSwap::PerformMove(size_t index) { move->Eliminate(); UpdateMoveSource(pending_source, pending_destination); // Free any unblocked locations in the scratch location list. - for (size_t i = 0; i < scratches_.Size(); ++i) { - Location scratch = scratches_.Get(i); + // Note: Fetch size() on each iteration because scratches_ can be modified inside the loop. + // FIXME: If FreeScratchLocation() removes the location from scratches_, + // we skip the next location. This happens for arm64. + for (size_t i = 0; i < scratches_.size(); ++i) { + Location scratch = scratches_[i]; // Only scratch overlapping with performed move source can be unblocked. if (scratch.OverlapsWith(pending_source) && !IsBlockedByMoves(scratch)) { FreeScratchLocation(pending_source); @@ -512,8 +516,7 @@ void ParallelMoveResolverNoSwap::UpdateMoveSource(Location from, Location to) { // This is not something we must do, but we can use fewer scratch locations with // this trick. For example, we can avoid using additional scratch locations for // moves (0 -> 1), (1 -> 2), (1 -> 0). 
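To make the (0 -> 1), (1 -> 2), (1 -> 0) example above concrete, here is a small stand-alone sketch of the source-rewriting step, with a plain std::vector of integer location ids standing in for ART's move list (names and types are illustrative only, not the real API):

#include <cstdio>
#include <vector>

// Toy move list: each entry is {source location, destination location}.
struct Move { int source; int destination; };

// After a move from `from` has been emitted, every remaining move that still
// reads `from` can read the freshly written copy in `to` instead.
void UpdateMoveSource(std::vector<Move>* moves, int from, int to) {
  for (Move& move : *moves) {
    if (move.source == from) {
      move.source = to;
    }
  }
}

int main() {
  // Pending moves from the comment above, after (1 -> 2) has been emitted and
  // dropped from the list: (0 -> 1) and (1 -> 0) remain.
  std::vector<Move> moves = {{0, 1}, {1, 0}};
  UpdateMoveSource(&moves, /*from=*/1, /*to=*/2);
  // The list is now (0 -> 1), (2 -> 0): the cycle is gone, so both moves can be
  // emitted in order without allocating a scratch location.
  for (const Move& move : moves) {
    std::printf("%d -> %d\n", move.source, move.destination);
  }
  return 0;
}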
- for (size_t i = 0; i < moves_.Size(); ++i) { - MoveOperands* move = moves_.Get(i); + for (MoveOperands* move : moves_) { if (move->GetSource().Equals(from)) { move->SetSource(to); } @@ -522,16 +525,15 @@ void ParallelMoveResolverNoSwap::UpdateMoveSource(Location from, Location to) { void ParallelMoveResolverNoSwap::AddPendingMove(Location source, Location destination, Primitive::Type type) { - pending_moves_.Add(new (allocator_) MoveOperands(source, destination, type, nullptr)); + pending_moves_.push_back(new (allocator_) MoveOperands(source, destination, type, nullptr)); } void ParallelMoveResolverNoSwap::DeletePendingMove(MoveOperands* move) { - pending_moves_.Delete(move); + RemoveElement(pending_moves_, move); } MoveOperands* ParallelMoveResolverNoSwap::GetUnblockedPendingMove(Location loc) { - for (size_t i = 0; i < pending_moves_.Size(); ++i) { - MoveOperands* move = pending_moves_.Get(i); + for (MoveOperands* move : pending_moves_) { Location destination = move->GetDestination(); // Only moves with destination overlapping with input loc can be unblocked. if (destination.OverlapsWith(loc) && !IsBlockedByMoves(destination)) { @@ -542,13 +544,13 @@ MoveOperands* ParallelMoveResolverNoSwap::GetUnblockedPendingMove(Location loc) } bool ParallelMoveResolverNoSwap::IsBlockedByMoves(Location loc) { - for (size_t i = 0; i < pending_moves_.Size(); ++i) { - if (pending_moves_.Get(i)->Blocks(loc)) { + for (MoveOperands* move : pending_moves_) { + if (move->Blocks(loc)) { return true; } } - for (size_t i = 0; i < moves_.Size(); ++i) { - if (moves_.Get(i)->Blocks(loc)) { + for (MoveOperands* move : moves_) { + if (move->Blocks(loc)) { return true; } } @@ -558,7 +560,7 @@ bool ParallelMoveResolverNoSwap::IsBlockedByMoves(Location loc) { // So far it is only used for debugging purposes to make sure all pending moves // have been performed. size_t ParallelMoveResolverNoSwap::GetNumberOfPendingMoves() { - return pending_moves_.Size(); + return pending_moves_.size(); } } // namespace art diff --git a/compiler/optimizing/parallel_move_resolver.h b/compiler/optimizing/parallel_move_resolver.h index 9ede91013e..4278861690 100644 --- a/compiler/optimizing/parallel_move_resolver.h +++ b/compiler/optimizing/parallel_move_resolver.h @@ -17,8 +17,8 @@ #ifndef ART_COMPILER_OPTIMIZING_PARALLEL_MOVE_RESOLVER_H_ #define ART_COMPILER_OPTIMIZING_PARALLEL_MOVE_RESOLVER_H_ +#include "base/arena_containers.h" #include "base/value_object.h" -#include "utils/growable_array.h" #include "locations.h" #include "primitive.h" @@ -31,7 +31,10 @@ class MoveOperands; // have their own subclass that implements corresponding virtual functions. class ParallelMoveResolver : public ValueObject { public: - explicit ParallelMoveResolver(ArenaAllocator* allocator) : moves_(allocator, 32) {} + explicit ParallelMoveResolver(ArenaAllocator* allocator) + : moves_(allocator->Adapter(kArenaAllocParallelMoveResolver)) { + moves_.reserve(32); + } virtual ~ParallelMoveResolver() {} // Resolve a set of parallel moves, emitting assembler instructions. @@ -41,7 +44,7 @@ class ParallelMoveResolver : public ValueObject { // Build the initial list of moves. 
void BuildInitialMoveList(HParallelMove* parallel_move); - GrowableArray<MoveOperands*> moves_; + ArenaVector<MoveOperands*> moves_; private: DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolver); @@ -120,8 +123,13 @@ class ParallelMoveResolverWithSwap : public ParallelMoveResolver { class ParallelMoveResolverNoSwap : public ParallelMoveResolver { public: explicit ParallelMoveResolverNoSwap(ArenaAllocator* allocator) - : ParallelMoveResolver(allocator), scratches_(allocator, 32), - pending_moves_(allocator, 8), allocator_(allocator) {} + : ParallelMoveResolver(allocator), + scratches_(allocator->Adapter(kArenaAllocParallelMoveResolver)), + pending_moves_(allocator->Adapter(kArenaAllocParallelMoveResolver)), + allocator_(allocator) { + scratches_.reserve(32); + pending_moves_.reserve(8); + } virtual ~ParallelMoveResolverNoSwap() {} // Resolve a set of parallel moves, emitting assembler instructions. @@ -160,7 +168,7 @@ class ParallelMoveResolverNoSwap : public ParallelMoveResolver { void RemoveScratchLocation(Location loc); // List of scratch locations. - GrowableArray<Location> scratches_; + ArenaVector<Location> scratches_; private: // Perform the move at the given index in `moves_` (possibly requiring other moves to satisfy @@ -183,7 +191,7 @@ class ParallelMoveResolverNoSwap : public ParallelMoveResolver { size_t GetNumberOfPendingMoves(); // Additional pending moves which might be added to resolve dependency cycle. - GrowableArray<MoveOperands*> pending_moves_; + ArenaVector<MoveOperands*> pending_moves_; // Used to allocate pending MoveOperands. ArenaAllocator* const allocator_; diff --git a/compiler/optimizing/parallel_move_test.cc b/compiler/optimizing/parallel_move_test.cc index f8f70105cf..da91cb811d 100644 --- a/compiler/optimizing/parallel_move_test.cc +++ b/compiler/optimizing/parallel_move_test.cc @@ -56,7 +56,8 @@ class TestParallelMoveResolverWithSwap : public ParallelMoveResolverWithSwap { : ParallelMoveResolverWithSwap(allocator) {} void EmitMove(size_t index) OVERRIDE { - MoveOperands* move = moves_.Get(index); + DCHECK_LT(index, moves_.size()); + MoveOperands* move = moves_[index]; if (!message_.str().empty()) { message_ << " "; } @@ -68,7 +69,8 @@ class TestParallelMoveResolverWithSwap : public ParallelMoveResolverWithSwap { } void EmitSwap(size_t index) OVERRIDE { - MoveOperands* move = moves_.Get(index); + DCHECK_LT(index, moves_.size()); + MoveOperands* move = moves_[index]; if (!message_.str().empty()) { message_ << " "; } @@ -127,7 +129,8 @@ class TestParallelMoveResolverNoSwap : public ParallelMoveResolverNoSwap { void FreeScratchLocation(Location loc ATTRIBUTE_UNUSED) OVERRIDE {} void EmitMove(size_t index) OVERRIDE { - MoveOperands* move = moves_.Get(index); + DCHECK_LT(index, moves_.size()); + MoveOperands* move = moves_[index]; if (!message_.str().empty()) { message_ << " "; } diff --git a/compiler/optimizing/primitive_type_propagation.cc b/compiler/optimizing/primitive_type_propagation.cc index af93438c9a..c98f43e461 100644 --- a/compiler/optimizing/primitive_type_propagation.cc +++ b/compiler/optimizing/primitive_type_propagation.cc @@ -108,8 +108,9 @@ void PrimitiveTypePropagation::VisitBasicBlock(HBasicBlock* block) { } void PrimitiveTypePropagation::ProcessWorklist() { - while (!worklist_.IsEmpty()) { - HPhi* instruction = worklist_.Pop(); + while (!worklist_.empty()) { + HPhi* instruction = worklist_.back(); + worklist_.pop_back(); if (UpdateType(instruction)) { AddDependentInstructionsToWorklist(instruction); } @@ -118,7 +119,7 @@ void 
PrimitiveTypePropagation::ProcessWorklist() { void PrimitiveTypePropagation::AddToWorklist(HPhi* instruction) { DCHECK(instruction->IsLive()); - worklist_.Add(instruction); + worklist_.push_back(instruction); } void PrimitiveTypePropagation::AddDependentInstructionsToWorklist(HInstruction* instruction) { diff --git a/compiler/optimizing/primitive_type_propagation.h b/compiler/optimizing/primitive_type_propagation.h index 6d370ed2ab..212fcfc69f 100644 --- a/compiler/optimizing/primitive_type_propagation.h +++ b/compiler/optimizing/primitive_type_propagation.h @@ -17,6 +17,7 @@ #ifndef ART_COMPILER_OPTIMIZING_PRIMITIVE_TYPE_PROPAGATION_H_ #define ART_COMPILER_OPTIMIZING_PRIMITIVE_TYPE_PROPAGATION_H_ +#include "base/arena_containers.h" #include "nodes.h" namespace art { @@ -25,7 +26,9 @@ namespace art { class PrimitiveTypePropagation : public ValueObject { public: explicit PrimitiveTypePropagation(HGraph* graph) - : graph_(graph), worklist_(graph->GetArena(), kDefaultWorklistSize) {} + : graph_(graph), worklist_(graph->GetArena()->Adapter(kArenaAllocPrimitiveTypePropagation)) { + worklist_.reserve(kDefaultWorklistSize); + } void Run(); @@ -37,7 +40,7 @@ class PrimitiveTypePropagation : public ValueObject { bool UpdateType(HPhi* phi); HGraph* const graph_; - GrowableArray<HPhi*> worklist_; + ArenaVector<HPhi*> worklist_; static constexpr size_t kDefaultWorklistSize = 8; diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc index a88c5431c5..fe837e4545 100644 --- a/compiler/optimizing/reference_type_propagation.cc +++ b/compiler/optimizing/reference_type_propagation.cc @@ -27,7 +27,7 @@ class RTPVisitor : public HGraphDelegateVisitor { public: RTPVisitor(HGraph* graph, StackHandleScopeCollection* handles, - GrowableArray<HInstruction*>* worklist, + ArenaVector<HInstruction*>* worklist, ReferenceTypeInfo::TypeHandle object_class_handle, ReferenceTypeInfo::TypeHandle class_class_handle, ReferenceTypeInfo::TypeHandle string_class_handle, @@ -68,7 +68,7 @@ class RTPVisitor : public HGraphDelegateVisitor { ReferenceTypeInfo::TypeHandle class_class_handle_; ReferenceTypeInfo::TypeHandle string_class_handle_; ReferenceTypeInfo::TypeHandle throwable_class_handle_; - GrowableArray<HInstruction*>* worklist_; + ArenaVector<HInstruction*>* worklist_; static constexpr size_t kDefaultWorklistSize = 8; }; @@ -78,7 +78,8 @@ ReferenceTypePropagation::ReferenceTypePropagation(HGraph* graph, const char* name) : HOptimization(graph, name), handles_(handles), - worklist_(graph->GetArena(), kDefaultWorklistSize) { + worklist_(graph->GetArena()->Adapter(kArenaAllocReferenceTypePropagation)) { + worklist_.reserve(kDefaultWorklistSize); // Mutator lock is required for NewHandle, but annotalysis ignores constructors. 
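The worklist conversions above all follow one recipe: GrowableArray's IsEmpty/Pop/Add become ArenaVector's empty/back+pop_back/push_back, and the old capacity argument moves into a reserve() call in the constructor body. A rough stand-alone analogue of the resulting shape, with std::vector standing in for ArenaVector and int for the work item (the arena adapter and kArenaAlloc* tags are ART-specific and omitted here):

#include <vector>

// Sketch of the post-conversion worklist shape; std::vector plays the role of ArenaVector.
class Worklist {
 public:
  Worklist() { worklist_.reserve(kDefaultWorklistSize); }  // Was GrowableArray(allocator, 8).

  void Add(int item) { worklist_.push_back(item); }        // Was worklist_.Add(item).

  void Process() {
    while (!worklist_.empty()) {                           // Was !worklist_.IsEmpty().
      int item = worklist_.back();                         // Was worklist_.Pop(), now two steps.
      worklist_.pop_back();
      Handle(item);
    }
  }

 private:
  static void Handle(int /*item*/) { /* visit the item, possibly Add() more work */ }

  static constexpr size_t kDefaultWorklistSize = 8;
  std::vector<int> worklist_;
};

int main() {
  Worklist worklist;
  worklist.Add(1);
  worklist.Add(2);
  worklist.Process();
  return 0;
}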
ScopedObjectAccess soa(Thread::Current()); ClassLinker* linker = Runtime::Current()->GetClassLinker(); @@ -649,7 +650,7 @@ void RTPVisitor::VisitArrayGet(HArrayGet* instr) { ScopedObjectAccess soa(Thread::Current()); UpdateArrayGet(instr, handles_, object_class_handle_); if (!instr->GetReferenceTypeInfo().IsValid()) { - worklist_->Add(instr); + worklist_->push_back(instr); } } @@ -718,8 +719,9 @@ bool ReferenceTypePropagation::UpdateNullability(HInstruction* instr) { } void ReferenceTypePropagation::ProcessWorklist() { - while (!worklist_.IsEmpty()) { - HInstruction* instruction = worklist_.Pop(); + while (!worklist_.empty()) { + HInstruction* instruction = worklist_.back(); + worklist_.pop_back(); if (UpdateNullability(instruction) || UpdateReferenceTypeInfo(instruction)) { AddDependentInstructionsToWorklist(instruction); } @@ -729,7 +731,7 @@ void ReferenceTypePropagation::ProcessWorklist() { void ReferenceTypePropagation::AddToWorklist(HInstruction* instruction) { DCHECK_EQ(instruction->GetType(), Primitive::kPrimNot) << instruction->DebugName() << ":" << instruction->GetType(); - worklist_.Add(instruction); + worklist_.push_back(instruction); } void ReferenceTypePropagation::AddDependentInstructionsToWorklist(HInstruction* instruction) { diff --git a/compiler/optimizing/reference_type_propagation.h b/compiler/optimizing/reference_type_propagation.h index 62f6ab80b3..5493601adc 100644 --- a/compiler/optimizing/reference_type_propagation.h +++ b/compiler/optimizing/reference_type_propagation.h @@ -17,6 +17,7 @@ #ifndef ART_COMPILER_OPTIMIZING_REFERENCE_TYPE_PROPAGATION_H_ #define ART_COMPILER_OPTIMIZING_REFERENCE_TYPE_PROPAGATION_H_ +#include "base/arena_containers.h" #include "driver/dex_compilation_unit.h" #include "handle_scope-inl.h" #include "nodes.h" @@ -57,7 +58,7 @@ class ReferenceTypePropagation : public HOptimization { StackHandleScopeCollection* handles_; - GrowableArray<HInstruction*> worklist_; + ArenaVector<HInstruction*> worklist_; ReferenceTypeInfo::TypeHandle object_class_handle_; ReferenceTypeInfo::TypeHandle class_class_handle_; diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index a4f1f458fd..9cdb89b7b3 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -43,21 +43,21 @@ RegisterAllocator::RegisterAllocator(ArenaAllocator* allocator, : allocator_(allocator), codegen_(codegen), liveness_(liveness), - unhandled_core_intervals_(allocator, 0), - unhandled_fp_intervals_(allocator, 0), + unhandled_core_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)), + unhandled_fp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)), unhandled_(nullptr), - handled_(allocator, 0), - active_(allocator, 0), - inactive_(allocator, 0), - physical_core_register_intervals_(allocator, codegen->GetNumberOfCoreRegisters()), - physical_fp_register_intervals_(allocator, codegen->GetNumberOfFloatingPointRegisters()), - temp_intervals_(allocator, 4), - int_spill_slots_(allocator, kDefaultNumberOfSpillSlots), - long_spill_slots_(allocator, kDefaultNumberOfSpillSlots), - float_spill_slots_(allocator, kDefaultNumberOfSpillSlots), - double_spill_slots_(allocator, kDefaultNumberOfSpillSlots), + handled_(allocator->Adapter(kArenaAllocRegisterAllocator)), + active_(allocator->Adapter(kArenaAllocRegisterAllocator)), + inactive_(allocator->Adapter(kArenaAllocRegisterAllocator)), + physical_core_register_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)), + 
physical_fp_register_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)), + temp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)), + int_spill_slots_(allocator->Adapter(kArenaAllocRegisterAllocator)), + long_spill_slots_(allocator->Adapter(kArenaAllocRegisterAllocator)), + float_spill_slots_(allocator->Adapter(kArenaAllocRegisterAllocator)), + double_spill_slots_(allocator->Adapter(kArenaAllocRegisterAllocator)), catch_phi_spill_slots_(0), - safepoints_(allocator, 0), + safepoints_(allocator->Adapter(kArenaAllocRegisterAllocator)), processing_core_registers_(false), number_of_registers_(-1), registers_array_(nullptr), @@ -66,10 +66,16 @@ RegisterAllocator::RegisterAllocator(ArenaAllocator* allocator, reserved_out_slots_(0), maximum_number_of_live_core_registers_(0), maximum_number_of_live_fp_registers_(0) { + temp_intervals_.reserve(4); + int_spill_slots_.reserve(kDefaultNumberOfSpillSlots); + long_spill_slots_.reserve(kDefaultNumberOfSpillSlots); + float_spill_slots_.reserve(kDefaultNumberOfSpillSlots); + double_spill_slots_.reserve(kDefaultNumberOfSpillSlots); + static constexpr bool kIsBaseline = false; codegen->SetupBlockedRegisters(kIsBaseline); - physical_core_register_intervals_.SetSize(codegen->GetNumberOfCoreRegisters()); - physical_fp_register_intervals_.SetSize(codegen->GetNumberOfFloatingPointRegisters()); + physical_core_register_intervals_.resize(codegen->GetNumberOfCoreRegisters(), nullptr); + physical_fp_register_intervals_.resize(codegen->GetNumberOfFloatingPointRegisters(), nullptr); // Always reserve for the current method and the graph's max out registers. // TODO: compute it instead. // ArtMethod* takes 2 vregs for 64 bits. @@ -129,17 +135,17 @@ void RegisterAllocator::BlockRegister(Location location, size_t start, size_t en int reg = location.reg(); DCHECK(location.IsRegister() || location.IsFpuRegister()); LiveInterval* interval = location.IsRegister() - ? physical_core_register_intervals_.Get(reg) - : physical_fp_register_intervals_.Get(reg); + ? physical_core_register_intervals_[reg] + : physical_fp_register_intervals_[reg]; Primitive::Type type = location.IsRegister() ? Primitive::kPrimInt : Primitive::kPrimFloat; if (interval == nullptr) { interval = LiveInterval::MakeFixedInterval(allocator_, reg, type); if (location.IsRegister()) { - physical_core_register_intervals_.Put(reg, interval); + physical_core_register_intervals_[reg] = interval; } else { - physical_fp_register_intervals_.Put(reg, interval); + physical_fp_register_intervals_[reg] = interval; } } DCHECK(interval->GetRegister() == reg); @@ -181,37 +187,37 @@ void RegisterAllocator::AllocateRegistersInternal() { } number_of_registers_ = codegen_->GetNumberOfCoreRegisters(); - registers_array_ = allocator_->AllocArray<size_t>(number_of_registers_); + registers_array_ = allocator_->AllocArray<size_t>(number_of_registers_, + kArenaAllocRegisterAllocator); processing_core_registers_ = true; unhandled_ = &unhandled_core_intervals_; - for (size_t i = 0, e = physical_core_register_intervals_.Size(); i < e; ++i) { - LiveInterval* fixed = physical_core_register_intervals_.Get(i); + for (LiveInterval* fixed : physical_core_register_intervals_) { if (fixed != nullptr) { // Fixed interval is added to inactive_ instead of unhandled_. // It's also the only type of inactive interval whose start position // can be after the current interval during linear scan. // Fixed interval is never split and never moves to unhandled_. 
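The physical register interval tables above move from SetSize() to resize(count, nullptr): a fixed-size table indexed by register number whose entries are created lazily, as BlockRegister does. A small stand-alone sketch of that shape (std::vector and a placeholder Interval type; the real code allocates from the arena rather than with new/delete):

#include <cstddef>
#include <vector>

struct Interval { int reg; };

class FixedIntervalTable {
 public:
  explicit FixedIntervalTable(size_t number_of_registers)
      : intervals_(number_of_registers, nullptr) {}  // Was SetSize(); every slot starts empty.

  // Create the per-register entry on first use, mirroring BlockRegister().
  Interval* GetOrCreate(int reg) {
    Interval* interval = intervals_[reg];
    if (interval == nullptr) {
      interval = new Interval{reg};
      intervals_[reg] = interval;
    }
    return interval;
  }

  ~FixedIntervalTable() {
    for (Interval* interval : intervals_) delete interval;  // Arena-allocated in the real code.
  }

 private:
  std::vector<Interval*> intervals_;
};

int main() {
  FixedIntervalTable table(/*number_of_registers=*/16);
  return table.GetOrCreate(3) != nullptr ? 0 : 1;
}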
- inactive_.Add(fixed); + inactive_.push_back(fixed); } } LinearScan(); - inactive_.Reset(); - active_.Reset(); - handled_.Reset(); + inactive_.clear(); + active_.clear(); + handled_.clear(); number_of_registers_ = codegen_->GetNumberOfFloatingPointRegisters(); - registers_array_ = allocator_->AllocArray<size_t>(number_of_registers_); + registers_array_ = allocator_->AllocArray<size_t>(number_of_registers_, + kArenaAllocRegisterAllocator); processing_core_registers_ = false; unhandled_ = &unhandled_fp_intervals_; - for (size_t i = 0, e = physical_fp_register_intervals_.Size(); i < e; ++i) { - LiveInterval* fixed = physical_fp_register_intervals_.Get(i); + for (LiveInterval* fixed : physical_fp_register_intervals_) { if (fixed != nullptr) { // Fixed interval is added to inactive_ instead of unhandled_. // It's also the only type of inactive interval whose start position // can be after the current interval during linear scan. // Fixed interval is never split and never moves to unhandled_. - inactive_.Add(fixed); + inactive_.push_back(fixed); } } LinearScan(); @@ -236,24 +242,24 @@ void RegisterAllocator::ProcessInstruction(HInstruction* instruction) { case Location::kRequiresRegister: { LiveInterval* interval = LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimInt); - temp_intervals_.Add(interval); + temp_intervals_.push_back(interval); interval->AddTempUse(instruction, i); - unhandled_core_intervals_.Add(interval); + unhandled_core_intervals_.push_back(interval); break; } case Location::kRequiresFpuRegister: { LiveInterval* interval = LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimDouble); - temp_intervals_.Add(interval); + temp_intervals_.push_back(interval); interval->AddTempUse(instruction, i); if (codegen_->NeedsTwoRegisters(Primitive::kPrimDouble)) { interval->AddHighInterval(/* is_temp */ true); LiveInterval* high = interval->GetHighInterval(); - temp_intervals_.Add(high); - unhandled_fp_intervals_.Add(high); + temp_intervals_.push_back(high); + unhandled_fp_intervals_.push_back(high); } - unhandled_fp_intervals_.Add(interval); + unhandled_fp_intervals_.push_back(interval); break; } @@ -276,7 +282,7 @@ void RegisterAllocator::ProcessInstruction(HInstruction* instruction) { instruction->GetBlock()->RemoveInstruction(instruction); return; } - safepoints_.Add(instruction); + safepoints_.push_back(instruction); if (locations->OnlyCallsOnSlowPath()) { // We add a synthesized range at this position to record the live registers // at this position. Ideally, we could just update the safepoints when locations @@ -310,28 +316,28 @@ void RegisterAllocator::ProcessInstruction(HInstruction* instruction) { LiveInterval* current = instruction->GetLiveInterval(); if (current == nullptr) return; - GrowableArray<LiveInterval*>& unhandled = core_register + ArenaVector<LiveInterval*>& unhandled = core_register ? 
unhandled_core_intervals_ : unhandled_fp_intervals_; - DCHECK(unhandled.IsEmpty() || current->StartsBeforeOrAt(unhandled.Peek())); + DCHECK(unhandled.empty() || current->StartsBeforeOrAt(unhandled.back())); if (codegen_->NeedsTwoRegisters(current->GetType())) { current->AddHighInterval(); } - for (size_t safepoint_index = safepoints_.Size(); safepoint_index > 0; --safepoint_index) { - HInstruction* safepoint = safepoints_.Get(safepoint_index - 1); + for (size_t safepoint_index = safepoints_.size(); safepoint_index > 0; --safepoint_index) { + HInstruction* safepoint = safepoints_[safepoint_index - 1u]; size_t safepoint_position = safepoint->GetLifetimePosition(); // Test that safepoints are ordered in the optimal way. - DCHECK(safepoint_index == safepoints_.Size() - || safepoints_.Get(safepoint_index)->GetLifetimePosition() < safepoint_position); + DCHECK(safepoint_index == safepoints_.size() || + safepoints_[safepoint_index]->GetLifetimePosition() < safepoint_position); if (safepoint_position == current->GetStart()) { // The safepoint is for this instruction, so the location of the instruction // does not need to be saved. - DCHECK_EQ(safepoint_index, safepoints_.Size()); + DCHECK_EQ(safepoint_index, safepoints_.size()); DCHECK_EQ(safepoint, instruction); continue; } else if (current->IsDeadAt(safepoint_position)) { @@ -437,34 +443,26 @@ class AllRangesIterator : public ValueObject { bool RegisterAllocator::ValidateInternal(bool log_fatal_on_failure) const { // To simplify unit testing, we eagerly create the array of intervals, and // call the helper method. - GrowableArray<LiveInterval*> intervals(allocator_, 0); + ArenaVector<LiveInterval*> intervals(allocator_->Adapter(kArenaAllocRegisterAllocator)); for (size_t i = 0; i < liveness_.GetNumberOfSsaValues(); ++i) { HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i); if (ShouldProcess(processing_core_registers_, instruction->GetLiveInterval())) { - intervals.Add(instruction->GetLiveInterval()); + intervals.push_back(instruction->GetLiveInterval()); } } - if (processing_core_registers_) { - for (size_t i = 0, e = physical_core_register_intervals_.Size(); i < e; ++i) { - LiveInterval* fixed = physical_core_register_intervals_.Get(i); - if (fixed != nullptr) { - intervals.Add(fixed); - } - } - } else { - for (size_t i = 0, e = physical_fp_register_intervals_.Size(); i < e; ++i) { - LiveInterval* fixed = physical_fp_register_intervals_.Get(i); - if (fixed != nullptr) { - intervals.Add(fixed); - } + const ArenaVector<LiveInterval*>* physical_register_intervals = processing_core_registers_ + ? 
&physical_core_register_intervals_ + : &physical_fp_register_intervals_; + for (LiveInterval* fixed : *physical_register_intervals) { + if (fixed != nullptr) { + intervals.push_back(fixed); } } - for (size_t i = 0, e = temp_intervals_.Size(); i < e; ++i) { - LiveInterval* temp = temp_intervals_.Get(i); + for (LiveInterval* temp : temp_intervals_) { if (ShouldProcess(processing_core_registers_, temp)) { - intervals.Add(temp); + intervals.push_back(temp); } } @@ -472,7 +470,7 @@ bool RegisterAllocator::ValidateInternal(bool log_fatal_on_failure) const { allocator_, processing_core_registers_, log_fatal_on_failure); } -bool RegisterAllocator::ValidateIntervals(const GrowableArray<LiveInterval*>& intervals, +bool RegisterAllocator::ValidateIntervals(const ArenaVector<LiveInterval*>& intervals, size_t number_of_spill_slots, size_t number_of_out_slots, const CodeGenerator& codegen, @@ -482,26 +480,27 @@ bool RegisterAllocator::ValidateIntervals(const GrowableArray<LiveInterval*>& in size_t number_of_registers = processing_core_registers ? codegen.GetNumberOfCoreRegisters() : codegen.GetNumberOfFloatingPointRegisters(); - GrowableArray<ArenaBitVector*> liveness_of_values( - allocator, number_of_registers + number_of_spill_slots); + ArenaVector<ArenaBitVector*> liveness_of_values( + allocator->Adapter(kArenaAllocRegisterAllocator)); + liveness_of_values.reserve(number_of_registers + number_of_spill_slots); // Allocate a bit vector per register. A live interval that has a register // allocated will populate the associated bit vector based on its live ranges. for (size_t i = 0; i < number_of_registers + number_of_spill_slots; ++i) { - liveness_of_values.Add(new (allocator) ArenaBitVector(allocator, 0, true)); + liveness_of_values.push_back(new (allocator) ArenaBitVector(allocator, 0, true)); } - for (size_t i = 0, e = intervals.Size(); i < e; ++i) { - for (AllRangesIterator it(intervals.Get(i)); !it.Done(); it.Advance()) { + for (LiveInterval* start_interval : intervals) { + for (AllRangesIterator it(start_interval); !it.Done(); it.Advance()) { LiveInterval* current = it.CurrentInterval(); HInstruction* defined_by = current->GetParent()->GetDefinedBy(); if (current->GetParent()->HasSpillSlot() // Parameters and current method have their own stack slot. && !(defined_by != nullptr && (defined_by->IsParameterValue() || defined_by->IsCurrentMethod()))) { - BitVector* liveness_of_spill_slot = liveness_of_values.Get(number_of_registers + BitVector* liveness_of_spill_slot = liveness_of_values[number_of_registers + current->GetParent()->GetSpillSlot() / kVRegSize - - number_of_out_slots); + - number_of_out_slots]; for (size_t j = it.CurrentRange()->GetStart(); j < it.CurrentRange()->GetEnd(); ++j) { if (liveness_of_spill_slot->IsBitSet(j)) { if (log_fatal_on_failure) { @@ -523,7 +522,7 @@ bool RegisterAllocator::ValidateIntervals(const GrowableArray<LiveInterval*>& in // and test code may not properly fill the right information to the code generator. 
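ValidateIntervals above detects conflicts by marking, per register or spill slot, every lifetime position covered by an interval and failing when a position is marked twice. A compact stand-alone sketch of that idea (std::vector<bool> standing in for ArenaBitVector, with toy interval and range types):

#include <cstddef>
#include <vector>

struct Range { size_t start; size_t end; };              // Half-open [start, end).
struct ToyInterval { int reg; std::vector<Range> ranges; };

// Returns false if two intervals hold the same register at the same lifetime position.
bool Validate(const std::vector<ToyInterval>& intervals, size_t num_registers, size_t max_position) {
  std::vector<std::vector<bool>> liveness(num_registers, std::vector<bool>(max_position, false));
  for (const ToyInterval& interval : intervals) {
    for (const Range& range : interval.ranges) {
      for (size_t position = range.start; position < range.end; ++position) {
        if (liveness[interval.reg][position]) {
          return false;  // Conflict: this register is already live at `position`.
        }
        liveness[interval.reg][position] = true;
      }
    }
  }
  return true;
}

int main() {
  // Two non-overlapping intervals on register 0, like the unit tests below: no conflict.
  std::vector<ToyInterval> intervals = {{0, {{0, 42}}}, {0, {{42, 43}}}};
  return Validate(intervals, /*num_registers=*/1, /*max_position=*/64) ? 0 : 1;
}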
CHECK(codegen.HasAllocatedRegister(processing_core_registers, current->GetRegister())); } - BitVector* liveness_of_register = liveness_of_values.Get(current->GetRegister()); + BitVector* liveness_of_register = liveness_of_values[current->GetRegister()]; for (size_t j = it.CurrentRange()->GetStart(); j < it.CurrentRange()->GetEnd(); ++j) { if (liveness_of_register->IsBitSet(j)) { if (current->IsUsingInputRegister() && current->CanUseInputRegister()) { @@ -572,93 +571,101 @@ void RegisterAllocator::DumpInterval(std::ostream& stream, LiveInterval* interva void RegisterAllocator::DumpAllIntervals(std::ostream& stream) const { stream << "inactive: " << std::endl; - for (size_t i = 0; i < inactive_.Size(); i ++) { - DumpInterval(stream, inactive_.Get(i)); + for (LiveInterval* inactive_interval : inactive_) { + DumpInterval(stream, inactive_interval); } stream << "active: " << std::endl; - for (size_t i = 0; i < active_.Size(); i ++) { - DumpInterval(stream, active_.Get(i)); + for (LiveInterval* active_interval : active_) { + DumpInterval(stream, active_interval); } stream << "unhandled: " << std::endl; auto unhandled = (unhandled_ != nullptr) ? unhandled_ : &unhandled_core_intervals_; - for (size_t i = 0; i < unhandled->Size(); i ++) { - DumpInterval(stream, unhandled->Get(i)); + for (LiveInterval* unhandled_interval : *unhandled) { + DumpInterval(stream, unhandled_interval); } stream << "handled: " << std::endl; - for (size_t i = 0; i < handled_.Size(); i ++) { - DumpInterval(stream, handled_.Get(i)); + for (LiveInterval* handled_interval : handled_) { + DumpInterval(stream, handled_interval); } } // By the book implementation of a linear scan register allocator. void RegisterAllocator::LinearScan() { - while (!unhandled_->IsEmpty()) { + while (!unhandled_->empty()) { // (1) Remove interval with the lowest start position from unhandled. - LiveInterval* current = unhandled_->Pop(); + LiveInterval* current = unhandled_->back(); + unhandled_->pop_back(); // Make sure the interval is in an expected state. DCHECK(!current->IsFixed() && !current->HasSpillSlot()); // Make sure we are going in the right order. - DCHECK(unhandled_->IsEmpty() || unhandled_->Peek()->GetStart() >= current->GetStart()); + DCHECK(unhandled_->empty() || unhandled_->back()->GetStart() >= current->GetStart()); // Make sure a low interval is always with a high. - DCHECK(!current->IsLowInterval() || unhandled_->Peek()->IsHighInterval()); + DCHECK(!current->IsLowInterval() || unhandled_->back()->IsHighInterval()); // Make sure a high interval is always with a low. DCHECK(current->IsLowInterval() || - unhandled_->IsEmpty() || - !unhandled_->Peek()->IsHighInterval()); + unhandled_->empty() || + !unhandled_->back()->IsHighInterval()); size_t position = current->GetStart(); // Remember the inactive_ size here since the ones moved to inactive_ from // active_ below shouldn't need to be re-checked. - size_t inactive_intervals_to_handle = inactive_.Size(); + size_t inactive_intervals_to_handle = inactive_.size(); // (2) Remove currently active intervals that are dead at this position. // Move active intervals that have a lifetime hole at this position // to inactive. 
- for (size_t i = 0; i < active_.Size(); ++i) { - LiveInterval* interval = active_.Get(i); + // Note: Copy elements we keep to the beginning, just like + // v.erase(std::remove(v.begin(), v.end(), value), v.end()); + auto active_kept_end = active_.begin(); + for (auto it = active_.begin(), end = active_.end(); it != end; ++it) { + LiveInterval* interval = *it; if (interval->IsDeadAt(position)) { - active_.Delete(interval); - --i; - handled_.Add(interval); + handled_.push_back(interval); } else if (!interval->Covers(position)) { - active_.Delete(interval); - --i; - inactive_.Add(interval); + inactive_.push_back(interval); + } else { + *active_kept_end++ = interval; // Keep this interval. } } + // We have copied what we want to keep to [active_.begin(), active_kept_end), + // the rest of the data in active_ is junk - drop it. + active_.erase(active_kept_end, active_.end()); // (3) Remove currently inactive intervals that are dead at this position. // Move inactive intervals that cover this position to active. - for (size_t i = 0; i < inactive_intervals_to_handle; ++i) { - LiveInterval* interval = inactive_.Get(i); + // Note: Copy elements we keep to the beginning, just like + // v.erase(std::remove(v.begin(), v.begin() + num, value), v.begin() + num); + auto inactive_kept_end = inactive_.begin(); + auto inactive_to_handle_end = inactive_.begin() + inactive_intervals_to_handle; + for (auto it = inactive_.begin(); it != inactive_to_handle_end; ++it) { + LiveInterval* interval = *it; DCHECK(interval->GetStart() < position || interval->IsFixed()); if (interval->IsDeadAt(position)) { - inactive_.Delete(interval); - --i; - --inactive_intervals_to_handle; - handled_.Add(interval); + handled_.push_back(interval); } else if (interval->Covers(position)) { - inactive_.Delete(interval); - --i; - --inactive_intervals_to_handle; - active_.Add(interval); + active_.push_back(interval); + } else { + *inactive_kept_end++ = interval; // Keep this interval. } } + // We have copied what we want to keep to [inactive_.begin(), inactive_kept_end), + // the rest of the data in the processed interval is junk - drop it. + inactive_.erase(inactive_kept_end, inactive_to_handle_end); if (current->IsSlowPathSafepoint()) { // Synthesized interval to record the maximum number of live registers // at safepoints. No need to allocate a register for it. if (processing_core_registers_) { maximum_number_of_live_core_registers_ = - std::max(maximum_number_of_live_core_registers_, active_.Size()); + std::max(maximum_number_of_live_core_registers_, active_.size()); } else { maximum_number_of_live_fp_registers_ = - std::max(maximum_number_of_live_fp_registers_, active_.Size()); + std::max(maximum_number_of_live_fp_registers_, active_.size()); } - DCHECK(unhandled_->IsEmpty() || unhandled_->Peek()->GetStart() > current->GetStart()); + DCHECK(unhandled_->empty() || unhandled_->back()->GetStart() > current->GetStart()); continue; } @@ -683,7 +690,7 @@ void RegisterAllocator::LinearScan() { codegen_->AddAllocatedRegister(processing_core_registers_ ? Location::RegisterLocation(current->GetRegister()) : Location::FpuRegisterLocation(current->GetRegister())); - active_.Add(current); + active_.push_back(current); if (current->HasHighInterval() && !current->GetHighInterval()->HasRegister()) { current->GetHighInterval()->SetRegister(GetHighForLowRegister(current->GetRegister())); } @@ -726,8 +733,7 @@ bool RegisterAllocator::TryAllocateFreeReg(LiveInterval* current) { } // For each active interval, set its register to not free. 
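The rewritten scan above replaces index-juggling Delete() calls with a single pass that compacts the kept intervals to the front and erases the stale tail, in the spirit of the erase/remove idiom quoted in the note. A stripped-down sketch of that pattern (std::vector plus a toy Interval whose flags stand in for IsDeadAt/Covers):

#include <vector>

struct Interval {
  bool dead;    // Stand-in for IsDeadAt(position).
  bool covers;  // Stand-in for Covers(position).
};

// One pass over `active`: dead intervals go to `handled`, intervals with a lifetime
// hole go to `inactive`, and the rest are compacted to the front, like std::remove_if.
void FilterActive(std::vector<Interval*>* active,
                  std::vector<Interval*>* handled,
                  std::vector<Interval*>* inactive) {
  auto kept_end = active->begin();
  for (auto it = active->begin(), end = active->end(); it != end; ++it) {
    Interval* interval = *it;
    if (interval->dead) {
      handled->push_back(interval);
    } else if (!interval->covers) {
      inactive->push_back(interval);
    } else {
      *kept_end++ = interval;  // Keep this interval.
    }
  }
  // Everything in [kept_end, end()) was redistributed above; drop the stale tail.
  active->erase(kept_end, active->end());
}

int main() {
  Interval a{true, false}, b{false, false}, c{false, true};
  std::vector<Interval*> active = {&a, &b, &c}, handled, inactive;
  FilterActive(&active, &handled, &inactive);
  // active == {&c}, handled == {&a}, inactive == {&b}.
  return (active.size() == 1 && handled.size() == 1 && inactive.size() == 1) ? 0 : 1;
}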
- for (size_t i = 0, e = active_.Size(); i < e; ++i) { - LiveInterval* interval = active_.Get(i); + for (LiveInterval* interval : active_) { DCHECK(interval->HasRegister()); free_until[interval->GetRegister()] = 0; } @@ -762,8 +768,7 @@ bool RegisterAllocator::TryAllocateFreeReg(LiveInterval* current) { // For each inactive interval, set its register to be free until // the next intersection with `current`. - for (size_t i = 0, e = inactive_.Size(); i < e; ++i) { - LiveInterval* inactive = inactive_.Get(i); + for (LiveInterval* inactive : inactive_) { // Temp/Slow-path-safepoint interval has no holes. DCHECK(!inactive->IsTemp() && !inactive->IsSlowPathSafepoint()); if (!current->IsSplit() && !inactive->IsFixed()) { @@ -923,11 +928,29 @@ int RegisterAllocator::FindAvailableRegister(size_t* next_use, LiveInterval* cur return reg; } +// Remove interval and its other half if any. Return iterator to the following element. +static ArenaVector<LiveInterval*>::iterator RemoveIntervalAndPotentialOtherHalf( + ArenaVector<LiveInterval*>* intervals, ArenaVector<LiveInterval*>::iterator pos) { + DCHECK(intervals->begin() <= pos && pos < intervals->end()); + LiveInterval* interval = *pos; + if (interval->IsLowInterval()) { + DCHECK(pos + 1 < intervals->end()); + DCHECK_EQ(*(pos + 1), interval->GetHighInterval()); + return intervals->erase(pos, pos + 2); + } else if (interval->IsHighInterval()) { + DCHECK(intervals->begin() < pos); + DCHECK_EQ(*(pos - 1), interval->GetLowInterval()); + return intervals->erase(pos - 1, pos + 1); + } else { + return intervals->erase(pos); + } +} + bool RegisterAllocator::TrySplitNonPairOrUnalignedPairIntervalAt(size_t position, size_t first_register_use, size_t* next_use) { - for (size_t i = 0, e = active_.Size(); i < e; ++i) { - LiveInterval* active = active_.Get(i); + for (auto it = active_.begin(), end = active_.end(); it != end; ++it) { + LiveInterval* active = *it; DCHECK(active->HasRegister()); if (active->IsFixed()) continue; if (active->IsHighInterval()) continue; @@ -941,11 +964,10 @@ bool RegisterAllocator::TrySplitNonPairOrUnalignedPairIntervalAt(size_t position IsLowOfUnalignedPairInterval(active) || !IsLowRegister(active->GetRegister())) { LiveInterval* split = Split(active, position); - active_.DeleteAt(i); if (split != active) { - handled_.Add(active); + handled_.push_back(active); } - PotentiallyRemoveOtherHalf(active, &active_, i); + RemoveIntervalAndPotentialOtherHalf(&active_, it); AddSorted(unhandled_, split); return true; } @@ -953,23 +975,6 @@ bool RegisterAllocator::TrySplitNonPairOrUnalignedPairIntervalAt(size_t position return false; } -bool RegisterAllocator::PotentiallyRemoveOtherHalf(LiveInterval* interval, - GrowableArray<LiveInterval*>* intervals, - size_t index) { - if (interval->IsLowInterval()) { - DCHECK_EQ(intervals->Get(index), interval->GetHighInterval()); - intervals->DeleteAt(index); - return true; - } else if (interval->IsHighInterval()) { - DCHECK_GT(index, 0u); - DCHECK_EQ(intervals->Get(index - 1), interval->GetLowInterval()); - intervals->DeleteAt(index - 1); - return true; - } else { - return false; - } -} - // Find the register that is used the last, and spill the interval // that holds it. If the first use of `current` is after that register // we spill `current` instead. @@ -1001,8 +1006,7 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { // For each active interval, find the next use of its register after the // start of current. 
- for (size_t i = 0, e = active_.Size(); i < e; ++i) { - LiveInterval* active = active_.Get(i); + for (LiveInterval* active : active_) { DCHECK(active->HasRegister()); if (active->IsFixed()) { next_use[active->GetRegister()] = current->GetStart(); @@ -1016,8 +1020,7 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { // For each inactive interval, find the next use of its register after the // start of current. - for (size_t i = 0, e = inactive_.Size(); i < e; ++i) { - LiveInterval* inactive = inactive_.Get(i); + for (LiveInterval* inactive : inactive_) { // Temp/Slow-path-safepoint interval has no holes. DCHECK(!inactive->IsTemp() && !inactive->IsSlowPathSafepoint()); if (!current->IsSplit() && !inactive->IsFixed()) { @@ -1087,10 +1090,10 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { first_register_use, next_use); DCHECK(success); - LiveInterval* existing = unhandled_->Peek(); + LiveInterval* existing = unhandled_->back(); DCHECK(existing->IsHighInterval()); DCHECK_EQ(existing->GetLowInterval(), current); - unhandled_->Add(current); + unhandled_->push_back(current); } else { // If the first use of that instruction is after the last use of the found // register, we split this interval just before its first register use. @@ -1105,23 +1108,24 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { // have that register. current->SetRegister(reg); - for (size_t i = 0, e = active_.Size(); i < e; ++i) { - LiveInterval* active = active_.Get(i); + for (auto it = active_.begin(), end = active_.end(); it != end; ++it) { + LiveInterval* active = *it; if (active->GetRegister() == reg) { DCHECK(!active->IsFixed()); LiveInterval* split = Split(active, current->GetStart()); if (split != active) { - handled_.Add(active); + handled_.push_back(active); } - active_.DeleteAt(i); - PotentiallyRemoveOtherHalf(active, &active_, i); + RemoveIntervalAndPotentialOtherHalf(&active_, it); AddSorted(unhandled_, split); break; } } - for (size_t i = 0; i < inactive_.Size(); ++i) { - LiveInterval* inactive = inactive_.Get(i); + // NOTE: Retrieve end() on each iteration because we're removing elements in the loop body. + for (auto it = inactive_.begin(); it != inactive_.end(); ) { + LiveInterval* inactive = *it; + bool erased = false; if (inactive->GetRegister() == reg) { if (!current->IsSplit() && !inactive->IsFixed()) { // Neither current nor inactive are fixed. @@ -1129,43 +1133,43 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { // inactive interval should never intersect with that inactive interval. // Only if it's not fixed though, because fixed intervals don't come from SSA. DCHECK_EQ(inactive->FirstIntersectionWith(current), kNoLifetime); - continue; - } - size_t next_intersection = inactive->FirstIntersectionWith(current); - if (next_intersection != kNoLifetime) { - if (inactive->IsFixed()) { - LiveInterval* split = Split(current, next_intersection); - DCHECK_NE(split, current); - AddSorted(unhandled_, split); - } else { - // Split at the start of `current`, which will lead to splitting - // at the end of the lifetime hole of `inactive`. - LiveInterval* split = Split(inactive, current->GetStart()); - // If it's inactive, it must start before the current interval. - DCHECK_NE(split, inactive); - inactive_.DeleteAt(i); - if (PotentiallyRemoveOtherHalf(inactive, &inactive_, i) && inactive->IsHighInterval()) { - // We have removed an entry prior to `inactive`. So we need to decrement. 
- --i; + } else { + size_t next_intersection = inactive->FirstIntersectionWith(current); + if (next_intersection != kNoLifetime) { + if (inactive->IsFixed()) { + LiveInterval* split = Split(current, next_intersection); + DCHECK_NE(split, current); + AddSorted(unhandled_, split); + } else { + // Split at the start of `current`, which will lead to splitting + // at the end of the lifetime hole of `inactive`. + LiveInterval* split = Split(inactive, current->GetStart()); + // If it's inactive, it must start before the current interval. + DCHECK_NE(split, inactive); + it = RemoveIntervalAndPotentialOtherHalf(&inactive_, it); + erased = true; + handled_.push_back(inactive); + AddSorted(unhandled_, split); } - // Decrement because we have removed `inactive` from the list. - --i; - handled_.Add(inactive); - AddSorted(unhandled_, split); } } } + // If we have erased the element, `it` already points to the next element. + // Otherwise we need to move to the next element. + if (!erased) { + ++it; + } } return true; } } -void RegisterAllocator::AddSorted(GrowableArray<LiveInterval*>* array, LiveInterval* interval) { +void RegisterAllocator::AddSorted(ArenaVector<LiveInterval*>* array, LiveInterval* interval) { DCHECK(!interval->IsFixed() && !interval->HasSpillSlot()); size_t insert_at = 0; - for (size_t i = array->Size(); i > 0; --i) { - LiveInterval* current = array->Get(i - 1); + for (size_t i = array->size(); i > 0; --i) { + LiveInterval* current = (*array)[i - 1u]; // High intervals must be processed right after their low equivalent. if (current->StartsAfter(interval) && !current->IsHighInterval()) { insert_at = i; @@ -1173,18 +1177,20 @@ void RegisterAllocator::AddSorted(GrowableArray<LiveInterval*>* array, LiveInter } else if ((current->GetStart() == interval->GetStart()) && current->IsSlowPathSafepoint()) { // Ensure the slow path interval is the last to be processed at its location: we want the // interval to know all live registers at this location. - DCHECK(i == 1 || array->Get(i - 2)->StartsAfter(current)); + DCHECK(i == 1 || (*array)[i - 2u]->StartsAfter(current)); insert_at = i; break; } } - array->InsertAt(insert_at, interval); // Insert the high interval before the low, to ensure the low is processed before. + auto insert_pos = array->begin() + insert_at; if (interval->HasHighInterval()) { - array->InsertAt(insert_at, interval->GetHighInterval()); + array->insert(insert_pos, { interval->GetHighInterval(), interval }); } else if (interval->HasLowInterval()) { - array->InsertAt(insert_at + 1, interval->GetLowInterval()); + array->insert(insert_pos, { interval, interval->GetLowInterval() }); + } else { + array->insert(insert_pos, interval); } } @@ -1309,7 +1315,7 @@ void RegisterAllocator::AllocateSpillSlotFor(LiveInterval* interval) { return; } - GrowableArray<size_t>* spill_slots = nullptr; + ArenaVector<size_t>* spill_slots = nullptr; switch (interval->GetType()) { case Primitive::kPrimDouble: spill_slots = &double_spill_slots_; @@ -1334,32 +1340,27 @@ void RegisterAllocator::AllocateSpillSlotFor(LiveInterval* interval) { // Find an available spill slot. 
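The reworked inactive_ loop above is the standard erase-while-iterating shape: when an element is removed, the iterator returned by erase (here via RemoveIntervalAndPotentialOtherHalf) already points at the next element, only the keep path advances the iterator, and end() is re-read on every pass because the vector shrinks. A generic sketch of that shape on a std::vector<int> with a placeholder predicate:

#include <cassert>
#include <vector>

int main() {
  std::vector<int> values = {1, 2, 3, 4, 5};
  // Drop even values while walking the vector; do not cache end(), since erase() shrinks it.
  for (auto it = values.begin(); it != values.end(); ) {
    if (*it % 2 == 0) {
      it = values.erase(it);  // erase() returns the iterator to the next element.
    } else {
      ++it;                   // Only advance when nothing was removed.
    }
  }
  assert((values == std::vector<int>{1, 3, 5}));
  return 0;
}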
size_t slot = 0; - for (size_t e = spill_slots->Size(); slot < e; ++slot) { - if (spill_slots->Get(slot) <= parent->GetStart() - && (slot == (e - 1) || spill_slots->Get(slot + 1) <= parent->GetStart())) { + for (size_t e = spill_slots->size(); slot < e; ++slot) { + if ((*spill_slots)[slot] <= parent->GetStart() + && (slot == (e - 1) || (*spill_slots)[slot + 1] <= parent->GetStart())) { break; } } size_t end = interval->GetLastSibling()->GetEnd(); if (parent->NeedsTwoSpillSlots()) { - if (slot == spill_slots->Size()) { + if (slot + 2u > spill_slots->size()) { // We need a new spill slot. - spill_slots->Add(end); - spill_slots->Add(end); - } else if (slot == spill_slots->Size() - 1) { - spill_slots->Put(slot, end); - spill_slots->Add(end); - } else { - spill_slots->Put(slot, end); - spill_slots->Put(slot + 1, end); + spill_slots->resize(slot + 2u, end); } + (*spill_slots)[slot] = end; + (*spill_slots)[slot + 1] = end; } else { - if (slot == spill_slots->Size()) { + if (slot == spill_slots->size()) { // We need a new spill slot. - spill_slots->Add(end); + spill_slots->push_back(end); } else { - spill_slots->Put(slot, end); + (*spill_slots)[slot] = end; } } @@ -1528,10 +1529,10 @@ void RegisterAllocator::InsertParallelMoveAtExitOf(HBasicBlock* block, DCHECK_EQ(block->NumberOfNormalSuccessors(), 1u); HInstruction* last = block->GetLastInstruction(); // We insert moves at exit for phi predecessors and connecting blocks. - // A block ending with an if cannot branch to a block with phis because - // we do not allow critical edges. It can also not connect + // A block ending with an if or a packed switch cannot branch to a block + // with phis because we do not allow critical edges. It can also not connect // a split interval between two blocks: the move has to happen in the successor. - DCHECK(!last->IsIf()); + DCHECK(!last->IsIf() && !last->IsPackedSwitch()); HInstruction* previous = last->GetPrevious(); HParallelMove* move; // This is a parallel move for connecting blocks. We need to differentiate @@ -1817,13 +1818,13 @@ void RegisterAllocator::Resolve() { size_t slot = current->GetSpillSlot(); switch (current->GetType()) { case Primitive::kPrimDouble: - slot += long_spill_slots_.Size(); + slot += long_spill_slots_.size(); FALLTHROUGH_INTENDED; case Primitive::kPrimLong: - slot += float_spill_slots_.Size(); + slot += float_spill_slots_.size(); FALLTHROUGH_INTENDED; case Primitive::kPrimFloat: - slot += int_spill_slots_.Size(); + slot += int_spill_slots_.size(); FALLTHROUGH_INTENDED; case Primitive::kPrimNot: case Primitive::kPrimInt: @@ -1906,8 +1907,7 @@ void RegisterAllocator::Resolve() { } // Assign temp locations. - for (size_t i = 0; i < temp_intervals_.Size(); ++i) { - LiveInterval* temp = temp_intervals_.Get(i); + for (LiveInterval* temp : temp_intervals_) { if (temp->IsHighInterval()) { // High intervals can be skipped, they are already handled by the low interval. continue; diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h index e0304643e6..58600b789b 100644 --- a/compiler/optimizing/register_allocator.h +++ b/compiler/optimizing/register_allocator.h @@ -18,9 +18,9 @@ #define ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_H_ #include "arch/instruction_set.h" +#include "base/arena_containers.h" #include "base/macros.h" #include "primitive.h" -#include "utils/growable_array.h" namespace art { @@ -59,7 +59,7 @@ class RegisterAllocator { } // Helper method for validation. Used by unit testing. 
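The two-slot case above collapses the old three-way branch (append two, grow by one, or overwrite two) into a single resize(slot + 2, end) followed by two assignments; resize only appends the entries that are actually missing and leaves existing ones untouched. A small stand-alone check of that behaviour (std::vector standing in for the spill slot pool):

#include <cassert>
#include <cstddef>
#include <vector>

// Reserve two adjacent spill slots ending at position `end`, growing the pool only if needed.
void ReserveTwoSlots(std::vector<size_t>* spill_slots, size_t slot, size_t end) {
  if (slot + 2u > spill_slots->size()) {
    spill_slots->resize(slot + 2u, end);  // Append only the missing slot(s).
  }
  (*spill_slots)[slot] = end;             // Overwrite in place when the slots already existed.
  (*spill_slots)[slot + 1] = end;
}

int main() {
  std::vector<size_t> slots = {5};                  // One existing slot, free after position 5.
  ReserveTwoSlots(&slots, /*slot=*/0, /*end=*/42);  // Grows the pool by exactly one entry.
  assert((slots == std::vector<size_t>{42, 42}));
  ReserveTwoSlots(&slots, /*slot=*/0, /*end=*/60);  // Pure overwrite, no growth.
  assert((slots == std::vector<size_t>{60, 60}));
  return 0;
}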
- static bool ValidateIntervals(const GrowableArray<LiveInterval*>& intervals, + static bool ValidateIntervals(const ArenaVector<LiveInterval*>& intervals, size_t number_of_spill_slots, size_t number_of_out_slots, const CodeGenerator& codegen, @@ -70,10 +70,10 @@ class RegisterAllocator { static bool CanAllocateRegistersFor(const HGraph& graph, InstructionSet instruction_set); size_t GetNumberOfSpillSlots() const { - return int_spill_slots_.Size() - + long_spill_slots_.Size() - + float_spill_slots_.Size() - + double_spill_slots_.Size() + return int_spill_slots_.size() + + long_spill_slots_.size() + + float_spill_slots_.size() + + double_spill_slots_.size() + catch_phi_spill_slots_; } @@ -87,7 +87,7 @@ class RegisterAllocator { void Resolve(); // Add `interval` in the given sorted list. - static void AddSorted(GrowableArray<LiveInterval*>* array, LiveInterval* interval); + static void AddSorted(ArenaVector<LiveInterval*>* array, LiveInterval* interval); // Split `interval` at the position `position`. The new interval starts at `position`. LiveInterval* Split(LiveInterval* interval, size_t position); @@ -159,13 +159,6 @@ class RegisterAllocator { size_t first_register_use, size_t* next_use); - // If `interval` has another half, remove it from the list of `intervals`. - // `index` holds the index at which `interval` is in `intervals`. - // Returns whether there is another half. - bool PotentiallyRemoveOtherHalf(LiveInterval* interval, - GrowableArray<LiveInterval*>* intervals, - size_t index); - ArenaAllocator* const allocator_; CodeGenerator* const codegen_; const SsaLivenessAnalysis& liveness_; @@ -173,43 +166,43 @@ class RegisterAllocator { // List of intervals for core registers that must be processed, ordered by start // position. Last entry is the interval that has the lowest start position. // This list is initially populated before doing the linear scan. - GrowableArray<LiveInterval*> unhandled_core_intervals_; + ArenaVector<LiveInterval*> unhandled_core_intervals_; // List of intervals for floating-point registers. Same comments as above. - GrowableArray<LiveInterval*> unhandled_fp_intervals_; + ArenaVector<LiveInterval*> unhandled_fp_intervals_; // Currently processed list of unhandled intervals. Either `unhandled_core_intervals_` // or `unhandled_fp_intervals_`. - GrowableArray<LiveInterval*>* unhandled_; + ArenaVector<LiveInterval*>* unhandled_; // List of intervals that have been processed. - GrowableArray<LiveInterval*> handled_; + ArenaVector<LiveInterval*> handled_; // List of intervals that are currently active when processing a new live interval. // That is, they have a live range that spans the start of the new interval. - GrowableArray<LiveInterval*> active_; + ArenaVector<LiveInterval*> active_; // List of intervals that are currently inactive when processing a new live interval. // That is, they have a lifetime hole that spans the start of the new interval. - GrowableArray<LiveInterval*> inactive_; + ArenaVector<LiveInterval*> inactive_; // Fixed intervals for physical registers. Such intervals cover the positions // where an instruction requires a specific register. - GrowableArray<LiveInterval*> physical_core_register_intervals_; - GrowableArray<LiveInterval*> physical_fp_register_intervals_; + ArenaVector<LiveInterval*> physical_core_register_intervals_; + ArenaVector<LiveInterval*> physical_fp_register_intervals_; // Intervals for temporaries. Such intervals cover the positions // where an instruction requires a temporary. 
- GrowableArray<LiveInterval*> temp_intervals_; + ArenaVector<LiveInterval*> temp_intervals_; // The spill slots allocated for live intervals. We ensure spill slots // are typed to avoid (1) doing moves and swaps between two different kinds // of registers, and (2) swapping between a single stack slot and a double // stack slot. This simplifies the parallel move resolver. - GrowableArray<size_t> int_spill_slots_; - GrowableArray<size_t> long_spill_slots_; - GrowableArray<size_t> float_spill_slots_; - GrowableArray<size_t> double_spill_slots_; + ArenaVector<size_t> int_spill_slots_; + ArenaVector<size_t> long_spill_slots_; + ArenaVector<size_t> float_spill_slots_; + ArenaVector<size_t> double_spill_slots_; // Spill slots allocated to catch phis. This category is special-cased because // (1) slots are allocated prior to linear scan and in reverse linear order, @@ -217,7 +210,7 @@ class RegisterAllocator { size_t catch_phi_spill_slots_; // Instructions that need a safepoint. - GrowableArray<HInstruction*> safepoints_; + ArenaVector<HInstruction*> safepoints_; // True if processing core registers. False if processing floating // point registers. diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc index b72df868d3..2bb5a8bb08 100644 --- a/compiler/optimizing/register_allocator_test.cc +++ b/compiler/optimizing/register_allocator_test.cc @@ -64,83 +64,83 @@ TEST(RegisterAllocatorTest, ValidateIntervals) { std::unique_ptr<const X86InstructionSetFeatures> features_x86( X86InstructionSetFeatures::FromCppDefines()); x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - GrowableArray<LiveInterval*> intervals(&allocator, 0); + ArenaVector<LiveInterval*> intervals(allocator.Adapter()); // Test with two intervals of the same range. { static constexpr size_t ranges[][2] = {{0, 42}}; - intervals.Add(BuildInterval(ranges, arraysize(ranges), &allocator, 0)); - intervals.Add(BuildInterval(ranges, arraysize(ranges), &allocator, 1)); + intervals.push_back(BuildInterval(ranges, arraysize(ranges), &allocator, 0)); + intervals.push_back(BuildInterval(ranges, arraysize(ranges), &allocator, 1)); ASSERT_TRUE(RegisterAllocator::ValidateIntervals( intervals, 0, 0, codegen, &allocator, true, false)); - intervals.Get(1)->SetRegister(0); + intervals[1]->SetRegister(0); ASSERT_FALSE(RegisterAllocator::ValidateIntervals( intervals, 0, 0, codegen, &allocator, true, false)); - intervals.Reset(); + intervals.clear(); } // Test with two non-intersecting intervals. { static constexpr size_t ranges1[][2] = {{0, 42}}; - intervals.Add(BuildInterval(ranges1, arraysize(ranges1), &allocator, 0)); + intervals.push_back(BuildInterval(ranges1, arraysize(ranges1), &allocator, 0)); static constexpr size_t ranges2[][2] = {{42, 43}}; - intervals.Add(BuildInterval(ranges2, arraysize(ranges2), &allocator, 1)); + intervals.push_back(BuildInterval(ranges2, arraysize(ranges2), &allocator, 1)); ASSERT_TRUE(RegisterAllocator::ValidateIntervals( intervals, 0, 0, codegen, &allocator, true, false)); - intervals.Get(1)->SetRegister(0); + intervals[1]->SetRegister(0); ASSERT_TRUE(RegisterAllocator::ValidateIntervals( intervals, 0, 0, codegen, &allocator, true, false)); - intervals.Reset(); + intervals.clear(); } // Test with two non-intersecting intervals, with one with a lifetime hole. 
{ static constexpr size_t ranges1[][2] = {{0, 42}, {45, 48}}; - intervals.Add(BuildInterval(ranges1, arraysize(ranges1), &allocator, 0)); + intervals.push_back(BuildInterval(ranges1, arraysize(ranges1), &allocator, 0)); static constexpr size_t ranges2[][2] = {{42, 43}}; - intervals.Add(BuildInterval(ranges2, arraysize(ranges2), &allocator, 1)); + intervals.push_back(BuildInterval(ranges2, arraysize(ranges2), &allocator, 1)); ASSERT_TRUE(RegisterAllocator::ValidateIntervals( intervals, 0, 0, codegen, &allocator, true, false)); - intervals.Get(1)->SetRegister(0); + intervals[1]->SetRegister(0); ASSERT_TRUE(RegisterAllocator::ValidateIntervals( intervals, 0, 0, codegen, &allocator, true, false)); - intervals.Reset(); + intervals.clear(); } // Test with intersecting intervals. { static constexpr size_t ranges1[][2] = {{0, 42}, {44, 48}}; - intervals.Add(BuildInterval(ranges1, arraysize(ranges1), &allocator, 0)); + intervals.push_back(BuildInterval(ranges1, arraysize(ranges1), &allocator, 0)); static constexpr size_t ranges2[][2] = {{42, 47}}; - intervals.Add(BuildInterval(ranges2, arraysize(ranges2), &allocator, 1)); + intervals.push_back(BuildInterval(ranges2, arraysize(ranges2), &allocator, 1)); ASSERT_TRUE(RegisterAllocator::ValidateIntervals( intervals, 0, 0, codegen, &allocator, true, false)); - intervals.Get(1)->SetRegister(0); + intervals[1]->SetRegister(0); ASSERT_FALSE(RegisterAllocator::ValidateIntervals( intervals, 0, 0, codegen, &allocator, true, false)); - intervals.Reset(); + intervals.clear(); } // Test with siblings. { static constexpr size_t ranges1[][2] = {{0, 42}, {44, 48}}; - intervals.Add(BuildInterval(ranges1, arraysize(ranges1), &allocator, 0)); - intervals.Get(0)->SplitAt(43); + intervals.push_back(BuildInterval(ranges1, arraysize(ranges1), &allocator, 0)); + intervals[0]->SplitAt(43); static constexpr size_t ranges2[][2] = {{42, 47}}; - intervals.Add(BuildInterval(ranges2, arraysize(ranges2), &allocator, 1)); + intervals.push_back(BuildInterval(ranges2, arraysize(ranges2), &allocator, 1)); ASSERT_TRUE(RegisterAllocator::ValidateIntervals( intervals, 0, 0, codegen, &allocator, true, false)); - intervals.Get(1)->SetRegister(0); + intervals[1]->SetRegister(0); // Sibling of the first interval has no register allocated to it. ASSERT_TRUE(RegisterAllocator::ValidateIntervals( intervals, 0, 0, codegen, &allocator, true, false)); - intervals.Get(0)->GetNextSibling()->SetRegister(0); + intervals[0]->GetNextSibling()->SetRegister(0); ASSERT_FALSE(RegisterAllocator::ValidateIntervals( intervals, 0, 0, codegen, &allocator, true, false)); } @@ -429,7 +429,7 @@ TEST(RegisterAllocatorTest, FreeUntil) { // Populate the instructions in the liveness object, to please the register allocator. for (size_t i = 0; i < 60; ++i) { - liveness.instructions_from_lifetime_position_.Add( + liveness.instructions_from_lifetime_position_.push_back( graph->GetEntryBlock()->GetFirstInstruction()); } @@ -442,15 +442,15 @@ TEST(RegisterAllocatorTest, FreeUntil) { // we do not depend on an order. 
LiveInterval* interval = LiveInterval::MakeFixedInterval(&allocator, 0, Primitive::kPrimInt); interval->AddRange(40, 50); - register_allocator.inactive_.Add(interval); + register_allocator.inactive_.push_back(interval); interval = LiveInterval::MakeFixedInterval(&allocator, 0, Primitive::kPrimInt); interval->AddRange(20, 30); - register_allocator.inactive_.Add(interval); + register_allocator.inactive_.push_back(interval); interval = LiveInterval::MakeFixedInterval(&allocator, 0, Primitive::kPrimInt); interval->AddRange(60, 70); - register_allocator.inactive_.Add(interval); + register_allocator.inactive_.push_back(interval); register_allocator.number_of_registers_ = 1; register_allocator.registers_array_ = allocator.AllocArray<size_t>(1); @@ -460,10 +460,10 @@ TEST(RegisterAllocatorTest, FreeUntil) { ASSERT_TRUE(register_allocator.TryAllocateFreeReg(unhandled)); // Check that we have split the interval. - ASSERT_EQ(1u, register_allocator.unhandled_->Size()); + ASSERT_EQ(1u, register_allocator.unhandled_->size()); // Check that we know need to find a new register where the next interval // that uses the register starts. - ASSERT_EQ(20u, register_allocator.unhandled_->Get(0)->GetStart()); + ASSERT_EQ(20u, register_allocator.unhandled_->front()->GetStart()); } static HGraph* BuildIfElseWithPhi(ArenaAllocator* allocator, @@ -678,7 +678,7 @@ TEST(RegisterAllocatorTest, ExpectedInRegisterHint) { // Check that the field gets put in the register expected by its use. // Don't use SetInAt because we are overriding an already allocated location. - ret->GetLocations()->inputs_.Put(0, Location::RegisterLocation(2)); + ret->GetLocations()->inputs_[0] = Location::RegisterLocation(2); RegisterAllocator register_allocator(&allocator, &codegen, liveness); register_allocator.AllocateRegisters(); @@ -885,14 +885,14 @@ TEST(RegisterAllocatorTest, SpillInactive) { SsaLivenessAnalysis liveness(graph, &codegen); // Populate the instructions in the liveness object, to please the register allocator. for (size_t i = 0; i < 32; ++i) { - liveness.instructions_from_lifetime_position_.Add(user); + liveness.instructions_from_lifetime_position_.push_back(user); } RegisterAllocator register_allocator(&allocator, &codegen, liveness); - register_allocator.unhandled_core_intervals_.Add(fourth); - register_allocator.unhandled_core_intervals_.Add(third); - register_allocator.unhandled_core_intervals_.Add(second); - register_allocator.unhandled_core_intervals_.Add(first); + register_allocator.unhandled_core_intervals_.push_back(fourth); + register_allocator.unhandled_core_intervals_.push_back(third); + register_allocator.unhandled_core_intervals_.push_back(second); + register_allocator.unhandled_core_intervals_.push_back(first); // Set just one register available to make all intervals compete for the same. register_allocator.number_of_registers_ = 1; @@ -902,11 +902,11 @@ TEST(RegisterAllocatorTest, SpillInactive) { register_allocator.LinearScan(); // Test that there is no conflicts between intervals. 
- GrowableArray<LiveInterval*> intervals(&allocator, 0); - intervals.Add(first); - intervals.Add(second); - intervals.Add(third); - intervals.Add(fourth); + ArenaVector<LiveInterval*> intervals(allocator.Adapter()); + intervals.push_back(first); + intervals.push_back(second); + intervals.push_back(third); + intervals.push_back(fourth); ASSERT_TRUE(RegisterAllocator::ValidateIntervals( intervals, 0, 0, codegen, &allocator, true, false)); } diff --git a/compiler/optimizing/side_effects_analysis.cc b/compiler/optimizing/side_effects_analysis.cc index 1956781b79..338a3aaad0 100644 --- a/compiler/optimizing/side_effects_analysis.cc +++ b/compiler/optimizing/side_effects_analysis.cc @@ -21,8 +21,8 @@ namespace art { void SideEffectsAnalysis::Run() { // Inlining might have created more blocks, so we need to increase the size // if needed. - block_effects_.SetSize(graph_->GetBlocks().size()); - loop_effects_.SetSize(graph_->GetBlocks().size()); + block_effects_.resize(graph_->GetBlocks().size()); + loop_effects_.resize(graph_->GetBlocks().size()); // In DEBUG mode, ensure side effects are properly initialized to empty. if (kIsDebugBuild) { @@ -54,7 +54,7 @@ void SideEffectsAnalysis::Run() { } } - block_effects_.Put(block->GetBlockId(), effects); + block_effects_[block->GetBlockId()] = effects; if (block->IsLoopHeader()) { // The side effects of the loop header are part of the loop. @@ -76,16 +76,19 @@ void SideEffectsAnalysis::Run() { SideEffects SideEffectsAnalysis::GetLoopEffects(HBasicBlock* block) const { DCHECK(block->IsLoopHeader()); - return loop_effects_.Get(block->GetBlockId()); + DCHECK_LT(block->GetBlockId(), loop_effects_.size()); + return loop_effects_[block->GetBlockId()]; } SideEffects SideEffectsAnalysis::GetBlockEffects(HBasicBlock* block) const { - return block_effects_.Get(block->GetBlockId()); + DCHECK_LT(block->GetBlockId(), block_effects_.size()); + return block_effects_[block->GetBlockId()]; } void SideEffectsAnalysis::UpdateLoopEffects(HLoopInformation* info, SideEffects effects) { - int id = info->GetHeader()->GetBlockId(); - loop_effects_.Put(id, loop_effects_.Get(id).Union(effects)); + uint32_t id = info->GetHeader()->GetBlockId(); + DCHECK_LT(id, loop_effects_.size()); + loop_effects_[id] = loop_effects_[id].Union(effects); } } // namespace art diff --git a/compiler/optimizing/side_effects_analysis.h b/compiler/optimizing/side_effects_analysis.h index 9888140fb6..bac6088bf7 100644 --- a/compiler/optimizing/side_effects_analysis.h +++ b/compiler/optimizing/side_effects_analysis.h @@ -17,6 +17,7 @@ #ifndef ART_COMPILER_OPTIMIZING_SIDE_EFFECTS_ANALYSIS_H_ #define ART_COMPILER_OPTIMIZING_SIDE_EFFECTS_ANALYSIS_H_ +#include "base/arena_containers.h" #include "nodes.h" #include "optimization.h" @@ -27,8 +28,10 @@ class SideEffectsAnalysis : public HOptimization { explicit SideEffectsAnalysis(HGraph* graph) : HOptimization(graph, kSideEffectsAnalysisPassName), graph_(graph), - block_effects_(graph->GetArena(), graph->GetBlocks().size(), SideEffects::None()), - loop_effects_(graph->GetArena(), graph->GetBlocks().size(), SideEffects::None()) {} + block_effects_(graph->GetBlocks().size(), + graph->GetArena()->Adapter(kArenaAllocSideEffectsAnalysis)), + loop_effects_(graph->GetBlocks().size(), + graph->GetArena()->Adapter(kArenaAllocSideEffectsAnalysis)) {} SideEffects GetLoopEffects(HBasicBlock* block) const; SideEffects GetBlockEffects(HBasicBlock* block) const; @@ -51,11 +54,11 @@ class SideEffectsAnalysis : public HOptimization { // Side effects of individual blocks, that is 
the union of the side effects // of the instructions in the block. - GrowableArray<SideEffects> block_effects_; + ArenaVector<SideEffects> block_effects_; // Side effects of loops, that is the union of the side effects of the // blocks contained in that loop. - GrowableArray<SideEffects> loop_effects_; + ArenaVector<SideEffects> loop_effects_; ART_FRIEND_TEST(GVNTest, LoopSideEffects); DISALLOW_COPY_AND_ASSIGN(SideEffectsAnalysis); diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc index 6f71ea3d6b..40c75af6ef 100644 --- a/compiler/optimizing/ssa_builder.cc +++ b/compiler/optimizing/ssa_builder.cc @@ -56,9 +56,32 @@ class DeadPhiHandling : public ValueObject { DISALLOW_COPY_AND_ASSIGN(DeadPhiHandling); }; +static bool HasConflictingEquivalent(HPhi* phi) { + if (phi->GetNext() == nullptr) { + return false; + } + HPhi* next = phi->GetNext()->AsPhi(); + if (next->GetRegNumber() == phi->GetRegNumber()) { + if (next->GetType() == Primitive::kPrimVoid) { + // We only get a void type for an equivalent phi we processed and found out + // it was conflicting. + return true; + } else { + // Go to the next phi, in case it is also an equivalent. + return HasConflictingEquivalent(next); + } + } + return false; +} + bool DeadPhiHandling::UpdateType(HPhi* phi) { + if (phi->IsDead()) { + // Phi was rendered dead while waiting in the worklist because it was replaced + // with an equivalent. + return false; + } + Primitive::Type existing = phi->GetType(); - DCHECK(phi->IsLive()); bool conflict = false; Primitive::Type new_type = existing; @@ -82,21 +105,26 @@ bool DeadPhiHandling::UpdateType(HPhi* phi) { if (new_type == Primitive::kPrimVoid) { new_type = input_type; } else if (new_type == Primitive::kPrimNot && input_type == Primitive::kPrimInt) { + if (input->IsPhi() && HasConflictingEquivalent(input->AsPhi())) { + // If we already asked for an equivalent of the input phi, but that equivalent + // ended up conflicting, make this phi conflicting too. + conflict = true; + break; + } HInstruction* equivalent = SsaBuilder::GetReferenceTypeEquivalent(input); if (equivalent == nullptr) { conflict = true; break; - } else { - phi->ReplaceInput(equivalent, i); - if (equivalent->IsPhi()) { - DCHECK_EQ(equivalent->GetType(), Primitive::kPrimNot); - // We created a new phi, but that phi has the same inputs as the old phi. We - // add it to the worklist to ensure its inputs can also be converted to reference. - // If not, it will remain dead, and the algorithm will make the current phi dead - // as well. - equivalent->AsPhi()->SetLive(); - AddToWorklist(equivalent->AsPhi()); - } + } + phi->ReplaceInput(equivalent, i); + if (equivalent->IsPhi()) { + DCHECK_EQ(equivalent->GetType(), Primitive::kPrimNot); + // We created a new phi, but that phi has the same inputs as the old phi. We + // add it to the worklist to ensure its inputs can also be converted to reference. + // If not, it will remain dead, and the algorithm will make the current phi dead + // as well. 
+ equivalent->AsPhi()->SetLive(); + AddToWorklist(equivalent->AsPhi()); } } else if (new_type == Primitive::kPrimInt && input_type == Primitive::kPrimNot) { new_type = Primitive::kPrimNot; @@ -112,11 +140,26 @@ bool DeadPhiHandling::UpdateType(HPhi* phi) { phi->SetType(Primitive::kPrimVoid); phi->SetDead(); return true; - } else { - DCHECK(phi->IsLive()); - phi->SetType(new_type); - return existing != new_type; + } else if (existing == new_type) { + return false; } + + DCHECK(phi->IsLive()); + phi->SetType(new_type); + + // There might exist a `new_type` equivalent of `phi` already. In that case, + // we replace the equivalent with the, now live, `phi`. + HPhi* equivalent = phi->GetNextEquivalentPhiWithSameType(); + if (equivalent != nullptr) { + // There cannot be more than two equivalents with the same type. + DCHECK(equivalent->GetNextEquivalentPhiWithSameType() == nullptr); + // If doing fix-point iteration, the equivalent might be in `worklist_`. + // Setting it dead will make UpdateType skip it. + equivalent->SetDead(); + equivalent->ReplaceWith(phi); + } + + return true; } void DeadPhiHandling::VisitBasicBlock(HBasicBlock* block) { @@ -125,8 +168,14 @@ void DeadPhiHandling::VisitBasicBlock(HBasicBlock* block) { if (phi->IsDead() && phi->HasEnvironmentUses()) { phi->SetLive(); if (block->IsLoopHeader()) { - // Give a type to the loop phi, to guarantee convergence of the algorithm. - phi->SetType(phi->InputAt(0)->GetType()); + // Give a type to the loop phi to guarantee convergence of the algorithm. + // Note that the dead phi may already have a type if it is an equivalent + // generated for a typed LoadLocal. In that case we do not change the + // type because it could lead to an unsupported PrimNot/Float/Double -> + // PrimInt/Long transition and create same type equivalents. + if (phi->GetType() == Primitive::kPrimVoid) { + phi->SetType(phi->InputAt(0)->GetType()); + } AddToWorklist(phi); } else { // Because we are doing a reverse post order visit, all inputs of @@ -171,12 +220,6 @@ void DeadPhiHandling::Run() { ProcessWorklist(); } -static bool IsPhiEquivalentOf(HInstruction* instruction, HPhi* phi) { - return instruction != nullptr - && instruction->IsPhi() - && instruction->AsPhi()->GetRegNumber() == phi->GetRegNumber(); -} - void SsaBuilder::FixNullConstantType() { // The order doesn't matter here. for (HReversePostOrderIterator itb(*GetGraph()); !itb.Done(); itb.Advance()) { @@ -304,13 +347,13 @@ void SsaBuilder::BuildSsa() { // If the phi is not dead, or has no environment uses, there is nothing to do. if (!phi->IsDead() || !phi->HasEnvironmentUses()) continue; HInstruction* next = phi->GetNext(); - if (!IsPhiEquivalentOf(next, phi)) continue; + if (!phi->IsVRegEquivalentOf(next)) continue; if (next->AsPhi()->IsDead()) { // If the phi equivalent is dead, check if there is another one. next = next->GetNext(); - if (!IsPhiEquivalentOf(next, phi)) continue; + if (!phi->IsVRegEquivalentOf(next)) continue; // There can be at most two phi equivalents. - DCHECK(!IsPhiEquivalentOf(next->GetNext(), phi)); + DCHECK(!phi->IsVRegEquivalentOf(next->GetNext())); if (next->AsPhi()->IsDead()) continue; } // We found a live phi equivalent. Update the environment uses of `phi` with it. 
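// Editor's standalone analogue (hypothetical PhiSketch struct, not ART's HPhi) of the
// HasConflictingEquivalent helper added above: equivalents of a phi sit immediately
// after it in the block's phi list and share its vreg number, and a void type marks
// an equivalent that was already processed and found conflicting.
struct PhiSketch {
  PhiSketch* next = nullptr;
  int reg_number = 0;
  enum Type { kVoid, kInt, kNot } type = kInt;
};

bool HasConflictingEquivalentSketch(const PhiSketch* phi) {
  const PhiSketch* next = phi->next;
  if (next == nullptr || next->reg_number != phi->reg_number) {
    return false;  // No equivalent follows this phi in the list.
  }
  if (next->type == PhiSketch::kVoid) {
    return true;   // The equivalent was processed and turned out to conflict.
  }
  return HasConflictingEquivalentSketch(next);  // There may be yet another equivalent.
}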
@@ -345,6 +388,33 @@ void SsaBuilder::BuildSsa() { } } +ArenaVector<HInstruction*>* SsaBuilder::GetLocalsFor(HBasicBlock* block) { + DCHECK_LT(block->GetBlockId(), locals_for_.size()); + ArenaVector<HInstruction*>* locals = &locals_for_[block->GetBlockId()]; + const size_t vregs = GetGraph()->GetNumberOfVRegs(); + if (locals->empty() && vregs != 0u) { + locals->resize(vregs, nullptr); + + if (block->IsCatchBlock()) { + ArenaAllocator* arena = GetGraph()->GetArena(); + // We record incoming inputs of catch phis at throwing instructions and + // must therefore eagerly create the phis. Phis for undefined vregs will + // be deleted when the first throwing instruction with the vreg undefined + // is encountered. Unused phis will be removed by dead phi analysis. + for (size_t i = 0; i < vregs; ++i) { + // No point in creating the catch phi if it is already undefined at + // the first throwing instruction. + if ((*current_locals_)[i] != nullptr) { + HPhi* phi = new (arena) HPhi(arena, i, 0, Primitive::kPrimVoid); + block->AddPhi(phi); + (*locals)[i] = phi; + } + } + } + } + return locals; +} + HInstruction* SsaBuilder::ValueOfLocal(HBasicBlock* block, size_t local) { ArenaVector<HInstruction*>* locals = GetLocalsFor(block); DCHECK_LT(local, locals->size()); @@ -356,6 +426,24 @@ void SsaBuilder::VisitBasicBlock(HBasicBlock* block) { if (block->IsCatchBlock()) { // Catch phis were already created and inputs collected from throwing sites. + if (kIsDebugBuild) { + // Make sure there was at least one throwing instruction which initialized + // locals (guaranteed by HGraphBuilder) and that all try blocks have been + // visited already (from HTryBoundary scoping and reverse post order). + bool throwing_instruction_found = false; + bool catch_block_visited = false; + for (HReversePostOrderIterator it(*GetGraph()); !it.Done(); it.Advance()) { + HBasicBlock* current = it.Current(); + if (current == block) { + catch_block_visited = true; + } else if (current->IsTryBlock() && + current->GetTryCatchInformation()->GetTryEntry().HasExceptionHandler(*block)) { + DCHECK(!catch_block_visited) << "Catch block visited before its try block."; + throwing_instruction_found |= current->HasThrowingInstructions(); + } + } + DCHECK(throwing_instruction_found) << "No instructions throwing into a live catch block."; + } } else if (block->IsLoopHeader()) { // If the block is a loop header, we know we only have visited the pre header // because we are visiting in reverse post order. We create phis for all initialized diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h index 804296f7ba..79f1a28ac8 100644 --- a/compiler/optimizing/ssa_builder.h +++ b/compiler/optimizing/ssa_builder.h @@ -61,28 +61,9 @@ class SsaBuilder : public HGraphVisitor { void BuildSsa(); - ArenaVector<HInstruction*>* GetLocalsFor(HBasicBlock* block) { - DCHECK_LT(block->GetBlockId(), locals_for_.size()); - ArenaVector<HInstruction*>* locals = &locals_for_[block->GetBlockId()]; - if (locals->empty() && GetGraph()->GetNumberOfVRegs() != 0u) { - const size_t vregs = GetGraph()->GetNumberOfVRegs(); - locals->resize(vregs, nullptr); - - if (block->IsCatchBlock()) { - // We record incoming inputs of catch phis at throwing instructions and - // must therefore eagerly create the phis. Unused phis will be removed - // in the dead phi analysis. 
- ArenaAllocator* arena = GetGraph()->GetArena(); - for (size_t i = 0; i < vregs; ++i) { - HPhi* phi = new (arena) HPhi(arena, i, 0, Primitive::kPrimVoid); - block->AddPhi(phi); - (*locals)[i] = phi; - } - } - } - return locals; - } - + // Returns locals vector for `block`. If it is a catch block, the vector will be + // prepopulated with catch phis for vregs which are defined in `current_locals_`. + ArenaVector<HInstruction*>* GetLocalsFor(HBasicBlock* block); HInstruction* ValueOfLocal(HBasicBlock* block, size_t local); void VisitBasicBlock(HBasicBlock* block); diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc index 1e9a813be9..b869d57be8 100644 --- a/compiler/optimizing/ssa_liveness_analysis.cc +++ b/compiler/optimizing/ssa_liveness_analysis.cc @@ -43,11 +43,11 @@ static bool IsInnerLoop(HLoopInformation* outer, HLoopInformation* inner) { && inner->IsIn(*outer); } -static void AddToListForLinearization(GrowableArray<HBasicBlock*>* worklist, HBasicBlock* block) { - size_t insert_at = worklist->Size(); +static void AddToListForLinearization(ArenaVector<HBasicBlock*>* worklist, HBasicBlock* block) { HLoopInformation* block_loop = block->GetLoopInformation(); - for (; insert_at > 0; --insert_at) { - HBasicBlock* current = worklist->Get(insert_at - 1); + auto insert_pos = worklist->rbegin(); // insert_pos.base() will be the actual position. + for (auto end = worklist->rend(); insert_pos != end; ++insert_pos) { + HBasicBlock* current = *insert_pos; HLoopInformation* current_loop = current->GetLoopInformation(); if (InSameLoop(block_loop, current_loop) || !IsLoop(current_loop) @@ -56,7 +56,7 @@ static void AddToListForLinearization(GrowableArray<HBasicBlock*>* worklist, HBa break; } } - worklist->InsertAt(insert_at, block); + worklist->insert(insert_pos.base(), block); } void SsaLivenessAnalysis::LinearizeGraph() { @@ -69,15 +69,15 @@ void SsaLivenessAnalysis::LinearizeGraph() { // current reverse post order in the graph, but it would require making // order queries to a GrowableArray, which is not the best data structure // for it. - GrowableArray<uint32_t> forward_predecessors(graph_->GetArena(), graph_->GetBlocks().size()); - forward_predecessors.SetSize(graph_->GetBlocks().size()); + ArenaVector<uint32_t> forward_predecessors(graph_->GetBlocks().size(), + graph_->GetArena()->Adapter(kArenaAllocSsaLiveness)); for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { HBasicBlock* block = it.Current(); size_t number_of_forward_predecessors = block->GetPredecessors().size(); if (block->IsLoopHeader()) { number_of_forward_predecessors -= block->GetLoopInformation()->NumberOfBackEdges(); } - forward_predecessors.Put(block->GetBlockId(), number_of_forward_predecessors); + forward_predecessors[block->GetBlockId()] = number_of_forward_predecessors; } // (2): Following a worklist approach, first start with the entry block, and @@ -85,20 +85,21 @@ void SsaLivenessAnalysis::LinearizeGraph() { // successor block are visited, the successor block is added in the worklist // following an order that satisfies the requirements to build our linear graph. 
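// Editor's minimal std::vector demo (assumed values, not ART data) of the reverse
// iterator idiom used in the new AddToListForLinearization: scan backwards with
// rbegin()/rend(), then insert at insert_pos.base(), which points one past the
// element the reverse iterator refers to - the same slot the old
// InsertAt(insert_at, block) computed by counting an index down.
#include <cassert>
#include <vector>

void ReverseInsertDemo() {
  std::vector<int> worklist = {1, 3, 5, 7};
  const int value = 4;
  auto insert_pos = worklist.rbegin();
  for (auto end = worklist.rend(); insert_pos != end; ++insert_pos) {
    if (*insert_pos < value) {
      break;  // First element, scanning from the back, that must stay before `value`.
    }
  }
  worklist.insert(insert_pos.base(), value);
  assert((worklist == std::vector<int>{1, 3, 4, 5, 7}));
}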
graph_->linear_order_.reserve(graph_->GetReversePostOrder().size()); - GrowableArray<HBasicBlock*> worklist(graph_->GetArena(), 1); - worklist.Add(graph_->GetEntryBlock()); + ArenaVector<HBasicBlock*> worklist(graph_->GetArena()->Adapter(kArenaAllocSsaLiveness)); + worklist.push_back(graph_->GetEntryBlock()); do { - HBasicBlock* current = worklist.Pop(); + HBasicBlock* current = worklist.back(); + worklist.pop_back(); graph_->linear_order_.push_back(current); for (HBasicBlock* successor : current->GetSuccessors()) { int block_id = successor->GetBlockId(); - size_t number_of_remaining_predecessors = forward_predecessors.Get(block_id); + size_t number_of_remaining_predecessors = forward_predecessors[block_id]; if (number_of_remaining_predecessors == 1) { AddToListForLinearization(&worklist, successor); } - forward_predecessors.Put(block_id, number_of_remaining_predecessors - 1); + forward_predecessors[block_id] = number_of_remaining_predecessors - 1; } - } while (!worklist.IsEmpty()); + } while (!worklist.empty()); } void SsaLivenessAnalysis::NumberInstructions() { @@ -122,7 +123,7 @@ void SsaLivenessAnalysis::NumberInstructions() { codegen_->AllocateLocations(current); LocationSummary* locations = current->GetLocations(); if (locations != nullptr && locations->Out().IsValid()) { - instructions_from_ssa_index_.Add(current); + instructions_from_ssa_index_.push_back(current); current->SetSsaIndex(ssa_index++); current->SetLiveInterval( LiveInterval::MakeInterval(graph_->GetArena(), current->GetType(), current)); @@ -132,7 +133,7 @@ void SsaLivenessAnalysis::NumberInstructions() { lifetime_position += 2; // Add a null marker to notify we are starting a block. - instructions_from_lifetime_position_.Add(nullptr); + instructions_from_lifetime_position_.push_back(nullptr); for (HInstructionIterator inst_it(block->GetInstructions()); !inst_it.Done(); inst_it.Advance()) { @@ -140,12 +141,12 @@ void SsaLivenessAnalysis::NumberInstructions() { codegen_->AllocateLocations(current); LocationSummary* locations = current->GetLocations(); if (locations != nullptr && locations->Out().IsValid()) { - instructions_from_ssa_index_.Add(current); + instructions_from_ssa_index_.push_back(current); current->SetSsaIndex(ssa_index++); current->SetLiveInterval( LiveInterval::MakeInterval(graph_->GetArena(), current->GetType(), current)); } - instructions_from_lifetime_position_.Add(current); + instructions_from_lifetime_position_.push_back(current); current->SetLifetimePosition(lifetime_position); lifetime_position += 2; } @@ -158,9 +159,9 @@ void SsaLivenessAnalysis::NumberInstructions() { void SsaLivenessAnalysis::ComputeLiveness() { for (HLinearOrderIterator it(*graph_); !it.Done(); it.Advance()) { HBasicBlock* block = it.Current(); - block_infos_.Put( - block->GetBlockId(), - new (graph_->GetArena()) BlockInfo(graph_->GetArena(), *block, number_of_ssa_values_)); + DCHECK_LT(block->GetBlockId(), block_infos_.size()); + block_infos_[block->GetBlockId()] = + new (graph_->GetArena()) BlockInfo(graph_->GetArena(), *block, number_of_ssa_values_); } // Compute the live ranges, as well as the initial live_in, live_out, and kill sets. @@ -212,7 +213,7 @@ void SsaLivenessAnalysis::ComputeLiveRanges() { // Add a range that covers this block to all instructions live_in because of successors. // Instructions defined in this block will have their start of the range adjusted. 
for (uint32_t idx : live_in->Indexes()) { - HInstruction* current = instructions_from_ssa_index_.Get(idx); + HInstruction* current = GetInstructionFromSsaIndex(idx); current->GetLiveInterval()->AddRange(block->GetLifetimeStart(), block->GetLifetimeEnd()); } @@ -277,7 +278,7 @@ void SsaLivenessAnalysis::ComputeLiveRanges() { // For all live_in instructions at the loop header, we need to create a range // that covers the full loop. for (uint32_t idx : live_in->Indexes()) { - HInstruction* current = instructions_from_ssa_index_.Get(idx); + HInstruction* current = GetInstructionFromSsaIndex(idx); current->GetLiveInterval()->AddLoopRange(block->GetLifetimeStart(), last_position); } } diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h index 3aedaa56a2..e4b0999d4f 100644 --- a/compiler/optimizing/ssa_liveness_analysis.h +++ b/compiler/optimizing/ssa_liveness_analysis.h @@ -27,7 +27,7 @@ class SsaLivenessAnalysis; static constexpr int kNoRegister = -1; -class BlockInfo : public ArenaObject<kArenaAllocMisc> { +class BlockInfo : public ArenaObject<kArenaAllocSsaLiveness> { public: BlockInfo(ArenaAllocator* allocator, const HBasicBlock& block, size_t number_of_ssa_values) : block_(block), @@ -55,7 +55,7 @@ class BlockInfo : public ArenaObject<kArenaAllocMisc> { * A live range contains the start and end of a range where an instruction or a temporary * is live. */ -class LiveRange FINAL : public ArenaObject<kArenaAllocMisc> { +class LiveRange FINAL : public ArenaObject<kArenaAllocSsaLiveness> { public: LiveRange(size_t start, size_t end, LiveRange* next) : start_(start), end_(end), next_(next) { DCHECK_LT(start, end); @@ -101,7 +101,7 @@ class LiveRange FINAL : public ArenaObject<kArenaAllocMisc> { /** * A use position represents a live interval use at a given position. */ -class UsePosition : public ArenaObject<kArenaAllocMisc> { +class UsePosition : public ArenaObject<kArenaAllocSsaLiveness> { public: UsePosition(HInstruction* user, HEnvironment* environment, @@ -169,7 +169,7 @@ class UsePosition : public ArenaObject<kArenaAllocMisc> { DISALLOW_COPY_AND_ASSIGN(UsePosition); }; -class SafepointPosition : public ArenaObject<kArenaAllocMisc> { +class SafepointPosition : public ArenaObject<kArenaAllocSsaLiveness> { public: explicit SafepointPosition(HInstruction* instruction) : instruction_(instruction), @@ -206,7 +206,7 @@ class SafepointPosition : public ArenaObject<kArenaAllocMisc> { * An interval is a list of disjoint live ranges where an instruction is live. * Each instruction that has uses gets an interval. 
*/ -class LiveInterval : public ArenaObject<kArenaAllocMisc> { +class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> { public: static LiveInterval* MakeInterval(ArenaAllocator* allocator, Primitive::Type type, @@ -1106,33 +1106,39 @@ class SsaLivenessAnalysis : public ValueObject { SsaLivenessAnalysis(HGraph* graph, CodeGenerator* codegen) : graph_(graph), codegen_(codegen), - block_infos_(graph->GetArena(), graph->GetBlocks().size()), - instructions_from_ssa_index_(graph->GetArena(), 0), - instructions_from_lifetime_position_(graph->GetArena(), 0), + block_infos_(graph->GetBlocks().size(), + nullptr, + graph->GetArena()->Adapter(kArenaAllocSsaLiveness)), + instructions_from_ssa_index_(graph->GetArena()->Adapter(kArenaAllocSsaLiveness)), + instructions_from_lifetime_position_(graph->GetArena()->Adapter(kArenaAllocSsaLiveness)), number_of_ssa_values_(0) { - block_infos_.SetSize(graph->GetBlocks().size()); } void Analyze(); BitVector* GetLiveInSet(const HBasicBlock& block) const { - return &block_infos_.Get(block.GetBlockId())->live_in_; + DCHECK_LT(block.GetBlockId(), block_infos_.size()); + return &block_infos_[block.GetBlockId()]->live_in_; } BitVector* GetLiveOutSet(const HBasicBlock& block) const { - return &block_infos_.Get(block.GetBlockId())->live_out_; + DCHECK_LT(block.GetBlockId(), block_infos_.size()); + return &block_infos_[block.GetBlockId()]->live_out_; } BitVector* GetKillSet(const HBasicBlock& block) const { - return &block_infos_.Get(block.GetBlockId())->kill_; + DCHECK_LT(block.GetBlockId(), block_infos_.size()); + return &block_infos_[block.GetBlockId()]->kill_; } HInstruction* GetInstructionFromSsaIndex(size_t index) const { - return instructions_from_ssa_index_.Get(index); + DCHECK_LT(index, instructions_from_ssa_index_.size()); + return instructions_from_ssa_index_[index]; } HInstruction* GetInstructionFromPosition(size_t index) const { - return instructions_from_lifetime_position_.Get(index); + DCHECK_LT(index, instructions_from_lifetime_position_.size()); + return instructions_from_lifetime_position_[index]; } HBasicBlock* GetBlockFromPosition(size_t index) const { @@ -1163,7 +1169,7 @@ class SsaLivenessAnalysis : public ValueObject { } size_t GetMaxLifetimePosition() const { - return instructions_from_lifetime_position_.Size() * 2 - 1; + return instructions_from_lifetime_position_.size() * 2 - 1; } size_t GetNumberOfSsaValues() const { @@ -1218,13 +1224,13 @@ class SsaLivenessAnalysis : public ValueObject { HGraph* const graph_; CodeGenerator* const codegen_; - GrowableArray<BlockInfo*> block_infos_; + ArenaVector<BlockInfo*> block_infos_; // Temporary array used when computing live_in, live_out, and kill sets. - GrowableArray<HInstruction*> instructions_from_ssa_index_; + ArenaVector<HInstruction*> instructions_from_ssa_index_; // Temporary array used when inserting moves in the graph. 
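// Editor's sketch of the bounds-check pattern introduced with the accessors above:
// GrowableArray::Get() asserted the index itself, while ArenaVector (like std::vector)
// does unchecked operator[], so each accessor now asserts the index explicitly first.
// Plain assert() stands in for DCHECK_LT here; names are illustrative.
#include <cassert>
#include <cstddef>
#include <vector>

int GetCheckedSketch(const std::vector<int>& values, size_t index) {
  assert(index < values.size());  // DCHECK_LT(index, values.size());
  return values[index];
}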
- GrowableArray<HInstruction*> instructions_from_lifetime_position_; + ArenaVector<HInstruction*> instructions_from_lifetime_position_; size_t number_of_ssa_values_; ART_FRIEND_TEST(RegisterAllocatorTest, SpillInactive); diff --git a/compiler/optimizing/ssa_phi_elimination.cc b/compiler/optimizing/ssa_phi_elimination.cc index a9f04cd201..72f9ddd506 100644 --- a/compiler/optimizing/ssa_phi_elimination.cc +++ b/compiler/optimizing/ssa_phi_elimination.cc @@ -35,7 +35,7 @@ void SsaDeadPhiElimination::MarkDeadPhis() { HUseListNode<HInstruction*>* current = use_it.Current(); HInstruction* user = current->GetUser(); if (!user->IsPhi()) { - worklist_.Add(phi); + worklist_.push_back(phi); phi->SetLive(); break; } @@ -44,12 +44,13 @@ void SsaDeadPhiElimination::MarkDeadPhis() { } // Process the worklist by propagating liveness to phi inputs. - while (!worklist_.IsEmpty()) { - HPhi* phi = worklist_.Pop(); + while (!worklist_.empty()) { + HPhi* phi = worklist_.back(); + worklist_.pop_back(); for (HInputIterator it(phi); !it.Done(); it.Advance()) { HInstruction* input = it.Current(); if (input->IsPhi() && input->AsPhi()->IsDead()) { - worklist_.Add(input->AsPhi()); + worklist_.push_back(input->AsPhi()); input->AsPhi()->SetLive(); } } @@ -103,12 +104,13 @@ void SsaRedundantPhiElimination::Run() { for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { HBasicBlock* block = it.Current(); for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) { - worklist_.Add(inst_it.Current()->AsPhi()); + worklist_.push_back(inst_it.Current()->AsPhi()); } } - while (!worklist_.IsEmpty()) { - HPhi* phi = worklist_.Pop(); + while (!worklist_.empty()) { + HPhi* phi = worklist_.back(); + worklist_.pop_back(); // If the phi has already been processed, continue. 
if (!phi->IsInBlock()) { @@ -155,7 +157,7 @@ void SsaRedundantPhiElimination::Run() { HUseListNode<HInstruction*>* current = it.Current(); HInstruction* user = current->GetUser(); if (user->IsPhi()) { - worklist_.Add(user->AsPhi()); + worklist_.push_back(user->AsPhi()); } } diff --git a/compiler/optimizing/ssa_phi_elimination.h b/compiler/optimizing/ssa_phi_elimination.h index 67351f277b..b48e8200d5 100644 --- a/compiler/optimizing/ssa_phi_elimination.h +++ b/compiler/optimizing/ssa_phi_elimination.h @@ -17,6 +17,7 @@ #ifndef ART_COMPILER_OPTIMIZING_SSA_PHI_ELIMINATION_H_ #define ART_COMPILER_OPTIMIZING_SSA_PHI_ELIMINATION_H_ +#include "base/arena_containers.h" #include "nodes.h" #include "optimization.h" @@ -30,7 +31,9 @@ class SsaDeadPhiElimination : public HOptimization { public: explicit SsaDeadPhiElimination(HGraph* graph) : HOptimization(graph, kSsaDeadPhiEliminationPassName), - worklist_(graph->GetArena(), kDefaultWorklistSize) {} + worklist_(graph->GetArena()->Adapter(kArenaAllocSsaPhiElimination)) { + worklist_.reserve(kDefaultWorklistSize); + } void Run() OVERRIDE; @@ -40,7 +43,7 @@ class SsaDeadPhiElimination : public HOptimization { static constexpr const char* kSsaDeadPhiEliminationPassName = "dead_phi_elimination"; private: - GrowableArray<HPhi*> worklist_; + ArenaVector<HPhi*> worklist_; static constexpr size_t kDefaultWorklistSize = 8; @@ -57,14 +60,16 @@ class SsaRedundantPhiElimination : public HOptimization { public: explicit SsaRedundantPhiElimination(HGraph* graph) : HOptimization(graph, kSsaRedundantPhiEliminationPassName), - worklist_(graph->GetArena(), kDefaultWorklistSize) {} + worklist_(graph->GetArena()->Adapter(kArenaAllocSsaPhiElimination)) { + worklist_.reserve(kDefaultWorklistSize); + } void Run() OVERRIDE; static constexpr const char* kSsaRedundantPhiEliminationPassName = "redundant_phi_elimination"; private: - GrowableArray<HPhi*> worklist_; + ArenaVector<HPhi*> worklist_; static constexpr size_t kDefaultWorklistSize = 8; diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc index 1f0bac59e0..f27cecc8fa 100644 --- a/compiler/optimizing/stack_map_stream.cc +++ b/compiler/optimizing/stack_map_stream.cc @@ -30,8 +30,8 @@ void StackMapStream::BeginStackMapEntry(uint32_t dex_pc, current_entry_.sp_mask = sp_mask; current_entry_.num_dex_registers = num_dex_registers; current_entry_.inlining_depth = inlining_depth; - current_entry_.dex_register_locations_start_index = dex_register_locations_.Size(); - current_entry_.inline_infos_start_index = inline_infos_.Size(); + current_entry_.dex_register_locations_start_index = dex_register_locations_.size(); + current_entry_.inline_infos_start_index = inline_infos_.size(); current_entry_.dex_register_map_hash = 0; current_entry_.same_dex_register_map_as_ = kNoSameDexMapFound; if (num_dex_registers != 0) { @@ -55,7 +55,7 @@ void StackMapStream::BeginStackMapEntry(uint32_t dex_pc, void StackMapStream::EndStackMapEntry() { current_entry_.same_dex_register_map_as_ = FindEntryWithTheSameDexMap(); - stack_maps_.Add(current_entry_); + stack_maps_.push_back(current_entry_); current_entry_ = StackMapEntry(); } @@ -73,12 +73,12 @@ void StackMapStream::AddDexRegisterEntry(DexRegisterLocation::Kind kind, int32_t auto it = location_catalog_entries_indices_.Find(location); if (it != location_catalog_entries_indices_.end()) { // Retrieve the index from the hash map. 
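// Editor's sketch of the capacity-hint move visible in the SsaDeadPhiElimination and
// SsaRedundantPhiElimination constructors above: GrowableArray took an initial length
// as a constructor argument, while ArenaVector does not, so the hint becomes an
// explicit reserve() in the owning class's constructor. std::vector stands in for
// ArenaVector and the class name is hypothetical.
#include <cstddef>
#include <vector>

class WorklistOwnerSketch {
 public:
  WorklistOwnerSketch() {
    worklist_.reserve(kDefaultWorklistSize);  // Was GrowableArray(arena, kDefaultWorklistSize).
  }

 private:
  static constexpr size_t kDefaultWorklistSize = 8;
  std::vector<void*> worklist_;
};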
- dex_register_locations_.Add(it->second); + dex_register_locations_.push_back(it->second); } else { // Create a new entry in the location catalog and the hash map. - size_t index = location_catalog_entries_.Size(); - location_catalog_entries_.Add(location); - dex_register_locations_.Add(index); + size_t index = location_catalog_entries_.size(); + location_catalog_entries_.push_back(location); + dex_register_locations_.push_back(index); location_catalog_entries_indices_.Insert(std::make_pair(location, index)); } @@ -108,7 +108,7 @@ void StackMapStream::BeginInlineInfoEntry(uint32_t method_index, current_inline_info_.dex_pc = dex_pc; current_inline_info_.invoke_type = invoke_type; current_inline_info_.num_dex_registers = num_dex_registers; - current_inline_info_.dex_register_locations_start_index = dex_register_locations_.Size(); + current_inline_info_.dex_register_locations_start_index = dex_register_locations_.size(); if (num_dex_registers != 0) { current_inline_info_.live_dex_registers_mask = new (allocator_) ArenaBitVector(allocator_, num_dex_registers, true); @@ -123,14 +123,14 @@ void StackMapStream::EndInlineInfoEntry() { DCHECK_EQ(current_dex_register_, current_inline_info_.num_dex_registers) << "Inline information contains less registers than expected"; in_inline_frame_ = false; - inline_infos_.Add(current_inline_info_); + inline_infos_.push_back(current_inline_info_); current_inline_info_ = InlineInfoEntry(); } uint32_t StackMapStream::ComputeMaxNativePcOffset() const { uint32_t max_native_pc_offset = 0u; - for (size_t i = 0, size = stack_maps_.Size(); i != size; ++i) { - max_native_pc_offset = std::max(max_native_pc_offset, stack_maps_.Get(i).native_pc_offset); + for (const StackMapEntry& entry : stack_maps_) { + max_native_pc_offset = std::max(max_native_pc_offset, entry.native_pc_offset); } return max_native_pc_offset; } @@ -147,7 +147,7 @@ size_t StackMapStream::PrepareForFillIn() { dex_pc_max_, max_native_pc_offset, register_mask_max_); - stack_maps_size_ = stack_maps_.Size() * stack_map_encoding_.ComputeStackMapSize(); + stack_maps_size_ = stack_maps_.size() * stack_map_encoding_.ComputeStackMapSize(); dex_register_location_catalog_size_ = ComputeDexRegisterLocationCatalogSize(); // Note: use RoundUp to word-size here if you want CodeInfo objects to be word aligned. @@ -170,33 +170,28 @@ size_t StackMapStream::PrepareForFillIn() { size_t StackMapStream::ComputeDexRegisterLocationCatalogSize() const { size_t size = DexRegisterLocationCatalog::kFixedSize; - for (size_t location_catalog_entry_index = 0; - location_catalog_entry_index < location_catalog_entries_.Size(); - ++location_catalog_entry_index) { - DexRegisterLocation dex_register_location = - location_catalog_entries_.Get(location_catalog_entry_index); + for (const DexRegisterLocation& dex_register_location : location_catalog_entries_) { size += DexRegisterLocationCatalog::EntrySize(dex_register_location); } return size; } size_t StackMapStream::ComputeDexRegisterMapSize(uint32_t num_dex_registers, - const BitVector& live_dex_registers_mask) const { + const BitVector* live_dex_registers_mask) const { + // For num_dex_registers == 0u live_dex_registers_mask may be null. + if (num_dex_registers == 0u) { + return 0u; // No register map will be emitted. + } + DCHECK(live_dex_registers_mask != nullptr); + // Size of the map in bytes. size_t size = DexRegisterMap::kFixedSize; // Add the live bit mask for the Dex register liveness. 
size += DexRegisterMap::GetLiveBitMaskSize(num_dex_registers); // Compute the size of the set of live Dex register entries. - size_t number_of_live_dex_registers = 0; - for (size_t dex_register_number = 0; - dex_register_number < num_dex_registers; - ++dex_register_number) { - if (live_dex_registers_mask.IsBitSet(dex_register_number)) { - ++number_of_live_dex_registers; - } - } + size_t number_of_live_dex_registers = live_dex_registers_mask->NumSetBits(); size_t map_entries_size_in_bits = - DexRegisterMap::SingleEntrySizeInBits(location_catalog_entries_.Size()) + DexRegisterMap::SingleEntrySizeInBits(location_catalog_entries_.size()) * number_of_live_dex_registers; size_t map_entries_size_in_bytes = RoundUp(map_entries_size_in_bits, kBitsPerByte) / kBitsPerByte; @@ -207,24 +202,24 @@ size_t StackMapStream::ComputeDexRegisterMapSize(uint32_t num_dex_registers, size_t StackMapStream::ComputeDexRegisterMapsSize() const { size_t size = 0; size_t inline_info_index = 0; - for (size_t i = 0; i < stack_maps_.Size(); ++i) { - StackMapEntry entry = stack_maps_.Get(i); + for (const StackMapEntry& entry : stack_maps_) { if (entry.same_dex_register_map_as_ == kNoSameDexMapFound) { - size += ComputeDexRegisterMapSize(entry.num_dex_registers, *entry.live_dex_registers_mask); + size += ComputeDexRegisterMapSize(entry.num_dex_registers, entry.live_dex_registers_mask); } else { // Entries with the same dex map will have the same offset. } for (size_t j = 0; j < entry.inlining_depth; ++j) { - InlineInfoEntry inline_entry = inline_infos_.Get(inline_info_index++); + DCHECK_LT(inline_info_index, inline_infos_.size()); + InlineInfoEntry inline_entry = inline_infos_[inline_info_index++]; size += ComputeDexRegisterMapSize(inline_entry.num_dex_registers, - *inline_entry.live_dex_registers_mask); + inline_entry.live_dex_registers_mask); } } return size; } size_t StackMapStream::ComputeInlineInfoSize() const { - return inline_infos_.Size() * InlineInfo::SingleEntrySize() + return inline_infos_.size() * InlineInfo::SingleEntrySize() // For encoding the depth. + (number_of_stack_maps_with_inline_info_ * InlineInfo::kFixedSize); } @@ -244,19 +239,18 @@ void StackMapStream::FillIn(MemoryRegion region) { inline_infos_start_, inline_info_size_); code_info.SetEncoding(stack_map_encoding_); - code_info.SetNumberOfStackMaps(stack_maps_.Size()); + code_info.SetNumberOfStackMaps(stack_maps_.size()); DCHECK_EQ(code_info.GetStackMapsSize(code_info.ExtractEncoding()), stack_maps_size_); // Set the Dex register location catalog. - code_info.SetNumberOfLocationCatalogEntries(location_catalog_entries_.Size()); + code_info.SetNumberOfLocationCatalogEntries(location_catalog_entries_.size()); MemoryRegion dex_register_location_catalog_region = region.Subregion( dex_register_location_catalog_start_, dex_register_location_catalog_size_); DexRegisterLocationCatalog dex_register_location_catalog(dex_register_location_catalog_region); // Offset in `dex_register_location_catalog` where to store the next // register location. 
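// Editor's standalone sketch (std::bitset stands in for ART's BitVector) of the change
// in ComputeDexRegisterMapSize above: instead of looping over every vreg and counting
// IsBitSet() hits, the live-register count is read off the bit vector directly, which
// is what NumSetBits() provides.
#include <bitset>
#include <cassert>

void CountLiveRegistersDemo() {
  std::bitset<16> live_dex_registers_mask;  // BitVector stand-in.
  live_dex_registers_mask.set(1);
  live_dex_registers_mask.set(4);
  live_dex_registers_mask.set(9);
  assert(live_dex_registers_mask.count() == 3u);  // ~ live_dex_registers_mask->NumSetBits()
}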
size_t location_catalog_offset = DexRegisterLocationCatalog::kFixedSize; - for (size_t i = 0, e = location_catalog_entries_.Size(); i < e; ++i) { - DexRegisterLocation dex_register_location = location_catalog_entries_.Get(i); + for (DexRegisterLocation dex_register_location : location_catalog_entries_) { dex_register_location_catalog.SetRegisterInfo(location_catalog_offset, dex_register_location); location_catalog_offset += DexRegisterLocationCatalog::EntrySize(dex_register_location); } @@ -265,9 +259,9 @@ void StackMapStream::FillIn(MemoryRegion region) { uintptr_t next_dex_register_map_offset = 0; uintptr_t next_inline_info_offset = 0; - for (size_t i = 0, e = stack_maps_.Size(); i < e; ++i) { + for (size_t i = 0, e = stack_maps_.size(); i < e; ++i) { StackMap stack_map = code_info.GetStackMapAt(i, stack_map_encoding_); - StackMapEntry entry = stack_maps_.Get(i); + StackMapEntry entry = stack_maps_[i]; stack_map.SetDexPc(stack_map_encoding_, entry.dex_pc); stack_map.SetNativePcOffset(stack_map_encoding_, entry.native_pc_offset); @@ -291,7 +285,7 @@ void StackMapStream::FillIn(MemoryRegion region) { // New dex registers maps should be added to the stack map. MemoryRegion register_region = dex_register_locations_region.Subregion( next_dex_register_map_offset, - ComputeDexRegisterMapSize(entry.num_dex_registers, *entry.live_dex_registers_mask)); + ComputeDexRegisterMapSize(entry.num_dex_registers, entry.live_dex_registers_mask)); next_dex_register_map_offset += register_region.size(); DexRegisterMap dex_register_map(register_region); stack_map.SetDexRegisterMapOffset( @@ -318,8 +312,9 @@ void StackMapStream::FillIn(MemoryRegion region) { stack_map_encoding_, inline_region.start() - dex_register_locations_region.start()); inline_info.SetDepth(entry.inlining_depth); + DCHECK_LE(entry.inline_infos_start_index + entry.inlining_depth, inline_infos_.size()); for (size_t depth = 0; depth < entry.inlining_depth; ++depth) { - InlineInfoEntry inline_entry = inline_infos_.Get(depth + entry.inline_infos_start_index); + InlineInfoEntry inline_entry = inline_infos_[depth + entry.inline_infos_start_index]; inline_info.SetMethodIndexAtDepth(depth, inline_entry.method_index); inline_info.SetDexPcAtDepth(depth, inline_entry.dex_pc); inline_info.SetInvokeTypeAtDepth(depth, inline_entry.invoke_type); @@ -331,7 +326,7 @@ void StackMapStream::FillIn(MemoryRegion region) { MemoryRegion register_region = dex_register_locations_region.Subregion( next_dex_register_map_offset, ComputeDexRegisterMapSize(inline_entry.num_dex_registers, - *inline_entry.live_dex_registers_mask)); + inline_entry.live_dex_registers_mask)); next_dex_register_map_offset += register_region.size(); DexRegisterMap dex_register_map(register_region); inline_info.SetDexRegisterMapOffsetAtDepth( @@ -357,42 +352,43 @@ void StackMapStream::FillInDexRegisterMap(DexRegisterMap dex_register_map, uint32_t start_index_in_dex_register_locations) const { dex_register_map.SetLiveBitMask(num_dex_registers, live_dex_registers_mask); // Set the dex register location mapping data. 
- for (size_t dex_register_number = 0, index_in_dex_register_locations = 0; - dex_register_number < num_dex_registers; - ++dex_register_number) { - if (live_dex_registers_mask.IsBitSet(dex_register_number)) { - size_t location_catalog_entry_index = dex_register_locations_.Get( - start_index_in_dex_register_locations + index_in_dex_register_locations); - dex_register_map.SetLocationCatalogEntryIndex( - index_in_dex_register_locations, - location_catalog_entry_index, - num_dex_registers, - location_catalog_entries_.Size()); - ++index_in_dex_register_locations; - } + size_t number_of_live_dex_registers = live_dex_registers_mask.NumSetBits(); + DCHECK_LE(number_of_live_dex_registers, dex_register_locations_.size()); + DCHECK_LE(start_index_in_dex_register_locations, + dex_register_locations_.size() - number_of_live_dex_registers); + for (size_t index_in_dex_register_locations = 0; + index_in_dex_register_locations != number_of_live_dex_registers; + ++index_in_dex_register_locations) { + size_t location_catalog_entry_index = dex_register_locations_[ + start_index_in_dex_register_locations + index_in_dex_register_locations]; + dex_register_map.SetLocationCatalogEntryIndex( + index_in_dex_register_locations, + location_catalog_entry_index, + num_dex_registers, + location_catalog_entries_.size()); } } size_t StackMapStream::FindEntryWithTheSameDexMap() { - size_t current_entry_index = stack_maps_.Size(); + size_t current_entry_index = stack_maps_.size(); auto entries_it = dex_map_hash_to_stack_map_indices_.find(current_entry_.dex_register_map_hash); if (entries_it == dex_map_hash_to_stack_map_indices_.end()) { // We don't have a perfect hash functions so we need a list to collect all stack maps // which might have the same dex register map. - GrowableArray<uint32_t> stack_map_indices(allocator_, 1); - stack_map_indices.Add(current_entry_index); - dex_map_hash_to_stack_map_indices_.Put(current_entry_.dex_register_map_hash, stack_map_indices); + ArenaVector<uint32_t> stack_map_indices(allocator_->Adapter(kArenaAllocStackMapStream)); + stack_map_indices.push_back(current_entry_index); + dex_map_hash_to_stack_map_indices_.Put(current_entry_.dex_register_map_hash, + std::move(stack_map_indices)); return kNoSameDexMapFound; } // We might have collisions, so we need to check whether or not we really have a match. 
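// Editor's sketch (std::map and std::vector as stand-ins for ArenaSafeMap and
// ArenaVector) of the insertion in FindEntryWithTheSameDexMap above: the freshly
// built index list is moved, not copied, into the hash-to-stack-map-indices map.
// Values are illustrative.
#include <cstdint>
#include <map>
#include <utility>
#include <vector>

void MoveIndicesIntoMapDemo() {
  std::map<uint32_t, std::vector<uint32_t>> dex_map_hash_to_stack_map_indices;
  const uint32_t hash = 0x1234u;
  std::vector<uint32_t> stack_map_indices;
  stack_map_indices.push_back(42u);
  // Analogue of Put(hash, std::move(stack_map_indices)); the vector's storage is stolen.
  dex_map_hash_to_stack_map_indices.emplace(hash, std::move(stack_map_indices));
}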
- for (size_t i = 0; i < entries_it->second.Size(); i++) { - size_t test_entry_index = entries_it->second.Get(i); - if (HaveTheSameDexMaps(stack_maps_.Get(test_entry_index), current_entry_)) { + for (uint32_t test_entry_index : entries_it->second) { + if (HaveTheSameDexMaps(GetStackMap(test_entry_index), current_entry_)) { return test_entry_index; } } - entries_it->second.Add(current_entry_index); + entries_it->second.push_back(current_entry_index); return kNoSameDexMapFound; } @@ -406,21 +402,22 @@ bool StackMapStream::HaveTheSameDexMaps(const StackMapEntry& a, const StackMapEn if (a.num_dex_registers != b.num_dex_registers) { return false; } - - int index_in_dex_register_locations = 0; - for (uint32_t i = 0; i < a.num_dex_registers; i++) { - if (a.live_dex_registers_mask->IsBitSet(i) != b.live_dex_registers_mask->IsBitSet(i)) { + if (a.num_dex_registers != 0u) { + DCHECK(a.live_dex_registers_mask != nullptr); + DCHECK(b.live_dex_registers_mask != nullptr); + if (!a.live_dex_registers_mask->Equal(b.live_dex_registers_mask)) { return false; } - if (a.live_dex_registers_mask->IsBitSet(i)) { - size_t a_loc = dex_register_locations_.Get( - a.dex_register_locations_start_index + index_in_dex_register_locations); - size_t b_loc = dex_register_locations_.Get( - b.dex_register_locations_start_index + index_in_dex_register_locations); - if (a_loc != b_loc) { - return false; - } - ++index_in_dex_register_locations; + size_t number_of_live_dex_registers = a.live_dex_registers_mask->NumSetBits(); + DCHECK_LE(number_of_live_dex_registers, dex_register_locations_.size()); + DCHECK_LE(a.dex_register_locations_start_index, + dex_register_locations_.size() - number_of_live_dex_registers); + DCHECK_LE(b.dex_register_locations_start_index, + dex_register_locations_.size() - number_of_live_dex_registers); + auto a_begin = dex_register_locations_.begin() + a.dex_register_locations_start_index; + auto b_begin = dex_register_locations_.begin() + b.dex_register_locations_start_index; + if (!std::equal(a_begin, a_begin + number_of_live_dex_registers, b_begin)) { + return false; } } return true; diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h index 703b6f7e13..4783e283b3 100644 --- a/compiler/optimizing/stack_map_stream.h +++ b/compiler/optimizing/stack_map_stream.h @@ -24,7 +24,6 @@ #include "memory_region.h" #include "nodes.h" #include "stack_map.h" -#include "utils/growable_array.h" namespace art { @@ -62,15 +61,16 @@ class StackMapStream : public ValueObject { public: explicit StackMapStream(ArenaAllocator* allocator) : allocator_(allocator), - stack_maps_(allocator, 10), - location_catalog_entries_(allocator, 4), - dex_register_locations_(allocator, 10 * 4), - inline_infos_(allocator, 2), + stack_maps_(allocator->Adapter(kArenaAllocStackMapStream)), + location_catalog_entries_(allocator->Adapter(kArenaAllocStackMapStream)), + dex_register_locations_(allocator->Adapter(kArenaAllocStackMapStream)), + inline_infos_(allocator->Adapter(kArenaAllocStackMapStream)), stack_mask_max_(-1), dex_pc_max_(0), register_mask_max_(0), number_of_stack_maps_with_inline_info_(0), - dex_map_hash_to_stack_map_indices_(std::less<uint32_t>(), allocator->Adapter()), + dex_map_hash_to_stack_map_indices_(std::less<uint32_t>(), + allocator->Adapter(kArenaAllocStackMapStream)), current_entry_(), current_inline_info_(), stack_mask_size_(0), @@ -84,7 +84,12 @@ class StackMapStream : public ValueObject { inline_infos_start_(0), needed_size_(0), current_dex_register_(0), - in_inline_frame_(false) 
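// Editor's sketch (assumed data) of the comparison now done in HaveTheSameDexMaps:
// two equally long slices of the concatenated location-index vector are compared with
// std::equal instead of walking the registers one bit at a time.
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <vector>

void CompareLocationSlicesDemo() {
  std::vector<size_t> dex_register_locations = {7, 8, 9, 7, 8, 9};
  const size_t a_start = 0u;
  const size_t b_start = 3u;
  const size_t live_count = 3u;
  auto a_begin = dex_register_locations.begin() + a_start;
  auto b_begin = dex_register_locations.begin() + b_start;
  assert(std::equal(a_begin, a_begin + live_count, b_begin));
}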
{} + in_inline_frame_(false) { + stack_maps_.reserve(10); + location_catalog_entries_.reserve(4); + dex_register_locations_.reserve(10 * 4); + inline_infos_.reserve(2); + } // See runtime/stack_map.h to know what these fields contain. struct StackMapEntry { @@ -127,17 +132,17 @@ class StackMapStream : public ValueObject { void EndInlineInfoEntry(); size_t GetNumberOfStackMaps() const { - return stack_maps_.Size(); + return stack_maps_.size(); } const StackMapEntry& GetStackMap(size_t i) const { - DCHECK_LT(i, stack_maps_.Size()); - return stack_maps_.GetRawStorage()[i]; + DCHECK_LT(i, stack_maps_.size()); + return stack_maps_[i]; } void SetStackMapNativePcOffset(size_t i, uint32_t native_pc_offset) { - DCHECK_LT(i, stack_maps_.Size()); - stack_maps_.GetRawStorage()[i].native_pc_offset = native_pc_offset; + DCHECK_LT(i, stack_maps_.size()); + stack_maps_[i].native_pc_offset = native_pc_offset; } uint32_t ComputeMaxNativePcOffset() const; @@ -150,7 +155,7 @@ class StackMapStream : public ValueObject { private: size_t ComputeDexRegisterLocationCatalogSize() const; size_t ComputeDexRegisterMapSize(uint32_t num_dex_registers, - const BitVector& live_dex_registers_mask) const; + const BitVector* live_dex_registers_mask) const; size_t ComputeDexRegisterMapsSize() const; size_t ComputeInlineInfoSize() const; @@ -164,10 +169,10 @@ class StackMapStream : public ValueObject { uint32_t start_index_in_dex_register_locations) const; ArenaAllocator* allocator_; - GrowableArray<StackMapEntry> stack_maps_; + ArenaVector<StackMapEntry> stack_maps_; // A catalog of unique [location_kind, register_value] pairs (per method). - GrowableArray<DexRegisterLocation> location_catalog_entries_; + ArenaVector<DexRegisterLocation> location_catalog_entries_; // Map from Dex register location catalog entries to their indices in the // location catalog. typedef HashMap<DexRegisterLocation, size_t, LocationCatalogEntriesIndicesEmptyFn, @@ -175,14 +180,14 @@ class StackMapStream : public ValueObject { LocationCatalogEntriesIndices location_catalog_entries_indices_; // A set of concatenated maps of Dex register locations indices to `location_catalog_entries_`. 
- GrowableArray<size_t> dex_register_locations_; - GrowableArray<InlineInfoEntry> inline_infos_; + ArenaVector<size_t> dex_register_locations_; + ArenaVector<InlineInfoEntry> inline_infos_; int stack_mask_max_; uint32_t dex_pc_max_; uint32_t register_mask_max_; size_t number_of_stack_maps_with_inline_info_; - ArenaSafeMap<uint32_t, GrowableArray<uint32_t>> dex_map_hash_to_stack_map_indices_; + ArenaSafeMap<uint32_t, ArenaVector<uint32_t>> dex_map_hash_to_stack_map_indices_; StackMapEntry current_entry_; InlineInfoEntry current_inline_info_; diff --git a/compiler/utils/array_ref.h b/compiler/utils/array_ref.h index 303e0d5ad4..48f0328dce 100644 --- a/compiler/utils/array_ref.h +++ b/compiler/utils/array_ref.h @@ -161,6 +161,15 @@ class ArrayRef { value_type* data() { return array_; } const value_type* data() const { return array_; } + ArrayRef SubArray(size_type pos) const { + return SubArray(pos, size_ - pos); + } + ArrayRef SubArray(size_type pos, size_type length) const { + DCHECK_LE(pos, size()); + DCHECK_LE(length, size() - pos); + return ArrayRef(array_ + pos, length); + } + private: T* array_; size_t size_; diff --git a/compiler/utils/growable_array.h b/compiler/utils/growable_array.h deleted file mode 100644 index f85e026f16..0000000000 --- a/compiler/utils/growable_array.h +++ /dev/null @@ -1,174 +0,0 @@ -/* - * Copyright (C) 2013 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_UTILS_GROWABLE_ARRAY_H_ -#define ART_COMPILER_UTILS_GROWABLE_ARRAY_H_ - -#include <stdint.h> -#include <stddef.h> - -#include "base/arena_object.h" - -namespace art { - -// Deprecated -// TODO: Replace all uses with ArenaVector<T>. -template<typename T> -class GrowableArray : public ArenaObject<kArenaAllocGrowableArray> { - public: - GrowableArray(ArenaAllocator* arena, size_t init_length) - : arena_(arena), - num_allocated_(init_length), - num_used_(0) { - elem_list_ = arena_->AllocArray<T>(init_length, kArenaAllocGrowableArray); - } - - GrowableArray(ArenaAllocator* arena, size_t init_length, T initial_data) - : arena_(arena), - num_allocated_(init_length), - num_used_(init_length) { - elem_list_ = arena_->AllocArray<T>(init_length, kArenaAllocGrowableArray); - for (size_t i = 0; i < init_length; ++i) { - elem_list_[i] = initial_data; - } - } - - bool Contains(T value, size_t start_from = 0) const { - for (size_t i = start_from; i < num_used_; ++i) { - if (elem_list_[i] == value) { - return true; - } - } - return false; - } - - // Expand the list size to at least new length. - void Resize(size_t new_length) { - if (new_length <= num_allocated_) return; - // If it's a small list double the size, else grow 1.5x. - size_t target_length = - (num_allocated_ < 128) ? 
num_allocated_ << 1 : num_allocated_ + (num_allocated_ >> 1); - if (new_length > target_length) { - target_length = new_length; - } - T* new_array = arena_->AllocArray<T>(target_length, kArenaAllocGrowableArray); - memcpy(new_array, elem_list_, sizeof(T) * num_allocated_); - num_allocated_ = target_length; - elem_list_ = new_array; - } - - // NOTE: does not return storage, just resets use count. - void Reset() { - num_used_ = 0; - } - - // Insert an element to the end of a list, resizing if necessary. - void Insert(T elem) { - if (num_used_ == num_allocated_) { - Resize(num_used_ + 1); - } - elem_list_[num_used_++] = elem; - } - - void InsertAt(size_t index, T elem) { - DCHECK(index <= Size()); - Insert(elem); - for (size_t i = Size() - 1; i > index; --i) { - elem_list_[i] = elem_list_[i - 1]; - } - elem_list_[index] = elem; - } - - void Add(T elem) { - Insert(elem); - } - - T Get(size_t index) const { - DCHECK_LT(index, num_used_); - return elem_list_[index]; - } - - // Overwrite existing element at position index. List must be large enough. - void Put(size_t index, T elem) { - DCHECK_LT(index, num_used_); - elem_list_[index] = elem; - } - - void Increment(size_t index) { - DCHECK_LT(index, num_used_); - elem_list_[index]++; - } - - /* - * Remove an existing element from list. If there are more than one copy - * of the element, only the first one encountered will be deleted. - */ - // TODO: consider renaming this. - void Delete(T element) { - bool found = false; - for (size_t i = 0; i < num_used_ - 1; i++) { - if (!found && elem_list_[i] == element) { - found = true; - } - if (found) { - elem_list_[i] = elem_list_[i+1]; - } - } - // We should either have found the element, or it was the last (unscanned) element. - DCHECK(found || (element == elem_list_[num_used_ - 1])); - num_used_--; - } - - void DeleteAt(size_t index) { - for (size_t i = index; i < num_used_ - 1; i++) { - elem_list_[i] = elem_list_[i + 1]; - } - num_used_--; - } - - size_t GetNumAllocated() const { return num_allocated_; } - - size_t Size() const { return num_used_; } - - bool IsEmpty() const { return num_used_ == 0; } - - T Pop() { - DCHECK_GE(num_used_, (size_t)0); - return elem_list_[--num_used_]; - } - - T Peek() const { - DCHECK_GE(num_used_, (size_t)0); - return elem_list_[num_used_ - 1]; - } - - void SetSize(size_t new_size) { - Resize(new_size); - num_used_ = new_size; - } - - T* GetRawStorage() const { return elem_list_; } - - private: - ArenaAllocator* const arena_; - size_t num_allocated_; - size_t num_used_; - T* elem_list_; -}; - -} // namespace art - -#endif // ART_COMPILER_UTILS_GROWABLE_ARRAY_H_ diff --git a/dex2oat/Android.mk b/dex2oat/Android.mk index 3cfdc4c80e..e252765eaa 100644 --- a/dex2oat/Android.mk +++ b/dex2oat/Android.mk @@ -58,14 +58,16 @@ endif ifeq ($(ART_BUILD_HOST_NDEBUG),true) $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libcutils libart-compiler libsigchain libziparchive-host,art/compiler,host,ndebug,$(dex2oat_host_arch))) ifeq ($(ART_BUILD_HOST_STATIC),true) - $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libart libart-compiler libart libziparchive-host libnativehelper libnativebridge libsigchain_dummy libvixl liblog libz libbacktrace libcutils libunwindbacktrace libutils libbase,art/compiler,host,ndebug,$(dex2oat_host_arch),static)) + $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libart libart-compiler libart libziparchive-host libnativehelper libnativebridge libsigchain_dummy libvixl liblog libz \ + libbacktrace libLLVMObject 
libLLVMBitReader libLLVMMC libLLVMMCParser libLLVMCore libLLVMSupport libcutils libunwindbacktrace libutils libbase,art/compiler,host,ndebug,$(dex2oat_host_arch),static)) endif endif ifeq ($(ART_BUILD_HOST_DEBUG),true) $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libcutils libartd-compiler libsigchain libziparchive-host,art/compiler,host,debug,$(dex2oat_host_arch))) ifeq ($(ART_BUILD_HOST_STATIC),true) - $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libartd libartd-compiler libartd libziparchive-host libnativehelper libnativebridge libsigchain_dummy libvixld liblog libz libbacktrace libcutils libunwindbacktrace libutils libbase,art/compiler,host,debug,$(dex2oat_host_arch),static)) + $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libartd libartd-compiler libartd libziparchive-host libnativehelper libnativebridge libsigchain_dummy libvixld liblog libz \ + libbacktrace libLLVMObject libLLVMBitReader libLLVMMC libLLVMMCParser libLLVMCore libLLVMSupport libcutils libunwindbacktrace libutils libbase,art/compiler,host,debug,$(dex2oat_host_arch),static)) endif endif diff --git a/dexdump/Android.mk b/dexdump/Android.mk index a208ccf89b..ec2529e18f 100755 --- a/dexdump/Android.mk +++ b/dexdump/Android.mk @@ -34,8 +34,6 @@ LOCAL_C_INCLUDES := $(dexdump_c_includes) LOCAL_CFLAGS += -Wall LOCAL_SHARED_LIBRARIES += $(dexdump_libraries) LOCAL_MODULE := dexdump2 -LOCAL_MODULE_TAGS := optional -LOCAL_MODULE_PATH := $(TARGET_OUT_OPTIONAL_EXECUTABLES) include $(BUILD_EXECUTABLE) endif # !SDK_ONLY diff --git a/dexdump/dexdump_test.cc b/dexdump/dexdump_test.cc index d9b210d767..4230cb26b7 100644 --- a/dexdump/dexdump_test.cc +++ b/dexdump/dexdump_test.cc @@ -43,12 +43,7 @@ class DexDumpTest : public CommonRuntimeTest { // Runs test with given arguments. bool Exec(const std::vector<std::string>& args, std::string* error_msg) { // TODO(ajcbik): dexdump2 -> dexdump - std::string file_path = GetTestAndroidRoot(); - if (IsHost()) { - file_path += "/bin/dexdump2"; - } else { - file_path += "/xbin/dexdump2"; - } + std::string file_path = GetTestAndroidRoot() + "/bin/dexdump2"; EXPECT_TRUE(OS::FileExists(file_path.c_str())) << file_path << " should be a valid file path"; std::vector<std::string> exec_argv = { file_path }; exec_argv.insert(exec_argv.end(), args.begin(), args.end()); diff --git a/runtime/Android.mk b/runtime/Android.mk index 995a1d5c0d..059c4cdd57 100644 --- a/runtime/Android.mk +++ b/runtime/Android.mk @@ -104,6 +104,7 @@ LIBART_COMMON_SRC_FILES := \ lambda/box_table.cc \ lambda/closure.cc \ lambda/closure_builder.cc \ + lambda/leaking_allocator.cc \ jni_internal.cc \ jobject_comparator.cc \ linear_alloc.cc \ diff --git a/runtime/arch/arm/context_arm.cc b/runtime/arch/arm/context_arm.cc index 403d348752..8f6b1ff0a5 100644 --- a/runtime/arch/arm/context_arm.cc +++ b/runtime/arch/arm/context_arm.cc @@ -30,9 +30,11 @@ void ArmContext::Reset() { std::fill_n(fprs_, arraysize(fprs_), nullptr); gprs_[SP] = &sp_; gprs_[PC] = &pc_; + gprs_[R0] = &arg0_; // Initialize registers with easy to spot debug values. 
sp_ = ArmContext::kBadGprBase + SP; pc_ = ArmContext::kBadGprBase + PC; + arg0_ = 0; } void ArmContext::FillCalleeSaves(const StackVisitor& fr) { diff --git a/runtime/arch/arm/context_arm.h b/runtime/arch/arm/context_arm.h index 77bb5c8399..ea31055e9d 100644 --- a/runtime/arch/arm/context_arm.h +++ b/runtime/arch/arm/context_arm.h @@ -45,6 +45,10 @@ class ArmContext : public Context { SetGPR(PC, new_pc); } + void SetArg0(uintptr_t new_arg0_value) OVERRIDE { + SetGPR(R0, new_arg0_value); + } + bool IsAccessibleGPR(uint32_t reg) OVERRIDE { DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCoreRegisters)); return gprs_[reg] != nullptr; @@ -84,7 +88,7 @@ class ArmContext : public Context { uintptr_t* gprs_[kNumberOfCoreRegisters]; uint32_t* fprs_[kNumberOfSRegisters]; // Hold values for sp and pc if they are not located within a stack frame. - uintptr_t sp_, pc_; + uintptr_t sp_, pc_, arg0_; }; } // namespace arm diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S index e45d828584..dc1cf8ab51 100644 --- a/runtime/arch/arm/quick_entrypoints_arm.S +++ b/runtime/arch/arm/quick_entrypoints_arm.S @@ -437,8 +437,8 @@ ARM_ENTRY art_quick_do_long_jump ldr r14, [r0, #56] @ (LR from gprs_ 56=4*14) add r0, r0, #12 @ increment r0 to skip gprs_[0..2] 12=4*3 ldm r0, {r3-r13} @ load remaining gprs from argument gprs_ - mov r0, #0 @ clear result registers r0 and r1 - mov r1, #0 + ldr r0, [r0, #-12] @ load r0 value + mov r1, #0 @ clear result register r1 bx r2 @ do long jump END art_quick_do_long_jump @@ -1142,7 +1142,7 @@ END art_quick_deoptimize /* * Compiled code has requested that we deoptimize into the interpreter. The deoptimization - * will long jump to the upcall with a special exception of -1. + * will long jump to the interpreter bridge. */ .extern artDeoptimizeFromCompiledCode ENTRY art_quick_deoptimize_from_compiled_code diff --git a/runtime/arch/arm64/context_arm64.cc b/runtime/arch/arm64/context_arm64.cc index 60becc6aea..4477631c67 100644 --- a/runtime/arch/arm64/context_arm64.cc +++ b/runtime/arch/arm64/context_arm64.cc @@ -31,10 +31,12 @@ void Arm64Context::Reset() { std::fill_n(gprs_, arraysize(gprs_), nullptr); std::fill_n(fprs_, arraysize(fprs_), nullptr); gprs_[SP] = &sp_; - gprs_[LR] = &pc_; + gprs_[kPC] = &pc_; + gprs_[X0] = &arg0_; // Initialize registers with easy to spot debug values. sp_ = Arm64Context::kBadGprBase + SP; - pc_ = Arm64Context::kBadGprBase + LR; + pc_ = Arm64Context::kBadGprBase + kPC; + arg0_ = 0; } void Arm64Context::FillCalleeSaves(const StackVisitor& fr) { @@ -58,8 +60,8 @@ void Arm64Context::FillCalleeSaves(const StackVisitor& fr) { } void Arm64Context::SetGPR(uint32_t reg, uintptr_t value) { - DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfXRegisters)); - DCHECK_NE(reg, static_cast<uint32_t>(XZR)); + DCHECK_LT(reg, arraysize(gprs_)); + // Note: we use kPC == XZR, so do not ensure that reg != XZR. DCHECK(IsAccessibleGPR(reg)); DCHECK_NE(gprs_[reg], &gZero); // Can't overwrite this static value since they are never reset. *gprs_[reg] = value; @@ -124,13 +126,13 @@ void Arm64Context::SmashCallerSaves() { extern "C" NO_RETURN void art_quick_do_long_jump(uint64_t*, uint64_t*); void Arm64Context::DoLongJump() { - uint64_t gprs[kNumberOfXRegisters]; + uint64_t gprs[arraysize(gprs_)]; uint64_t fprs[kNumberOfDRegisters]; // The long jump routine called below expects to find the value for SP at index 31. 
DCHECK_EQ(SP, 31); - for (size_t i = 0; i < kNumberOfXRegisters; ++i) { + for (size_t i = 0; i < arraysize(gprs_); ++i) { gprs[i] = gprs_[i] != nullptr ? *gprs_[i] : Arm64Context::kBadGprBase + i; } for (size_t i = 0; i < kNumberOfDRegisters; ++i) { diff --git a/runtime/arch/arm64/context_arm64.h b/runtime/arch/arm64/context_arm64.h index 1c99f3c42d..11314e08ed 100644 --- a/runtime/arch/arm64/context_arm64.h +++ b/runtime/arch/arm64/context_arm64.h @@ -42,20 +42,25 @@ class Arm64Context : public Context { } void SetPC(uintptr_t new_lr) OVERRIDE { - SetGPR(LR, new_lr); + SetGPR(kPC, new_lr); + } + + void SetArg0(uintptr_t new_arg0_value) OVERRIDE { + SetGPR(X0, new_arg0_value); } bool IsAccessibleGPR(uint32_t reg) OVERRIDE { - DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfXRegisters)); + DCHECK_LT(reg, arraysize(gprs_)); return gprs_[reg] != nullptr; } uintptr_t* GetGPRAddress(uint32_t reg) OVERRIDE { - DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfXRegisters)); + DCHECK_LT(reg, arraysize(gprs_)); return gprs_[reg]; } uintptr_t GetGPR(uint32_t reg) OVERRIDE { + // Note: PC isn't an available GPR (outside of internals), so don't allow retrieving the value. DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfXRegisters)); DCHECK(IsAccessibleGPR(reg)); return *gprs_[reg]; @@ -79,12 +84,15 @@ class Arm64Context : public Context { void SmashCallerSaves() OVERRIDE; NO_RETURN void DoLongJump() OVERRIDE; + static constexpr size_t kPC = kNumberOfXRegisters; + private: - // Pointers to register locations, initialized to null or the specific registers below. - uintptr_t* gprs_[kNumberOfXRegisters]; + // Pointers to register locations, initialized to null or the specific registers below. We need + // an additional one for the PC. + uintptr_t* gprs_[kNumberOfXRegisters + 1]; uint64_t * fprs_[kNumberOfDRegisters]; - // Hold values for sp and pc if they are not located within a stack frame. - uintptr_t sp_, pc_; + // Hold values for sp, pc and arg0 if they are not located within a stack frame. + uintptr_t sp_, pc_, arg0_; }; } // namespace arm64 diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S index 169bc384a8..68121781ca 100644 --- a/runtime/arch/arm64/quick_entrypoints_arm64.S +++ b/runtime/arch/arm64/quick_entrypoints_arm64.S @@ -941,7 +941,7 @@ ENTRY art_quick_do_long_jump // Load GPRs // TODO: lots of those are smashed, could optimize. add x0, x0, #30*8 - ldp x30, x1, [x0], #-16 + ldp x30, x1, [x0], #-16 // LR & SP ldp x28, x29, [x0], #-16 ldp x26, x27, [x0], #-16 ldp x24, x25, [x0], #-16 @@ -958,10 +958,12 @@ ENTRY art_quick_do_long_jump ldp x2, x3, [x0], #-16 mov sp, x1 - // TODO: Is it really OK to use LR for the target PC? - mov x0, #0 - mov x1, #0 - br xLR + // Need to load PC, it's at the end (after the space for the unused XZR). Use x1. + ldr x1, [x0, #33*8] + // And the value of x0. + ldr x0, [x0] + + br x1 END art_quick_do_long_jump /* diff --git a/runtime/arch/context.h b/runtime/arch/context.h index 9ef761e981..9af7c04f5c 100644 --- a/runtime/arch/context.h +++ b/runtime/arch/context.h @@ -50,6 +50,9 @@ class Context { // Sets the program counter value. virtual void SetPC(uintptr_t new_pc) = 0; + // Sets the first argument register. + virtual void SetArg0(uintptr_t new_arg0_value) = 0; + // Returns whether the given GPR is accessible (read or write). 
virtual bool IsAccessibleGPR(uint32_t reg) = 0; diff --git a/runtime/arch/mips/context_mips.cc b/runtime/arch/mips/context_mips.cc index bc2bf68993..08ab356855 100644 --- a/runtime/arch/mips/context_mips.cc +++ b/runtime/arch/mips/context_mips.cc @@ -30,9 +30,11 @@ void MipsContext::Reset() { std::fill_n(fprs_, arraysize(fprs_), nullptr); gprs_[SP] = &sp_; gprs_[RA] = &ra_; + gprs_[A0] = &arg0_; // Initialize registers with easy to spot debug values. sp_ = MipsContext::kBadGprBase + SP; ra_ = MipsContext::kBadGprBase + RA; + arg0_ = 0; } void MipsContext::FillCalleeSaves(const StackVisitor& fr) { diff --git a/runtime/arch/mips/context_mips.h b/runtime/arch/mips/context_mips.h index 38cf29a6aa..0affe5397a 100644 --- a/runtime/arch/mips/context_mips.h +++ b/runtime/arch/mips/context_mips.h @@ -78,12 +78,17 @@ class MipsContext : public Context { void SmashCallerSaves() OVERRIDE; NO_RETURN void DoLongJump() OVERRIDE; + void SetArg0(uintptr_t new_arg0_value) OVERRIDE { + SetGPR(A0, new_arg0_value); + } + private: // Pointers to registers in the stack, initialized to null except for the special cases below. uintptr_t* gprs_[kNumberOfCoreRegisters]; uint32_t* fprs_[kNumberOfFRegisters]; - // Hold values for sp and ra (return address) if they are not located within a stack frame. - uintptr_t sp_, ra_; + // Hold values for sp and ra (return address) if they are not located within a stack frame, as + // well as the first argument. + uintptr_t sp_, ra_, arg0_; }; } // namespace mips } // namespace art diff --git a/runtime/arch/mips64/context_mips64.cc b/runtime/arch/mips64/context_mips64.cc index 6637c371d2..2c17f1c118 100644 --- a/runtime/arch/mips64/context_mips64.cc +++ b/runtime/arch/mips64/context_mips64.cc @@ -29,10 +29,12 @@ void Mips64Context::Reset() { std::fill_n(gprs_, arraysize(gprs_), nullptr); std::fill_n(fprs_, arraysize(fprs_), nullptr); gprs_[SP] = &sp_; - gprs_[RA] = &ra_; + gprs_[T9] = &t9_; + gprs_[A0] = &arg0_; // Initialize registers with easy to spot debug values. sp_ = Mips64Context::kBadGprBase + SP; - ra_ = Mips64Context::kBadGprBase + RA; + t9_ = Mips64Context::kBadGprBase + T9; + arg0_ = 0; } void Mips64Context::FillCalleeSaves(const StackVisitor& fr) { diff --git a/runtime/arch/mips64/context_mips64.h b/runtime/arch/mips64/context_mips64.h index e4a144f420..84b1c9bad4 100644 --- a/runtime/arch/mips64/context_mips64.h +++ b/runtime/arch/mips64/context_mips64.h @@ -41,7 +41,7 @@ class Mips64Context : public Context { } void SetPC(uintptr_t new_pc) OVERRIDE { - SetGPR(RA, new_pc); + SetGPR(T9, new_pc); } bool IsAccessibleGPR(uint32_t reg) OVERRIDE { @@ -78,13 +78,20 @@ class Mips64Context : public Context { void SmashCallerSaves() OVERRIDE; NO_RETURN void DoLongJump() OVERRIDE; + void SetArg0(uintptr_t new_arg0_value) OVERRIDE { + SetGPR(A0, new_arg0_value); + } + private: // Pointers to registers in the stack, initialized to null except for the special cases below. uintptr_t* gprs_[kNumberOfGpuRegisters]; uint64_t* fprs_[kNumberOfFpuRegisters]; - // Hold values for sp and ra (return address) if they are not located within a stack frame. - uintptr_t sp_, ra_; + // Hold values for sp and t9 if they are not located within a stack frame. We use t9 for the + // PC (as ra is required to be valid for single-frame deopt and must not be clobbered). We + // also need the first argument for single-frame deopt. 
+ uintptr_t sp_, t9_, arg0_; }; + } // namespace mips64 } // namespace art diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S index 1b50b2e246..ce1b2f3d24 100644 --- a/runtime/arch/mips64/quick_entrypoints_mips64.S +++ b/runtime/arch/mips64/quick_entrypoints_mips64.S @@ -431,7 +431,7 @@ ENTRY_NO_GP art_quick_do_long_jump ld $ra, 248($a0) ld $a0, 32($a0) move $v0, $zero # clear result registers v0 and v1 - jalr $zero, $ra # do long jump + jalr $zero, $t9 # do long jump (do not use ra, it must not be clobbered) move $v1, $zero END art_quick_do_long_jump diff --git a/runtime/arch/mips64/registers_mips64.h b/runtime/arch/mips64/registers_mips64.h index 38bc8f2687..cd94d5ec66 100644 --- a/runtime/arch/mips64/registers_mips64.h +++ b/runtime/arch/mips64/registers_mips64.h @@ -52,6 +52,7 @@ enum GpuRegister { S6 = 22, S7 = 23, T8 = 24, // More temporaries. + TMP = T8, // scratch register (in addition to AT) T9 = 25, K0 = 26, // Reserved for trap handler. K1 = 27, diff --git a/runtime/arch/x86/context_x86.cc b/runtime/arch/x86/context_x86.cc index 7096c82aad..987ad60fd8 100644 --- a/runtime/arch/x86/context_x86.cc +++ b/runtime/arch/x86/context_x86.cc @@ -29,9 +29,11 @@ void X86Context::Reset() { std::fill_n(gprs_, arraysize(gprs_), nullptr); std::fill_n(fprs_, arraysize(fprs_), nullptr); gprs_[ESP] = &esp_; + gprs_[EAX] = &arg0_; // Initialize registers with easy to spot debug values. esp_ = X86Context::kBadGprBase + ESP; eip_ = X86Context::kBadGprBase + kNumberOfCpuRegisters; + arg0_ = 0; } void X86Context::FillCalleeSaves(const StackVisitor& fr) { diff --git a/runtime/arch/x86/context_x86.h b/runtime/arch/x86/context_x86.h index c4a11d8a88..59beb12ffa 100644 --- a/runtime/arch/x86/context_x86.h +++ b/runtime/arch/x86/context_x86.h @@ -44,6 +44,10 @@ class X86Context : public Context { eip_ = new_pc; } + void SetArg0(uintptr_t new_arg0_value) OVERRIDE { + SetGPR(EAX, new_arg0_value); + } + bool IsAccessibleGPR(uint32_t reg) OVERRIDE { DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCpuRegisters)); return gprs_[reg] != nullptr; @@ -95,10 +99,10 @@ class X86Context : public Context { // Pointers to register locations. Values are initialized to null or the special registers below. uintptr_t* gprs_[kNumberOfCpuRegisters]; uint32_t* fprs_[kNumberOfFloatRegisters]; - // Hold values for esp and eip if they are not located within a stack frame. EIP is somewhat + // Hold values for esp, eip and arg0 if they are not located within a stack frame. EIP is somewhat // special in that it cannot be encoded normally as a register operand to an instruction (except // in 64bit addressing modes). - uintptr_t esp_, eip_; + uintptr_t esp_, eip_, arg0_; }; } // namespace x86 } // namespace art diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S index 029a296e5a..f3b15c9ab2 100644 --- a/runtime/arch/x86/quick_entrypoints_x86.S +++ b/runtime/arch/x86/quick_entrypoints_x86.S @@ -1695,7 +1695,7 @@ END_FUNCTION art_quick_deoptimize /* * Compiled code has requested that we deoptimize into the interpreter. The deoptimization - * will long jump to the upcall with a special exception of -1. + * will long jump to the interpreter bridge. 
*/ DEFINE_FUNCTION art_quick_deoptimize_from_compiled_code SETUP_SAVE_ALL_CALLEE_SAVE_FRAME ebx, ebx diff --git a/runtime/arch/x86_64/context_x86_64.cc b/runtime/arch/x86_64/context_x86_64.cc index 1fe2ef8fd8..3dc7d71df4 100644 --- a/runtime/arch/x86_64/context_x86_64.cc +++ b/runtime/arch/x86_64/context_x86_64.cc @@ -29,9 +29,11 @@ void X86_64Context::Reset() { std::fill_n(gprs_, arraysize(gprs_), nullptr); std::fill_n(fprs_, arraysize(fprs_), nullptr); gprs_[RSP] = &rsp_; + gprs_[RDI] = &arg0_; // Initialize registers with easy to spot debug values. rsp_ = X86_64Context::kBadGprBase + RSP; rip_ = X86_64Context::kBadGprBase + kNumberOfCpuRegisters; + arg0_ = 0; } void X86_64Context::FillCalleeSaves(const StackVisitor& fr) { diff --git a/runtime/arch/x86_64/context_x86_64.h b/runtime/arch/x86_64/context_x86_64.h index 30bb9ec362..f05b7f093f 100644 --- a/runtime/arch/x86_64/context_x86_64.h +++ b/runtime/arch/x86_64/context_x86_64.h @@ -44,6 +44,10 @@ class X86_64Context : public Context { rip_ = new_pc; } + void SetArg0(uintptr_t new_arg0_value) OVERRIDE { + SetGPR(RDI, new_arg0_value); + } + bool IsAccessibleGPR(uint32_t reg) OVERRIDE { DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCpuRegisters)); return gprs_[reg] != nullptr; @@ -82,10 +86,10 @@ class X86_64Context : public Context { // Pointers to register locations. Values are initialized to null or the special registers below. uintptr_t* gprs_[kNumberOfCpuRegisters]; uint64_t* fprs_[kNumberOfFloatRegisters]; - // Hold values for rsp and rip if they are not located within a stack frame. RIP is somewhat + // Hold values for rsp, rip and arg0 if they are not located within a stack frame. RIP is somewhat // special in that it cannot be encoded normally as a register operand to an instruction (except // in 64bit addressing modes). - uintptr_t rsp_, rip_; + uintptr_t rsp_, rip_, arg0_; }; } // namespace x86_64 } // namespace art diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S index 861f8025a5..2f438a3c8f 100644 --- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S +++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S @@ -1724,18 +1724,18 @@ END_FUNCTION art_quick_instrumentation_exit * will long jump to the upcall with a special exception of -1. */ DEFINE_FUNCTION art_quick_deoptimize - pushq %rsi // Entry point for a jump. Fake that we were called. - // Use hidden arg. + pushq %rsi // Entry point for a jump. Fake that we were called. + // Use hidden arg. SETUP_SAVE_ALL_CALLEE_SAVE_FRAME - // Stack should be aligned now. - movq %gs:THREAD_SELF_OFFSET, %rdi // Pass Thread. - call SYMBOL(artDeoptimize) // artDeoptimize(Thread*) + // Stack should be aligned now. + movq %gs:THREAD_SELF_OFFSET, %rdi // Pass Thread. + call SYMBOL(artDeoptimize) // artDeoptimize(Thread*) UNREACHABLE END_FUNCTION art_quick_deoptimize /* * Compiled code has requested that we deoptimize into the interpreter. The deoptimization - * will long jump to the upcall with a special exception of -1. + * will long jump to the interpreter bridge. 
*/ DEFINE_FUNCTION art_quick_deoptimize_from_compiled_code SETUP_SAVE_ALL_CALLEE_SAVE_FRAME diff --git a/runtime/base/arena_allocator.cc b/runtime/base/arena_allocator.cc index 4e51f5555d..345428c2a6 100644 --- a/runtime/base/arena_allocator.cc +++ b/runtime/base/arena_allocator.cc @@ -55,6 +55,7 @@ const char* const ArenaAllocatorStatsImpl<kCount>::kAllocNames[] = { "RegAlloc ", "Data ", "STL ", + "GraphBuilder ", "Graph ", "BasicBlock ", "BlockList ", @@ -74,12 +75,25 @@ const char* const ArenaAllocatorStatsImpl<kCount>::kAllocNames[] = { "Environment ", "EnvVRegs ", "EnvLocations ", + "LocSummary ", "SsaBuilder ", "MoveOperands ", "CodeBuffer ", "StackMaps ", "BaselineMaps ", "Optimization ", + "GVN ", + "InductionVar ", + "BCE ", + "SsaLiveness ", + "SsaPhiElim ", + "RefTypeProp ", + "PrimTypeProp ", + "SideEffects ", + "RegAllocator ", + "StackMapStm ", + "CodeGen ", + "ParallelMove ", }; template <bool kCount> diff --git a/runtime/base/arena_allocator.h b/runtime/base/arena_allocator.h index c5eb741b76..b4f19ee8da 100644 --- a/runtime/base/arena_allocator.h +++ b/runtime/base/arena_allocator.h @@ -65,6 +65,7 @@ enum ArenaAllocKind { kArenaAllocRegAlloc, kArenaAllocData, kArenaAllocSTL, + kArenaAllocGraphBuilder, kArenaAllocGraph, kArenaAllocBasicBlock, kArenaAllocBlockList, @@ -84,12 +85,25 @@ enum ArenaAllocKind { kArenaAllocEnvironment, kArenaAllocEnvironmentVRegs, kArenaAllocEnvironmentLocations, + kArenaAllocLocationSummary, kArenaAllocSsaBuilder, kArenaAllocMoveOperands, kArenaAllocCodeBuffer, kArenaAllocStackMaps, kArenaAllocBaselineMaps, kArenaAllocOptimization, + kArenaAllocGvn, + kArenaAllocInductionVarAnalysis, + kArenaAllocBoundsCheckElimination, + kArenaAllocSsaLiveness, + kArenaAllocSsaPhiElimination, + kArenaAllocReferenceTypePropagation, + kArenaAllocPrimitiveTypePropagation, + kArenaAllocSideEffectsAnalysis, + kArenaAllocRegisterAllocator, + kArenaAllocStackMapStream, + kArenaAllocCodeGenerator, + kArenaAllocParallelMoveResolver, kNumArenaAllocKinds }; diff --git a/runtime/base/arena_object.h b/runtime/base/arena_object.h index ab97d0cb66..56e35d8751 100644 --- a/runtime/base/arena_object.h +++ b/runtime/base/arena_object.h @@ -40,6 +40,10 @@ class ArenaObject { LOG(FATAL) << "UNREACHABLE"; UNREACHABLE(); } + + // NOTE: Providing placement new (and matching delete) for constructing container elements. + ALWAYS_INLINE void* operator new(size_t, void* ptr) noexcept { return ptr; } + ALWAYS_INLINE void operator delete(void*, void*) noexcept { } }; diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc index bc8a9f4936..dbc5ceca0d 100644 --- a/runtime/class_linker.cc +++ b/runtime/class_linker.cc @@ -128,7 +128,11 @@ void ClassLinker::ThrowEarlierClassFailure(mirror::Class* c) { // the previous error. Runtime* const runtime = Runtime::Current(); if (!runtime->IsAotCompiler()) { // Give info if this occurs at runtime. 
- LOG(INFO) << "Rejecting re-init on previously-failed class " << PrettyClass(c); + std::string extra; + if (c->GetVerifyErrorClass() != nullptr) { + extra = PrettyDescriptor(c->GetVerifyErrorClass()); + } + LOG(INFO) << "Rejecting re-init on previously-failed class " << PrettyClass(c) << ": " << extra; } CHECK(c->IsErroneous()) << PrettyClass(c) << " " << c->GetStatus(); @@ -1316,14 +1320,6 @@ void ClassLinker::VisitClassRoots(RootVisitor* visitor, VisitRootFlags flags) { // Need to make sure to not copy ArtMethods without doing read barriers since the roots are // marked concurrently and we don't hold the classlinker_classes_lock_ when we do the copy. boot_class_table_.VisitRoots(buffered_visitor); - // TODO: Avoid marking these to enable class unloading. - JavaVMExt* const vm = Runtime::Current()->GetJavaVM(); - for (jweak weak_root : class_loaders_) { - mirror::Object* class_loader = - down_cast<mirror::ClassLoader*>(vm->DecodeWeakGlobal(self, weak_root)); - // Don't need to update anything since the class loaders will be updated by SweepSystemWeaks. - visitor->VisitRootIfNonNull(&class_loader, RootInfo(kRootVMInternal)); - } } else if ((flags & kVisitRootFlagNewRoots) != 0) { for (auto& root : new_class_roots_) { mirror::Class* old_ref = root.Read<kWithoutReadBarrier>(); @@ -1503,13 +1499,10 @@ ClassLinker::~ClassLinker() { STLDeleteElements(&oat_files_); Thread* const self = Thread::Current(); JavaVMExt* const vm = Runtime::Current()->GetJavaVM(); - for (jweak weak_root : class_loaders_) { - auto* const class_loader = down_cast<mirror::ClassLoader*>( - vm->DecodeWeakGlobalDuringShutdown(self, weak_root)); - if (class_loader != nullptr) { - delete class_loader->GetClassTable(); - } - vm->DeleteWeakGlobalRef(self, weak_root); + for (const ClassLoaderData& data : class_loaders_) { + vm->DeleteWeakGlobalRef(self, data.weak_root); + delete data.allocator; + delete data.class_table; } class_loaders_.clear(); } @@ -2375,21 +2368,25 @@ void ClassLinker::LoadClass(Thread* self, } } -LengthPrefixedArray<ArtField>* ClassLinker::AllocArtFieldArray(Thread* self, size_t length) { +LengthPrefixedArray<ArtField>* ClassLinker::AllocArtFieldArray(Thread* self, + LinearAlloc* allocator, + size_t length) { if (length == 0) { return nullptr; } // If the ArtField alignment changes, review all uses of LengthPrefixedArray<ArtField>. 
static_assert(alignof(ArtField) == 4, "ArtField alignment is expected to be 4."); size_t storage_size = LengthPrefixedArray<ArtField>::ComputeSize(length); - void* array_storage = Runtime::Current()->GetLinearAlloc()->Alloc(self, storage_size); + void* array_storage = allocator->Alloc(self, storage_size); auto* ret = new(array_storage) LengthPrefixedArray<ArtField>(length); CHECK(ret != nullptr); std::uninitialized_fill_n(&ret->At(0), length, ArtField()); return ret; } -LengthPrefixedArray<ArtMethod>* ClassLinker::AllocArtMethodArray(Thread* self, size_t length) { +LengthPrefixedArray<ArtMethod>* ClassLinker::AllocArtMethodArray(Thread* self, + LinearAlloc* allocator, + size_t length) { if (length == 0) { return nullptr; } @@ -2397,7 +2394,7 @@ LengthPrefixedArray<ArtMethod>* ClassLinker::AllocArtMethodArray(Thread* self, s const size_t method_size = ArtMethod::Size(image_pointer_size_); const size_t storage_size = LengthPrefixedArray<ArtMethod>::ComputeSize(length, method_size, method_alignment); - void* array_storage = Runtime::Current()->GetLinearAlloc()->Alloc(self, storage_size); + void* array_storage = allocator->Alloc(self, storage_size); auto* ret = new (array_storage) LengthPrefixedArray<ArtMethod>(length); CHECK(ret != nullptr); for (size_t i = 0; i < length; ++i) { @@ -2406,6 +2403,15 @@ LengthPrefixedArray<ArtMethod>* ClassLinker::AllocArtMethodArray(Thread* self, s return ret; } +LinearAlloc* ClassLinker::GetAllocatorForClassLoader(mirror::ClassLoader* class_loader) { + if (class_loader == nullptr) { + return Runtime::Current()->GetLinearAlloc(); + } + LinearAlloc* allocator = class_loader->GetAllocator(); + DCHECK(allocator != nullptr); + return allocator; +} + void ClassLinker::LoadClassMembers(Thread* self, const DexFile& dex_file, const uint8_t* class_data, @@ -2418,8 +2424,11 @@ void ClassLinker::LoadClassMembers(Thread* self, // Load static fields. // We allow duplicate definitions of the same field in a class_data_item // but ignore the repeated indexes here, b/21868015. + LinearAlloc* const allocator = GetAllocatorForClassLoader(klass->GetClassLoader()); ClassDataItemIterator it(dex_file, class_data); - LengthPrefixedArray<ArtField>* sfields = AllocArtFieldArray(self, it.NumStaticFields()); + LengthPrefixedArray<ArtField>* sfields = AllocArtFieldArray(self, + allocator, + it.NumStaticFields()); size_t num_sfields = 0; uint32_t last_field_idx = 0u; for (; it.HasNextStaticField(); it.Next()) { @@ -2435,7 +2444,9 @@ void ClassLinker::LoadClassMembers(Thread* self, klass->SetSFieldsPtr(sfields); DCHECK_EQ(klass->NumStaticFields(), num_sfields); // Load instance fields. - LengthPrefixedArray<ArtField>* ifields = AllocArtFieldArray(self, it.NumInstanceFields()); + LengthPrefixedArray<ArtField>* ifields = AllocArtFieldArray(self, + allocator, + it.NumInstanceFields()); size_t num_ifields = 0u; last_field_idx = 0u; for (; it.HasNextInstanceField(); it.Next()) { @@ -2458,8 +2469,8 @@ void ClassLinker::LoadClassMembers(Thread* self, klass->SetIFieldsPtr(ifields); DCHECK_EQ(klass->NumInstanceFields(), num_ifields); // Load methods. 
- klass->SetDirectMethodsPtr(AllocArtMethodArray(self, it.NumDirectMethods())); - klass->SetVirtualMethodsPtr(AllocArtMethodArray(self, it.NumVirtualMethods())); + klass->SetDirectMethodsPtr(AllocArtMethodArray(self, allocator, it.NumDirectMethods())); + klass->SetVirtualMethodsPtr(AllocArtMethodArray(self, allocator, it.NumVirtualMethods())); size_t class_def_method_index = 0; uint32_t last_dex_method_index = DexFile::kDexNoIndex; size_t last_class_def_method_index = 0; @@ -3031,7 +3042,7 @@ void ClassLinker::MoveClassTableToPreZygote() { WriterMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_); boot_class_table_.FreezeSnapshot(); MoveClassTableToPreZygoteVisitor visitor; - VisitClassLoadersAndRemoveClearedLoaders(&visitor); + VisitClassLoaders(&visitor); } mirror::Class* ClassLinker::LookupClassFromImage(const char* descriptor) { @@ -3414,9 +3425,12 @@ mirror::Class* ClassLinker::CreateProxyClass(ScopedObjectAccessAlreadyRunnable& mirror::Class* existing = InsertClass(descriptor.c_str(), klass.Get(), hash); CHECK(existing == nullptr); + // Needs to be after we insert the class so that the allocator field is set. + LinearAlloc* const allocator = GetAllocatorForClassLoader(klass->GetClassLoader()); + // Instance fields are inherited, but we add a couple of static fields... const size_t num_fields = 2; - LengthPrefixedArray<ArtField>* sfields = AllocArtFieldArray(self, num_fields); + LengthPrefixedArray<ArtField>* sfields = AllocArtFieldArray(self, allocator, num_fields); klass->SetSFieldsPtr(sfields); // 1. Create a static field 'interfaces' that holds the _declared_ interfaces implemented by @@ -3433,7 +3447,7 @@ mirror::Class* ClassLinker::CreateProxyClass(ScopedObjectAccessAlreadyRunnable& throws_sfield.SetAccessFlags(kAccStatic | kAccPublic | kAccFinal); // Proxies have 1 direct method, the constructor - LengthPrefixedArray<ArtMethod>* directs = AllocArtMethodArray(self, 1); + LengthPrefixedArray<ArtMethod>* directs = AllocArtMethodArray(self, allocator, 1); // Currently AllocArtMethodArray cannot return null, but the OOM logic is left there in case we // want to throw OOM in the future. if (UNLIKELY(directs == nullptr)) { @@ -3448,7 +3462,7 @@ mirror::Class* ClassLinker::CreateProxyClass(ScopedObjectAccessAlreadyRunnable& DCHECK_EQ(h_methods->GetClass(), mirror::Method::ArrayClass()) << PrettyClass(h_methods->GetClass()); const size_t num_virtual_methods = h_methods->GetLength(); - auto* virtuals = AllocArtMethodArray(self, num_virtual_methods); + auto* virtuals = AllocArtMethodArray(self, allocator, num_virtual_methods); // Currently AllocArtMethodArray cannot return null, but the OOM logic is left there in case we // want to throw OOM in the future. if (UNLIKELY(virtuals == nullptr)) { @@ -4166,9 +4180,16 @@ ClassTable* ClassLinker::InsertClassTableForClassLoader(mirror::ClassLoader* cla if (class_table == nullptr) { class_table = new ClassTable; Thread* const self = Thread::Current(); - class_loaders_.push_back(self->GetJniEnv()->vm->AddWeakGlobalRef(self, class_loader)); + ClassLoaderData data; + data.weak_root = self->GetJniEnv()->vm->AddWeakGlobalRef(self, class_loader); + data.class_table = class_table; + data.allocator = Runtime::Current()->CreateLinearAlloc(); + class_loaders_.push_back(data); // Don't already have a class table, add it to the class loader. 
- class_loader->SetClassTable(class_table); + CHECK(class_loader->GetClassTable() == nullptr); + CHECK(class_loader->GetAllocator() == nullptr); + class_loader->SetClassTable(data.class_table); + class_loader->SetAllocator(data.allocator); } return class_table; } @@ -4244,6 +4265,11 @@ bool ClassLinker::LinkClass(Thread* self, ClassTable* const table = InsertClassTableForClassLoader(class_loader); mirror::Class* existing = table->UpdateClass(descriptor, h_new_class.Get(), ComputeModifiedUtf8Hash(descriptor)); + if (class_loader != nullptr) { + // We updated the class in the class table, perform the write barrier so that the GC knows + // about the change. + Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(class_loader); + } CHECK_EQ(existing, klass.Get()); if (kIsDebugBuild && class_loader == nullptr && dex_cache_image_class_lookup_required_) { // Check a class loaded with the system class loader matches one in the image if the class @@ -6158,7 +6184,10 @@ jobject ClassLinker::CreatePathClassLoader(Thread* self, std::vector<const DexFi ArtMethod* ClassLinker::CreateRuntimeMethod() { const size_t method_alignment = ArtMethod::Alignment(image_pointer_size_); const size_t method_size = ArtMethod::Size(image_pointer_size_); - LengthPrefixedArray<ArtMethod>* method_array = AllocArtMethodArray(Thread::Current(), 1); + LengthPrefixedArray<ArtMethod>* method_array = AllocArtMethodArray( + Thread::Current(), + Runtime::Current()->GetLinearAlloc(), + 1); ArtMethod* method = &method_array->At(0, method_size, method_alignment); CHECK(method != nullptr); method->SetDexMethodIndex(DexFile::kDexNoIndex); @@ -6171,33 +6200,34 @@ void ClassLinker::DropFindArrayClassCache() { find_array_class_cache_next_victim_ = 0; } -void ClassLinker::VisitClassLoadersAndRemoveClearedLoaders(ClassLoaderVisitor* visitor) { +void ClassLinker::VisitClassLoaders(ClassLoaderVisitor* visitor) const { Thread* const self = Thread::Current(); - Locks::classlinker_classes_lock_->AssertExclusiveHeld(self); JavaVMExt* const vm = self->GetJniEnv()->vm; - for (auto it = class_loaders_.begin(); it != class_loaders_.end();) { - const jweak weak_root = *it; - mirror::ClassLoader* const class_loader = down_cast<mirror::ClassLoader*>( - vm->DecodeWeakGlobal(self, weak_root)); + for (const ClassLoaderData& data : class_loaders_) { + auto* const class_loader = down_cast<mirror::ClassLoader*>( + vm->DecodeWeakGlobal(self, data.weak_root)); if (class_loader != nullptr) { visitor->Visit(class_loader); - ++it; - } else { - // Remove the cleared weak reference from the array. - vm->DeleteWeakGlobalRef(self, weak_root); - it = class_loaders_.erase(it); } } } -void ClassLinker::VisitClassLoaders(ClassLoaderVisitor* visitor) const { +void ClassLinker::CleanupClassLoaders() { Thread* const self = Thread::Current(); - JavaVMExt* const vm = self->GetJniEnv()->vm; - for (jweak weak_root : class_loaders_) { - mirror::ClassLoader* const class_loader = down_cast<mirror::ClassLoader*>( - vm->DecodeWeakGlobal(self, weak_root)); + WriterMutexLock mu(self, *Locks::classlinker_classes_lock_); + JavaVMExt* const vm = Runtime::Current()->GetJavaVM(); + for (auto it = class_loaders_.begin(); it != class_loaders_.end(); ) { + const ClassLoaderData& data = *it; + auto* const class_loader = down_cast<mirror::ClassLoader*>( + vm->DecodeWeakGlobal(self, data.weak_root)); if (class_loader != nullptr) { - visitor->Visit(class_loader); + ++it; + } else { + // Weak reference was cleared, delete the data associated with this class loader. 
+ delete data.class_table; + delete data.allocator; + vm->DeleteWeakGlobalRef(self, data.weak_root); + it = class_loaders_.erase(it); } } } diff --git a/runtime/class_linker.h b/runtime/class_linker.h index fee706625b..739403f6c9 100644 --- a/runtime/class_linker.h +++ b/runtime/class_linker.h @@ -403,9 +403,13 @@ class ClassLinker { SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_); - LengthPrefixedArray<ArtField>* AllocArtFieldArray(Thread* self, size_t length); + LengthPrefixedArray<ArtField>* AllocArtFieldArray(Thread* self, + LinearAlloc* allocator, + size_t length); - LengthPrefixedArray<ArtMethod>* AllocArtMethodArray(Thread* self, size_t length); + LengthPrefixedArray<ArtMethod>* AllocArtMethodArray(Thread* self, + LinearAlloc* allocator, + size_t length); mirror::PointerArray* AllocPointerArray(Thread* self, size_t length) SHARED_REQUIRES(Locks::mutator_lock_) @@ -546,17 +550,24 @@ class ClassLinker { // entries are roots, but potentially not image classes. void DropFindArrayClassCache() SHARED_REQUIRES(Locks::mutator_lock_); - private: - // The RemoveClearedLoaders version removes cleared weak global class loaders and frees their - // class tables. This version can only be called with reader access to the - // classlinker_classes_lock_ since it modifies the class_loaders_ list. - void VisitClassLoadersAndRemoveClearedLoaders(ClassLoaderVisitor* visitor) - REQUIRES(Locks::classlinker_classes_lock_) + // Clean up class loaders, this needs to happen after JNI weak globals are cleared. + void CleanupClassLoaders() + SHARED_REQUIRES(Locks::mutator_lock_) + REQUIRES(!Locks::classlinker_classes_lock_); + + static LinearAlloc* GetAllocatorForClassLoader(mirror::ClassLoader* class_loader) SHARED_REQUIRES(Locks::mutator_lock_); + + private: + struct ClassLoaderData { + jweak weak_root; // Weak root to enable class unloading. + ClassTable* class_table; + LinearAlloc* allocator; + }; + void VisitClassLoaders(ClassLoaderVisitor* visitor) const SHARED_REQUIRES(Locks::classlinker_classes_lock_, Locks::mutator_lock_); - void VisitClassesInternal(ClassVisitor* visitor) SHARED_REQUIRES(Locks::classlinker_classes_lock_, Locks::mutator_lock_); @@ -826,8 +837,8 @@ class ClassLinker { std::vector<const OatFile*> oat_files_ GUARDED_BY(dex_lock_); // This contains the class loaders which have class tables. It is populated by - // InsertClassTableForClassLoader. Weak roots to enable class unloading. - std::list<jweak> class_loaders_ + // InsertClassTableForClassLoader. + std::list<ClassLoaderData> class_loaders_ GUARDED_BY(Locks::classlinker_classes_lock_); // Boot class path table. Since the class loader for this is null. 
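Editor's note on the class_linker hunks above: each class loader now carries its own ClassTable and LinearAlloc, recorded next to the jweak root in the new ClassLoaderData struct, and CleanupClassLoaders() (which the header comment says must run after JNI weak globals are cleared) frees both once the weak root has been swept. The sketch below is a simplified, self-contained C++ illustration of that ownership pattern only; WeakRoot, Table and LinearArena are hypothetical stand-ins, not the ART types.

#include <list>
#include <memory>

// Hypothetical stand-ins for jweak, ClassTable and LinearAlloc (illustration only).
struct WeakRoot { bool cleared = false; };
struct Table {};
struct LinearArena {};

struct ClassLoaderData {
  WeakRoot* weak_root;                     // Weak root that keeps class unloading possible.
  std::unique_ptr<Table> class_table;      // Owned per-class-loader class table.
  std::unique_ptr<LinearArena> allocator;  // Owned per-class-loader allocator.
};

class LoaderRegistry {
 public:
  // Roughly mirrors InsertClassTableForClassLoader: create the table and allocator
  // lazily and remember them alongside the weak root.
  Table* Register(WeakRoot* root) {
    loaders_.push_back(ClassLoaderData{root,
                                       std::make_unique<Table>(),
                                       std::make_unique<LinearArena>()});
    return loaders_.back().class_table.get();
  }

  // Roughly mirrors CleanupClassLoaders: runs after weak roots were swept, so a
  // cleared root identifies an unloaded class loader whose data can be freed.
  void Cleanup() {
    for (auto it = loaders_.begin(); it != loaders_.end();) {
      if (it->weak_root->cleared) {
        it = loaders_.erase(it);  // unique_ptr members release the table and allocator.
      } else {
        ++it;
      }
    }
  }

 private:
  std::list<ClassLoaderData> loaders_;
};

Keeping the table and allocator in the same record as the weak root is what lets the real CleanupClassLoaders() free them in a single pass without having to keep visiting the class loaders as strong GC roots.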
diff --git a/runtime/class_linker_test.cc b/runtime/class_linker_test.cc index b4ea3b3460..0926ce3f6a 100644 --- a/runtime/class_linker_test.cc +++ b/runtime/class_linker_test.cc @@ -550,6 +550,7 @@ struct StackTraceElementOffsets : public CheckOffsets<mirror::StackTraceElement> struct ClassLoaderOffsets : public CheckOffsets<mirror::ClassLoader> { ClassLoaderOffsets() : CheckOffsets<mirror::ClassLoader>(false, "Ljava/lang/ClassLoader;") { + addOffset(OFFSETOF_MEMBER(mirror::ClassLoader, allocator_), "allocator"); addOffset(OFFSETOF_MEMBER(mirror::ClassLoader, class_table_), "classTable"); addOffset(OFFSETOF_MEMBER(mirror::ClassLoader, packages_), "packages"); addOffset(OFFSETOF_MEMBER(mirror::ClassLoader, parent_), "parent"); diff --git a/runtime/dex_instruction-inl.h b/runtime/dex_instruction-inl.h index 7344d13805..e160a103d9 100644 --- a/runtime/dex_instruction-inl.h +++ b/runtime/dex_instruction-inl.h @@ -454,8 +454,8 @@ inline bool Instruction::HasVarArgs25x() const { return FormatOf(Opcode()) == k25x; } -// Copies all of the parameter registers into the arg array. Check the length with VRegB_25x()+1. -inline void Instruction::GetAllArgs25x(uint32_t arg[kMaxVarArgRegs]) const { +// Copies all of the parameter registers into the arg array. Check the length with VRegB_25x()+2. +inline void Instruction::GetAllArgs25x(uint32_t (&arg)[kMaxVarArgRegs25x]) const { DCHECK_EQ(FormatOf(Opcode()), k25x); /* @@ -500,19 +500,21 @@ inline void Instruction::GetAllArgs25x(uint32_t arg[kMaxVarArgRegs]) const { */ switch (count) { case 4: - arg[4] = (Fetch16(0) >> 8) & 0x0f; // vG + arg[5] = (Fetch16(0) >> 8) & 0x0f; // vG FALLTHROUGH_INTENDED; case 3: - arg[3] = (reg_list >> 12) & 0x0f; // vF + arg[4] = (reg_list >> 12) & 0x0f; // vF FALLTHROUGH_INTENDED; case 2: - arg[2] = (reg_list >> 8) & 0x0f; // vE + arg[3] = (reg_list >> 8) & 0x0f; // vE FALLTHROUGH_INTENDED; case 1: - arg[1] = (reg_list >> 4) & 0x0f; // vD + arg[2] = (reg_list >> 4) & 0x0f; // vD FALLTHROUGH_INTENDED; default: // case 0 + // The required lambda 'this' is actually a pair, but the pair is implicit. arg[0] = VRegC_25x(); // vC + arg[1] = arg[0] + 1; // vC + 1 break; } } diff --git a/runtime/dex_instruction.cc b/runtime/dex_instruction.cc index fc4df1475a..5250b0d79b 100644 --- a/runtime/dex_instruction.cc +++ b/runtime/dex_instruction.cc @@ -322,10 +322,10 @@ std::string Instruction::DumpString(const DexFile* file) const { } case k25x: { if (Opcode() == INVOKE_LAMBDA) { - uint32_t arg[kMaxVarArgRegs]; + uint32_t arg[kMaxVarArgRegs25x]; GetAllArgs25x(arg); const size_t num_extra_var_args = VRegB_25x(); - DCHECK_LE(num_extra_var_args + 1, kMaxVarArgRegs); + DCHECK_LE(num_extra_var_args + 2, arraysize(arg)); // invoke-lambda vC, {vD, vE, vF, vG} os << opcode << " v" << arg[0] << ", {"; @@ -333,7 +333,7 @@ std::string Instruction::DumpString(const DexFile* file) const { if (i != 0) { os << ", "; } - os << "v" << arg[i+1]; + os << "v" << arg[i+2]; // Don't print the pair of vC registers. Pair is implicit. } os << "}"; break; diff --git a/runtime/dex_instruction.h b/runtime/dex_instruction.h index df2d3799ab..48a12e53af 100644 --- a/runtime/dex_instruction.h +++ b/runtime/dex_instruction.h @@ -180,9 +180,11 @@ class Instruction { kVerifyVarArgRangeNonZero = 0x100000, kVerifyRuntimeOnly = 0x200000, kVerifyError = 0x400000, + kVerifyRegCString = 0x800000, }; static constexpr uint32_t kMaxVarArgRegs = 5; + static constexpr uint32_t kMaxVarArgRegs25x = 6; // lambdas are 2 registers. 
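  // Editorial aside (not part of the original header): with the 25x change above,
  // GetAllArgs25x() fills a six-slot array in which arg[0] and arg[1] are the implicit
  // closure register pair (vC, vC + 1) and arg[2]..arg[5] are the explicit arguments
  // vD..vG, so buffers are sized with kMaxVarArgRegs25x and checked against
  // VRegB_25x() + 2. A hedged usage sketch, with hypothetical values and a
  // hypothetical UseArgumentRegister() helper:
  //
  //   uint32_t args[Instruction::kMaxVarArgRegs25x] = {};  // zero-init unused slots
  //   inst->GetAllArgs25x(args);                           // args[0], args[1] = closure pair
  //   for (size_t i = 0; i < inst->VRegB_25x(); ++i) {
  //     UseArgumentRegister(args[i + 2]);                  // vD..vG
  //   }
  //
  // For example, invoke-lambda vC=v2, {v5, v7} would decode (with the zero-initialized
  // buffer) to args = { 2, 3, 5, 7, 0, 0 } and VRegB_25x() == 2.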
// Returns the size (in 2 byte code units) of this instruction. size_t SizeInCodeUnits() const { @@ -408,7 +410,7 @@ class Instruction { void GetVarArgs(uint32_t args[kMaxVarArgRegs]) const { return GetVarArgs(args, Fetch16(0)); } - void GetAllArgs25x(uint32_t args[kMaxVarArgRegs]) const; + void GetAllArgs25x(uint32_t (&args)[kMaxVarArgRegs25x]) const; // Returns the opcode field of the instruction. The given "inst_data" parameter must be the first // 16 bits of instruction. @@ -536,7 +538,7 @@ class Instruction { int GetVerifyTypeArgumentC() const { return (kInstructionVerifyFlags[Opcode()] & (kVerifyRegC | kVerifyRegCField | - kVerifyRegCNewArray | kVerifyRegCType | kVerifyRegCWide)); + kVerifyRegCNewArray | kVerifyRegCType | kVerifyRegCWide | kVerifyRegCString)); } int GetVerifyExtraFlags() const { diff --git a/runtime/dex_instruction_list.h b/runtime/dex_instruction_list.h index a176772a84..9d7e0c4409 100644 --- a/runtime/dex_instruction_list.h +++ b/runtime/dex_instruction_list.h @@ -263,10 +263,10 @@ V(0xF2, IGET_SHORT_QUICK, "iget-short-quick", k22c, true, kIndexFieldOffset, kContinue | kThrow | kLoad | kRegCFieldOrConstant, kVerifyRegA | kVerifyRegB | kVerifyRuntimeOnly) \ V(0xF3, INVOKE_LAMBDA, "invoke-lambda", k25x, false, kIndexNone, kContinue | kThrow | kInvoke | kExperimental, kVerifyRegC /*TODO: | kVerifyVarArg*/) \ V(0xF4, UNUSED_F4, "unused-f4", k10x, false, kIndexUnknown, 0, kVerifyError) \ - V(0xF5, UNUSED_F5, "unused-f5", k10x, false, kIndexUnknown, 0, kVerifyError) \ + V(0xF5, CAPTURE_VARIABLE, "capture-variable", k21c, false, kIndexStringRef, kExperimental, kVerifyRegA | kVerifyRegBString) \ /* TODO(iam): get rid of the unused 'false' column */ \ V(0xF6, CREATE_LAMBDA, "create-lambda", k21c, false_UNUSED, kIndexMethodRef, kContinue | kThrow | kExperimental, kVerifyRegA | kVerifyRegBMethod) \ - V(0xF7, UNUSED_F7, "unused-f7", k10x, false, kIndexUnknown, 0, kVerifyError) \ + V(0xF7, LIBERATE_VARIABLE, "liberate-variable", k22c, false, kIndexStringRef, kExperimental, kVerifyRegA | kVerifyRegB | kVerifyRegCString) \ V(0xF8, BOX_LAMBDA, "box-lambda", k22x, true, kIndexNone, kContinue | kExperimental, kVerifyRegA | kVerifyRegB) \ V(0xF9, UNBOX_LAMBDA, "unbox-lambda", k22c, true, kIndexTypeRef, kContinue | kThrow | kExperimental, kVerifyRegA | kVerifyRegB | kVerifyRegCType) \ V(0xFA, UNUSED_FA, "unused-fa", k10x, false, kIndexUnknown, 0, kVerifyError) \ diff --git a/runtime/entrypoints/entrypoint_utils-inl.h b/runtime/entrypoints/entrypoint_utils-inl.h index cc3eefed34..8ae0b0796f 100644 --- a/runtime/entrypoints/entrypoint_utils-inl.h +++ b/runtime/entrypoints/entrypoint_utils-inl.h @@ -71,44 +71,6 @@ inline ArtMethod* GetResolvedMethod(ArtMethod* outer_method, *outer_method->GetDexFile(), method_index, dex_cache, class_loader, nullptr, invoke_type); } -inline ArtMethod* GetCalleeSaveMethodCaller(ArtMethod** sp, - Runtime::CalleeSaveType type, - bool do_caller_check = false) - SHARED_REQUIRES(Locks::mutator_lock_) { - DCHECK_EQ(*sp, Runtime::Current()->GetCalleeSaveMethod(type)); - - const size_t callee_frame_size = GetCalleeSaveFrameSize(kRuntimeISA, type); - auto** caller_sp = reinterpret_cast<ArtMethod**>( - reinterpret_cast<uintptr_t>(sp) + callee_frame_size); - ArtMethod* outer_method = *caller_sp; - ArtMethod* caller = outer_method; - - if ((outer_method != nullptr) && outer_method->IsOptimized(sizeof(void*))) { - const size_t callee_return_pc_offset = GetCalleeSaveReturnPcOffset(kRuntimeISA, type); - uintptr_t caller_pc = *reinterpret_cast<uintptr_t*>( - 
(reinterpret_cast<uint8_t*>(sp) + callee_return_pc_offset)); - uintptr_t native_pc_offset = outer_method->NativeQuickPcOffset(caller_pc); - CodeInfo code_info = outer_method->GetOptimizedCodeInfo(); - StackMapEncoding encoding = code_info.ExtractEncoding(); - StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset, encoding); - DCHECK(stack_map.IsValid()); - if (stack_map.HasInlineInfo(encoding)) { - InlineInfo inline_info = code_info.GetInlineInfoOf(stack_map, encoding); - caller = GetResolvedMethod(outer_method, inline_info, inline_info.GetDepth() - 1); - } - } - - if (kIsDebugBuild && do_caller_check) { - // Note that do_caller_check is optional, as this method can be called by - // stubs, and tests without a proper call stack. - NthCallerVisitor visitor(Thread::Current(), 1, true); - visitor.WalkStack(); - CHECK_EQ(caller, visitor.caller); - } - - return caller; -} - inline ArtMethod* GetCalleeSaveMethodCaller(Thread* self, Runtime::CalleeSaveType type) SHARED_REQUIRES(Locks::mutator_lock_) { return GetCalleeSaveMethodCaller( diff --git a/runtime/entrypoints/entrypoint_utils.cc b/runtime/entrypoints/entrypoint_utils.cc index 94aced27ed..f1939993f7 100644 --- a/runtime/entrypoints/entrypoint_utils.cc +++ b/runtime/entrypoints/entrypoint_utils.cc @@ -21,11 +21,15 @@ #include "base/mutex.h" #include "class_linker-inl.h" #include "dex_file-inl.h" +#include "entrypoints/entrypoint_utils-inl.h" +#include "entrypoints/quick/callee_save_frame.h" +#include "entrypoints/runtime_asm_entrypoints.h" #include "gc/accounting/card_table-inl.h" #include "mirror/class-inl.h" #include "mirror/method.h" #include "mirror/object-inl.h" #include "mirror/object_array-inl.h" +#include "nth_caller_visitor.h" #include "reflection.h" #include "scoped_thread_state_change.h" #include "ScopedLocalRef.h" @@ -345,4 +349,54 @@ bool FillArrayData(mirror::Object* obj, const Instruction::ArrayDataPayload* pay return true; } +ArtMethod* GetCalleeSaveMethodCaller(ArtMethod** sp, + Runtime::CalleeSaveType type, + bool do_caller_check) + SHARED_REQUIRES(Locks::mutator_lock_) { + DCHECK_EQ(*sp, Runtime::Current()->GetCalleeSaveMethod(type)); + + const size_t callee_frame_size = GetCalleeSaveFrameSize(kRuntimeISA, type); + auto** caller_sp = reinterpret_cast<ArtMethod**>( + reinterpret_cast<uintptr_t>(sp) + callee_frame_size); + ArtMethod* outer_method = *caller_sp; + ArtMethod* caller = outer_method; + + if ((outer_method != nullptr) && outer_method->IsOptimized(sizeof(void*))) { + const size_t callee_return_pc_offset = GetCalleeSaveReturnPcOffset(kRuntimeISA, type); + uintptr_t caller_pc = *reinterpret_cast<uintptr_t*>( + (reinterpret_cast<uint8_t*>(sp) + callee_return_pc_offset)); + if (LIKELY(caller_pc != reinterpret_cast<uintptr_t>(GetQuickInstrumentationExitPc()))) { + uintptr_t native_pc_offset = outer_method->NativeQuickPcOffset(caller_pc); + CodeInfo code_info = outer_method->GetOptimizedCodeInfo(); + StackMapEncoding encoding = code_info.ExtractEncoding(); + StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset, encoding); + DCHECK(stack_map.IsValid()); + if (stack_map.HasInlineInfo(encoding)) { + InlineInfo inline_info = code_info.GetInlineInfoOf(stack_map, encoding); + caller = GetResolvedMethod(outer_method, inline_info, inline_info.GetDepth() - 1); + } + } else { + // We're instrumenting, just use the StackVisitor which knows how to + // handle instrumented frames. 
+ NthCallerVisitor visitor(Thread::Current(), 1, true); + visitor.WalkStack(); + caller = visitor.caller; + if (kIsDebugBuild) { + // Avoid doing the check below. + do_caller_check = false; + } + } + } + + if (kIsDebugBuild && do_caller_check) { + // Note that do_caller_check is optional, as this method can be called by + // stubs, and tests without a proper call stack. + NthCallerVisitor visitor(Thread::Current(), 1, true); + visitor.WalkStack(); + CHECK_EQ(caller, visitor.caller); + } + + return caller; +} + } // namespace art diff --git a/runtime/entrypoints/entrypoint_utils.h b/runtime/entrypoints/entrypoint_utils.h index 53f2677e7e..4217cab697 100644 --- a/runtime/entrypoints/entrypoint_utils.h +++ b/runtime/entrypoints/entrypoint_utils.h @@ -26,6 +26,7 @@ #include "gc/allocator_type.h" #include "invoke_type.h" #include "jvalue.h" +#include "runtime.h" namespace art { @@ -179,6 +180,10 @@ bool FillArrayData(mirror::Object* obj, const Instruction::ArrayDataPayload* pay template <typename INT_TYPE, typename FLOAT_TYPE> inline INT_TYPE art_float_to_integral(FLOAT_TYPE f); +ArtMethod* GetCalleeSaveMethodCaller(ArtMethod** sp, + Runtime::CalleeSaveType type, + bool do_caller_check = false); + } // namespace art #endif // ART_RUNTIME_ENTRYPOINTS_ENTRYPOINT_UTILS_H_ diff --git a/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc b/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc index d749664d12..dfd9fcddb8 100644 --- a/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc @@ -22,13 +22,16 @@ #include "mirror/class-inl.h" #include "mirror/object_array-inl.h" #include "mirror/object-inl.h" +#include "quick_exception_handler.h" #include "stack.h" #include "thread.h" #include "verifier/method_verifier.h" namespace art { -NO_RETURN static void artDeoptimizeImpl(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) { +extern "C" NO_RETURN void artDeoptimize(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) { + ScopedQuickEntrypointChecks sqec(self); + if (VLOG_IS_ON(deopt)) { LOG(INFO) << "Deopting:"; self->Dump(LOG(INFO)); @@ -39,19 +42,26 @@ NO_RETURN static void artDeoptimizeImpl(Thread* self) SHARED_REQUIRES(Locks::mut self->QuickDeliverException(); } -extern "C" NO_RETURN void artDeoptimize(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) { - ScopedQuickEntrypointChecks sqec(self); - artDeoptimizeImpl(self); -} - extern "C" NO_RETURN void artDeoptimizeFromCompiledCode(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) { ScopedQuickEntrypointChecks sqec(self); + + // Deopt logging will be in DeoptimizeSingleFrame. It is there to take advantage of the + // specialized visitor that will show whether a method is Quick or Shadow. + // Before deoptimizing to interpreter, we must push the deoptimization context. JValue return_value; return_value.SetJ(0); // we never deoptimize from compiled code with an invoke result. self->PushDeoptimizationContext(return_value, false, self->GetException()); - artDeoptimizeImpl(self); + + QuickExceptionHandler exception_handler(self, true); + exception_handler.DeoptimizeSingleFrame(); + exception_handler.UpdateInstrumentationStack(); + exception_handler.DeoptimizeSingleFrameArchDependentFixup(); + // We cannot smash the caller-saves, as we need the ArtMethod in a parameter register that would + // be caller-saved. This has the downside that we cannot track incorrect register usage down the + // line. 
+ exception_handler.DoLongJump(false); } } // namespace art diff --git a/runtime/entrypoints/quick/quick_jni_entrypoints.cc b/runtime/entrypoints/quick/quick_jni_entrypoints.cc index f69c39e8bc..fc5c52e75a 100644 --- a/runtime/entrypoints/quick/quick_jni_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_jni_entrypoints.cc @@ -112,4 +112,61 @@ extern mirror::Object* JniMethodEndWithReferenceSynchronized(jobject result, return JniMethodEndWithReferenceHandleResult(result, saved_local_ref_cookie, self); } +extern uint64_t GenericJniMethodEnd(Thread* self, + uint32_t saved_local_ref_cookie, + jvalue result, + uint64_t result_f, + ArtMethod* called, + HandleScope* handle_scope) + // TODO: NO_THREAD_SAFETY_ANALYSIS as GoToRunnable() is NO_THREAD_SAFETY_ANALYSIS + NO_THREAD_SAFETY_ANALYSIS { + GoToRunnable(self); + // We need the mutator lock (i.e., calling GoToRunnable()) before accessing the shorty or the + // locked object. + jobject locked = called->IsSynchronized() ? handle_scope->GetHandle(0).ToJObject() : nullptr; + char return_shorty_char = called->GetShorty()[0]; + if (return_shorty_char == 'L') { + if (locked != nullptr) { + UnlockJniSynchronizedMethod(locked, self); + } + return reinterpret_cast<uint64_t>(JniMethodEndWithReferenceHandleResult( + result.l, saved_local_ref_cookie, self)); + } else { + if (locked != nullptr) { + UnlockJniSynchronizedMethod(locked, self); // Must decode before pop. + } + PopLocalReferences(saved_local_ref_cookie, self); + switch (return_shorty_char) { + case 'F': { + if (kRuntimeISA == kX86) { + // Convert back the result to float. + double d = bit_cast<double, uint64_t>(result_f); + return bit_cast<uint32_t, float>(static_cast<float>(d)); + } else { + return result_f; + } + } + case 'D': + return result_f; + case 'Z': + return result.z; + case 'B': + return result.b; + case 'C': + return result.c; + case 'S': + return result.s; + case 'I': + return result.i; + case 'J': + return result.j; + case 'V': + return 0; + default: + LOG(FATAL) << "Unexpected return shorty character " << return_shorty_char; + return 0; + } + } +} + } // namespace art diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc index 1302c5f17b..5d3ac73d77 100644 --- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc @@ -29,8 +29,10 @@ #include "mirror/method.h" #include "mirror/object-inl.h" #include "mirror/object_array-inl.h" +#include "quick_exception_handler.h" #include "runtime.h" #include "scoped_thread_state_change.h" +#include "stack.h" #include "debugger.h" namespace art { @@ -646,27 +648,86 @@ extern "C" uint64_t artQuickToInterpreterBridge(ArtMethod* method, Thread* self, if (method->IsAbstract()) { ThrowAbstractMethodError(method); return 0; + } + + JValue tmp_value; + ShadowFrame* deopt_frame = self->PopStackedShadowFrame( + StackedShadowFrameType::kSingleFrameDeoptimizationShadowFrame, false); + const DexFile::CodeItem* code_item = method->GetCodeItem(); + DCHECK(code_item != nullptr) << PrettyMethod(method); + ManagedStack fragment; + + DCHECK(!method->IsNative()) << PrettyMethod(method); + uint32_t shorty_len = 0; + auto* non_proxy_method = method->GetInterfaceMethodIfProxy(sizeof(void*)); + const char* shorty = non_proxy_method->GetShorty(&shorty_len); + + JValue result; + + if (deopt_frame != nullptr) { + // Coming from single-frame deopt. + + if (kIsDebugBuild) { + // Sanity-check: are the methods as expected? 
We check that the last shadow frame (the bottom + // of the call-stack) corresponds to the called method. + ShadowFrame* linked = deopt_frame; + while (linked->GetLink() != nullptr) { + linked = linked->GetLink(); + } + CHECK_EQ(method, linked->GetMethod()) << PrettyMethod(method) << " " + << PrettyMethod(linked->GetMethod()); + } + + if (VLOG_IS_ON(deopt)) { + // Print out the stack to verify that it was a single-frame deopt. + LOG(INFO) << "Continue-ing from deopt. Stack is:"; + QuickExceptionHandler::DumpFramesWithType(self, true); + } + + mirror::Throwable* pending_exception = nullptr; + self->PopDeoptimizationContext(&result, &pending_exception); + + // Push a transition back into managed code onto the linked list in thread. + self->PushManagedStackFragment(&fragment); + + // Ensure that the stack is still in order. + if (kIsDebugBuild) { + class DummyStackVisitor : public StackVisitor { + public: + explicit DummyStackVisitor(Thread* self_in) SHARED_REQUIRES(Locks::mutator_lock_) + : StackVisitor(self_in, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames) {} + + bool VisitFrame() OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) { + // Nothing to do here. In a debug build, SanityCheckFrame will do the work in the walking + // logic. Just always say we want to continue. + return true; + } + }; + DummyStackVisitor dsv(self); + dsv.WalkStack(); + } + + // Restore the exception that was pending before deoptimization then interpret the + // deoptimized frames. + if (pending_exception != nullptr) { + self->SetException(pending_exception); + } + interpreter::EnterInterpreterFromDeoptimize(self, deopt_frame, &result); } else { - DCHECK(!method->IsNative()) << PrettyMethod(method); const char* old_cause = self->StartAssertNoThreadSuspension( "Building interpreter shadow frame"); - const DexFile::CodeItem* code_item = method->GetCodeItem(); - DCHECK(code_item != nullptr) << PrettyMethod(method); uint16_t num_regs = code_item->registers_size_; - void* memory = alloca(ShadowFrame::ComputeSize(num_regs)); // No last shadow coming from quick. - ShadowFrame* shadow_frame(ShadowFrame::Create(num_regs, nullptr, method, 0, memory)); + ShadowFrameAllocaUniquePtr shadow_frame_unique_ptr = + CREATE_SHADOW_FRAME(num_regs, nullptr, method, 0); + ShadowFrame* shadow_frame = shadow_frame_unique_ptr.get(); size_t first_arg_reg = code_item->registers_size_ - code_item->ins_size_; - uint32_t shorty_len = 0; - auto* non_proxy_method = method->GetInterfaceMethodIfProxy(sizeof(void*)); - const char* shorty = non_proxy_method->GetShorty(&shorty_len); BuildQuickShadowFrameVisitor shadow_frame_builder(sp, method->IsStatic(), shorty, shorty_len, shadow_frame, first_arg_reg); shadow_frame_builder.VisitArguments(); const bool needs_initialization = method->IsStatic() && !method->GetDeclaringClass()->IsInitialized(); // Push a transition back into managed code onto the linked list in thread. - ManagedStack fragment; self->PushManagedStackFragment(&fragment); self->PushShadowFrame(shadow_frame); self->EndAssertNoThreadSuspension(old_cause); @@ -681,24 +742,26 @@ extern "C" uint64_t artQuickToInterpreterBridge(ArtMethod* method, Thread* self, return 0; } } - JValue result = interpreter::EnterInterpreterFromEntryPoint(self, code_item, shadow_frame); - // Pop transition. 
- self->PopManagedStackFragment(fragment); - - // Request a stack deoptimization if needed - ArtMethod* caller = QuickArgumentVisitor::GetCallingMethod(sp); - if (UNLIKELY(Dbg::IsForcedInterpreterNeededForUpcall(self, caller))) { - // Push the context of the deoptimization stack so we can restore the return value and the - // exception before executing the deoptimized frames. - self->PushDeoptimizationContext(result, shorty[0] == 'L', self->GetException()); - - // Set special exception to cause deoptimization. - self->SetException(Thread::GetDeoptimizationException()); - } - // No need to restore the args since the method has already been run by the interpreter. - return result.GetJ(); + result = interpreter::EnterInterpreterFromEntryPoint(self, code_item, shadow_frame); + } + + // Pop transition. + self->PopManagedStackFragment(fragment); + + // Request a stack deoptimization if needed + ArtMethod* caller = QuickArgumentVisitor::GetCallingMethod(sp); + if (UNLIKELY(Dbg::IsForcedInterpreterNeededForUpcall(self, caller))) { + // Push the context of the deoptimization stack so we can restore the return value and the + // exception before executing the deoptimized frames. + self->PushDeoptimizationContext(result, shorty[0] == 'L', self->GetException()); + + // Set special exception to cause deoptimization. + self->SetException(Thread::GetDeoptimizationException()); } + + // No need to restore the args since the method has already been run by the interpreter. + return result.GetJ(); } // Visits arguments on the stack placing them into the args vector, Object* arguments are converted @@ -1926,62 +1989,27 @@ extern "C" TwoWordReturn artQuickGenericJniTrampoline(Thread* self, ArtMethod** reinterpret_cast<uintptr_t>(nativeCode)); } +// Defined in quick_jni_entrypoints.cc. +extern uint64_t GenericJniMethodEnd(Thread* self, uint32_t saved_local_ref_cookie, + jvalue result, uint64_t result_f, ArtMethod* called, + HandleScope* handle_scope); /* * Is called after the native JNI code. Responsible for cleanup (handle scope, saved state) and * unlocking. */ -extern "C" uint64_t artQuickGenericJniEndTrampoline(Thread* self, jvalue result, uint64_t result_f) - SHARED_REQUIRES(Locks::mutator_lock_) { +extern "C" uint64_t artQuickGenericJniEndTrampoline(Thread* self, + jvalue result, + uint64_t result_f) { + // We're here just back from a native call. We don't have the shared mutator lock at this point + // yet until we call GoToRunnable() later in GenericJniMethodEnd(). Accessing objects or doing + // anything that requires a mutator lock before that would cause problems as GC may have the + // exclusive mutator lock and may be moving objects, etc. ArtMethod** sp = self->GetManagedStack()->GetTopQuickFrame(); uint32_t* sp32 = reinterpret_cast<uint32_t*>(sp); ArtMethod* called = *sp; uint32_t cookie = *(sp32 - 1); - - jobject lock = nullptr; - if (called->IsSynchronized()) { - HandleScope* table = reinterpret_cast<HandleScope*>(reinterpret_cast<uint8_t*>(sp) - + sizeof(*sp)); - lock = table->GetHandle(0).ToJObject(); - } - - char return_shorty_char = called->GetShorty()[0]; - - if (return_shorty_char == 'L') { - return artQuickGenericJniEndJNIRef(self, cookie, result.l, lock); - } else { - artQuickGenericJniEndJNINonRef(self, cookie, lock); - - switch (return_shorty_char) { - case 'F': { - if (kRuntimeISA == kX86) { - // Convert back the result to float. 
- double d = bit_cast<double, uint64_t>(result_f); - return bit_cast<uint32_t, float>(static_cast<float>(d)); - } else { - return result_f; - } - } - case 'D': - return result_f; - case 'Z': - return result.z; - case 'B': - return result.b; - case 'C': - return result.c; - case 'S': - return result.s; - case 'I': - return result.i; - case 'J': - return result.j; - case 'V': - return 0; - default: - LOG(FATAL) << "Unexpected return shorty character " << return_shorty_char; - return 0; - } - } + HandleScope* table = reinterpret_cast<HandleScope*>(reinterpret_cast<uint8_t*>(sp) + sizeof(*sp)); + return GenericJniMethodEnd(self, cookie, result, result_f, called, table); } // We use TwoWordReturn to optimize scalar returns. We use the hi value for code, and the lo value diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc index 399591b93d..468179c9d5 100644 --- a/runtime/gc/collector/concurrent_copying.cc +++ b/runtime/gc/collector/concurrent_copying.cc @@ -457,6 +457,8 @@ void ConcurrentCopying::MarkingPhase() { CheckEmptyMarkStack(); // Re-enable weak ref accesses. ReenableWeakRefAccess(self); + // Free data for class loaders that we unloaded. + Runtime::Current()->GetClassLinker()->CleanupClassLoaders(); // Marking is done. Disable marking. DisableMarking(); CheckEmptyMarkStack(); diff --git a/runtime/gc/collector/mark_compact.cc b/runtime/gc/collector/mark_compact.cc index 60f833b349..f561764ce4 100644 --- a/runtime/gc/collector/mark_compact.cc +++ b/runtime/gc/collector/mark_compact.cc @@ -205,6 +205,7 @@ void MarkCompact::MarkingPhase() { ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_); SweepSystemWeaks(); } + Runtime::Current()->GetClassLinker()->CleanupClassLoaders(); // Revoke buffers before measuring how many objects were moved since the TLABs need to be revoked // before they are properly counted. RevokeAllThreadLocalBuffers(); diff --git a/runtime/gc/collector/mark_sweep-inl.h b/runtime/gc/collector/mark_sweep-inl.h index 56edcc9d09..e72277ffb2 100644 --- a/runtime/gc/collector/mark_sweep-inl.h +++ b/runtime/gc/collector/mark_sweep-inl.h @@ -29,7 +29,8 @@ namespace gc { namespace collector { template<typename MarkVisitor, typename ReferenceVisitor> -inline void MarkSweep::ScanObjectVisit(mirror::Object* obj, const MarkVisitor& visitor, +inline void MarkSweep::ScanObjectVisit(mirror::Object* obj, + const MarkVisitor& visitor, const ReferenceVisitor& ref_visitor) { DCHECK(IsMarked(obj)) << "Scanning unmarked object " << obj << "\n" << heap_->DumpSpaces(); obj->VisitReferences(visitor, ref_visitor); diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc index 089f453888..77a288ba68 100644 --- a/runtime/gc/collector/mark_sweep.cc +++ b/runtime/gc/collector/mark_sweep.cc @@ -95,10 +95,13 @@ MarkSweep::MarkSweep(Heap* heap, bool is_concurrent, const std::string& name_pre : GarbageCollector(heap, name_prefix + (is_concurrent ? 
"concurrent mark sweep": "mark sweep")), - current_space_bitmap_(nullptr), mark_bitmap_(nullptr), mark_stack_(nullptr), + current_space_bitmap_(nullptr), + mark_bitmap_(nullptr), + mark_stack_(nullptr), gc_barrier_(new Barrier(0)), mark_stack_lock_("mark sweep mark stack lock", kMarkSweepMarkStackLock), - is_concurrent_(is_concurrent), live_stack_freeze_size_(0) { + is_concurrent_(is_concurrent), + live_stack_freeze_size_(0) { std::string error_msg; MemMap* mem_map = MemMap::MapAnonymous( "mark sweep sweep array free buffer", nullptr, @@ -173,7 +176,10 @@ void MarkSweep::RunPhases() { void MarkSweep::ProcessReferences(Thread* self) { WriterMutexLock mu(self, *Locks::heap_bitmap_lock_); GetHeap()->GetReferenceProcessor()->ProcessReferences( - true, GetTimings(), GetCurrentIteration()->GetClearSoftReferences(), this); + true, + GetTimings(), + GetCurrentIteration()->GetClearSoftReferences(), + this); } void MarkSweep::PausePhase() { @@ -265,8 +271,9 @@ void MarkSweep::MarkingPhase() { void MarkSweep::UpdateAndMarkModUnion() { for (const auto& space : heap_->GetContinuousSpaces()) { if (immune_region_.ContainsSpace(space)) { - const char* name = space->IsZygoteSpace() ? "UpdateAndMarkZygoteModUnionTable" : - "UpdateAndMarkImageModUnionTable"; + const char* name = space->IsZygoteSpace() + ? "UpdateAndMarkZygoteModUnionTable" + : "UpdateAndMarkImageModUnionTable"; TimingLogger::ScopedTiming t(name, GetTimings()); accounting::ModUnionTable* mod_union_table = heap_->FindModUnionTableFromSpace(space); CHECK(mod_union_table != nullptr); @@ -283,11 +290,15 @@ void MarkSweep::MarkReachableObjects() { void MarkSweep::ReclaimPhase() { TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); - Thread* self = Thread::Current(); + Thread* const self = Thread::Current(); // Process the references concurrently. ProcessReferences(self); SweepSystemWeaks(self); - Runtime::Current()->AllowNewSystemWeaks(); + Runtime* const runtime = Runtime::Current(); + runtime->AllowNewSystemWeaks(); + // Clean up class loaders after system weaks are swept since that is how we know if class + // unloading occurred. + runtime->GetClassLinker()->CleanupClassLoaders(); { WriterMutexLock mu(self, *Locks::heap_bitmap_lock_); GetHeap()->RecordFreeRevoke(); @@ -361,10 +372,10 @@ bool MarkSweep::IsMarkedHeapReference(mirror::HeapReference<mirror::Object>* ref class MarkSweepMarkObjectSlowPath { public: - explicit MarkSweepMarkObjectSlowPath(MarkSweep* mark_sweep, mirror::Object* holder = nullptr, + explicit MarkSweepMarkObjectSlowPath(MarkSweep* mark_sweep, + mirror::Object* holder = nullptr, MemberOffset offset = MemberOffset(0)) - : mark_sweep_(mark_sweep), holder_(holder), offset_(offset) { - } + : mark_sweep_(mark_sweep), holder_(holder), offset_(offset) {} void operator()(const mirror::Object* obj) const NO_THREAD_SAFETY_ANALYSIS { if (kProfileLargeObjects) { @@ -441,7 +452,8 @@ class MarkSweepMarkObjectSlowPath { MemberOffset offset_; }; -inline void MarkSweep::MarkObjectNonNull(mirror::Object* obj, mirror::Object* holder, +inline void MarkSweep::MarkObjectNonNull(mirror::Object* obj, + mirror::Object* holder, MemberOffset offset) { DCHECK(obj != nullptr); if (kUseBakerOrBrooksReadBarrier) { @@ -508,7 +520,8 @@ void MarkSweep::MarkHeapReference(mirror::HeapReference<mirror::Object>* ref) { } // Used to mark objects when processing the mark stack. If an object is null, it is not marked. 
-inline void MarkSweep::MarkObject(mirror::Object* obj, mirror::Object* holder, +inline void MarkSweep::MarkObject(mirror::Object* obj, + mirror::Object* holder, MemberOffset offset) { if (obj != nullptr) { MarkObjectNonNull(obj, holder, offset); @@ -530,14 +543,16 @@ class VerifyRootMarkedVisitor : public SingleRootVisitor { MarkSweep* const collector_; }; -void MarkSweep::VisitRoots(mirror::Object*** roots, size_t count, +void MarkSweep::VisitRoots(mirror::Object*** roots, + size_t count, const RootInfo& info ATTRIBUTE_UNUSED) { for (size_t i = 0; i < count; ++i) { MarkObjectNonNull(*roots[i]); } } -void MarkSweep::VisitRoots(mirror::CompressedReference<mirror::Object>** roots, size_t count, +void MarkSweep::VisitRoots(mirror::CompressedReference<mirror::Object>** roots, + size_t count, const RootInfo& info ATTRIBUTE_UNUSED) { for (size_t i = 0; i < count; ++i) { MarkObjectNonNull(roots[i]->AsMirrorPtr()); @@ -596,8 +611,10 @@ class ScanObjectVisitor { explicit ScanObjectVisitor(MarkSweep* const mark_sweep) ALWAYS_INLINE : mark_sweep_(mark_sweep) {} - void operator()(mirror::Object* obj) const ALWAYS_INLINE - SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_) { + void operator()(mirror::Object* obj) const + ALWAYS_INLINE + REQUIRES(Locks::heap_bitmap_lock_) + SHARED_REQUIRES(Locks::mutator_lock_) { if (kCheckLocks) { Locks::mutator_lock_->AssertSharedHeld(Thread::Current()); Locks::heap_bitmap_lock_->AssertExclusiveHeld(Thread::Current()); @@ -611,12 +628,11 @@ class ScanObjectVisitor { class DelayReferenceReferentVisitor { public: - explicit DelayReferenceReferentVisitor(MarkSweep* collector) : collector_(collector) { - } + explicit DelayReferenceReferentVisitor(MarkSweep* collector) : collector_(collector) {} void operator()(mirror::Class* klass, mirror::Reference* ref) const - SHARED_REQUIRES(Locks::mutator_lock_) - REQUIRES(Locks::heap_bitmap_lock_) { + REQUIRES(Locks::heap_bitmap_lock_) + SHARED_REQUIRES(Locks::mutator_lock_) { collector_->DelayReferenceReferent(klass, ref); } @@ -627,7 +643,9 @@ class DelayReferenceReferentVisitor { template <bool kUseFinger = false> class MarkStackTask : public Task { public: - MarkStackTask(ThreadPool* thread_pool, MarkSweep* mark_sweep, size_t mark_stack_size, + MarkStackTask(ThreadPool* thread_pool, + MarkSweep* mark_sweep, + size_t mark_stack_size, StackReference<mirror::Object>* mark_stack) : mark_sweep_(mark_sweep), thread_pool_(thread_pool), @@ -652,8 +670,10 @@ class MarkStackTask : public Task { MarkSweep* mark_sweep) : chunk_task_(chunk_task), mark_sweep_(mark_sweep) {} - void operator()(mirror::Object* obj, MemberOffset offset, bool /* static */) const - ALWAYS_INLINE SHARED_REQUIRES(Locks::mutator_lock_) { + ALWAYS_INLINE void operator()(mirror::Object* obj, + MemberOffset offset, + bool is_static ATTRIBUTE_UNUSED) const + SHARED_REQUIRES(Locks::mutator_lock_) { Mark(obj->GetFieldObject<mirror::Object>(offset)); } @@ -674,7 +694,7 @@ class MarkStackTask : public Task { } private: - void Mark(mirror::Object* ref) const ALWAYS_INLINE SHARED_REQUIRES(Locks::mutator_lock_) { + ALWAYS_INLINE void Mark(mirror::Object* ref) const SHARED_REQUIRES(Locks::mutator_lock_) { if (ref != nullptr && mark_sweep_->MarkObjectParallel(ref)) { if (kUseFinger) { std::atomic_thread_fence(std::memory_order_seq_cst); @@ -693,12 +713,13 @@ class MarkStackTask : public Task { class ScanObjectParallelVisitor { public: - explicit ScanObjectParallelVisitor(MarkStackTask<kUseFinger>* chunk_task) ALWAYS_INLINE + ALWAYS_INLINE explicit 
ScanObjectParallelVisitor(MarkStackTask<kUseFinger>* chunk_task) : chunk_task_(chunk_task) {} // No thread safety analysis since multiple threads will use this visitor. - void operator()(mirror::Object* obj) const SHARED_REQUIRES(Locks::mutator_lock_) - REQUIRES(Locks::heap_bitmap_lock_) { + void operator()(mirror::Object* obj) const + REQUIRES(Locks::heap_bitmap_lock_) + SHARED_REQUIRES(Locks::mutator_lock_) { MarkSweep* const mark_sweep = chunk_task_->mark_sweep_; MarkObjectParallelVisitor mark_visitor(chunk_task_, mark_sweep); DelayReferenceReferentVisitor ref_visitor(mark_sweep); @@ -729,7 +750,9 @@ class MarkStackTask : public Task { if (UNLIKELY(mark_stack_pos_ == kMaxSize)) { // Mark stack overflow, give 1/2 the stack to the thread pool as a new work task. mark_stack_pos_ /= 2; - auto* task = new MarkStackTask(thread_pool_, mark_sweep_, kMaxSize - mark_stack_pos_, + auto* task = new MarkStackTask(thread_pool_, + mark_sweep_, + kMaxSize - mark_stack_pos_, mark_stack_ + mark_stack_pos_); thread_pool_->AddTask(Thread::Current(), task); } @@ -743,9 +766,9 @@ class MarkStackTask : public Task { } // Scans all of the objects - virtual void Run(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) - REQUIRES(Locks::heap_bitmap_lock_) { - UNUSED(self); + virtual void Run(Thread* self ATTRIBUTE_UNUSED) + REQUIRES(Locks::heap_bitmap_lock_) + SHARED_REQUIRES(Locks::mutator_lock_) { ScanObjectParallelVisitor visitor(this); // TODO: Tune this. static const size_t kFifoSize = 4; @@ -778,16 +801,21 @@ class MarkStackTask : public Task { class CardScanTask : public MarkStackTask<false> { public: - CardScanTask(ThreadPool* thread_pool, MarkSweep* mark_sweep, + CardScanTask(ThreadPool* thread_pool, + MarkSweep* mark_sweep, accounting::ContinuousSpaceBitmap* bitmap, - uint8_t* begin, uint8_t* end, uint8_t minimum_age, size_t mark_stack_size, - StackReference<mirror::Object>* mark_stack_obj, bool clear_card) + uint8_t* begin, + uint8_t* end, + uint8_t minimum_age, + size_t mark_stack_size, + StackReference<mirror::Object>* mark_stack_obj, + bool clear_card) : MarkStackTask<false>(thread_pool, mark_sweep, mark_stack_size, mark_stack_obj), bitmap_(bitmap), begin_(begin), end_(end), - minimum_age_(minimum_age), clear_card_(clear_card) { - } + minimum_age_(minimum_age), + clear_card_(clear_card) {} protected: accounting::ContinuousSpaceBitmap* const bitmap_; @@ -803,9 +831,9 @@ class CardScanTask : public MarkStackTask<false> { virtual void Run(Thread* self) NO_THREAD_SAFETY_ANALYSIS { ScanObjectParallelVisitor visitor(this); accounting::CardTable* card_table = mark_sweep_->GetHeap()->GetCardTable(); - size_t cards_scanned = clear_card_ ? - card_table->Scan<true>(bitmap_, begin_, end_, visitor, minimum_age_) : - card_table->Scan<false>(bitmap_, begin_, end_, visitor, minimum_age_); + size_t cards_scanned = clear_card_ + ? card_table->Scan<true>(bitmap_, begin_, end_, visitor, minimum_age_) + : card_table->Scan<false>(bitmap_, begin_, end_, visitor, minimum_age_); VLOG(heap) << "Parallel scanning cards " << reinterpret_cast<void*>(begin_) << " - " << reinterpret_cast<void*>(end_) << " = " << cards_scanned; // Finish by emptying our local mark stack. @@ -873,9 +901,15 @@ void MarkSweep::ScanGrayObjects(bool paused, uint8_t minimum_age) { mark_stack_->PopBackCount(static_cast<int32_t>(mark_stack_increment)); DCHECK_EQ(mark_stack_end, mark_stack_->End()); // Add the new task to the thread pool. 
- auto* task = new CardScanTask(thread_pool, this, space->GetMarkBitmap(), card_begin, - card_begin + card_increment, minimum_age, - mark_stack_increment, mark_stack_end, clear_card); + auto* task = new CardScanTask(thread_pool, + this, + space->GetMarkBitmap(), + card_begin, + card_begin + card_increment, + minimum_age, + mark_stack_increment, + mark_stack_end, + clear_card); thread_pool->AddTask(self, task); card_begin += card_increment; } @@ -911,10 +945,16 @@ void MarkSweep::ScanGrayObjects(bool paused, uint8_t minimum_age) { ScanObjectVisitor visitor(this); bool clear_card = paused && !space->IsZygoteSpace() && !space->IsImageSpace(); if (clear_card) { - card_table->Scan<true>(space->GetMarkBitmap(), space->Begin(), space->End(), visitor, + card_table->Scan<true>(space->GetMarkBitmap(), + space->Begin(), + space->End(), + visitor, minimum_age); } else { - card_table->Scan<false>(space->GetMarkBitmap(), space->Begin(), space->End(), visitor, + card_table->Scan<false>(space->GetMarkBitmap(), + space->Begin(), + space->End(), + visitor, minimum_age); } } @@ -924,11 +964,15 @@ void MarkSweep::ScanGrayObjects(bool paused, uint8_t minimum_age) { class RecursiveMarkTask : public MarkStackTask<false> { public: - RecursiveMarkTask(ThreadPool* thread_pool, MarkSweep* mark_sweep, - accounting::ContinuousSpaceBitmap* bitmap, uintptr_t begin, uintptr_t end) - : MarkStackTask<false>(thread_pool, mark_sweep, 0, nullptr), bitmap_(bitmap), begin_(begin), - end_(end) { - } + RecursiveMarkTask(ThreadPool* thread_pool, + MarkSweep* mark_sweep, + accounting::ContinuousSpaceBitmap* bitmap, + uintptr_t begin, + uintptr_t end) + : MarkStackTask<false>(thread_pool, mark_sweep, 0, nullptr), + bitmap_(bitmap), + begin_(begin), + end_(end) {} protected: accounting::ContinuousSpaceBitmap* const bitmap_; @@ -985,7 +1029,10 @@ void MarkSweep::RecursiveMark() { delta = RoundUp(delta, KB); if (delta < 16 * KB) delta = end - begin; begin += delta; - auto* task = new RecursiveMarkTask(thread_pool, this, current_space_bitmap_, start, + auto* task = new RecursiveMarkTask(thread_pool, + this, + current_space_bitmap_, + start, begin); thread_pool->AddTask(self, task); } @@ -1032,7 +1079,8 @@ class VerifySystemWeakVisitor : public IsMarkedVisitor { public: explicit VerifySystemWeakVisitor(MarkSweep* mark_sweep) : mark_sweep_(mark_sweep) {} - virtual mirror::Object* IsMarked(mirror::Object* obj) OVERRIDE + virtual mirror::Object* IsMarked(mirror::Object* obj) + OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_, Locks::heap_bitmap_lock_) { mark_sweep_->VerifyIsLive(obj); return obj; @@ -1073,7 +1121,8 @@ class CheckpointMarkThreadRoots : public Closure, public RootVisitor { } } - void VisitRoots(mirror::CompressedReference<mirror::Object>** roots, size_t count, + void VisitRoots(mirror::CompressedReference<mirror::Object>** roots, + size_t count, const RootInfo& info ATTRIBUTE_UNUSED) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_) { @@ -1247,7 +1296,8 @@ void MarkSweep::Sweep(bool swap_bitmaps) { if (space->IsContinuousMemMapAllocSpace()) { space::ContinuousMemMapAllocSpace* alloc_space = space->AsContinuousMemMapAllocSpace(); TimingLogger::ScopedTiming split( - alloc_space->IsZygoteSpace() ? "SweepZygoteSpace" : "SweepMallocSpace", GetTimings()); + alloc_space->IsZygoteSpace() ? 
"SweepZygoteSpace" : "SweepMallocSpace", + GetTimings()); RecordFree(alloc_space->Sweep(swap_bitmaps)); } } @@ -1270,12 +1320,13 @@ void MarkSweep::DelayReferenceReferent(mirror::Class* klass, mirror::Reference* class MarkVisitor { public: - explicit MarkVisitor(MarkSweep* const mark_sweep) ALWAYS_INLINE : mark_sweep_(mark_sweep) { - } + ALWAYS_INLINE explicit MarkVisitor(MarkSweep* const mark_sweep) : mark_sweep_(mark_sweep) {} - void operator()(mirror::Object* obj, MemberOffset offset, bool is_static ATTRIBUTE_UNUSED) const - ALWAYS_INLINE SHARED_REQUIRES(Locks::mutator_lock_) - REQUIRES(Locks::heap_bitmap_lock_) { + ALWAYS_INLINE void operator()(mirror::Object* obj, + MemberOffset offset, + bool is_static ATTRIBUTE_UNUSED) const + REQUIRES(Locks::heap_bitmap_lock_) + SHARED_REQUIRES(Locks::mutator_lock_) { if (kCheckLocks) { Locks::mutator_lock_->AssertSharedHeld(Thread::Current()); Locks::heap_bitmap_lock_->AssertExclusiveHeld(Thread::Current()); @@ -1284,14 +1335,16 @@ class MarkVisitor { } void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const - SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_) { + REQUIRES(Locks::heap_bitmap_lock_) + SHARED_REQUIRES(Locks::mutator_lock_) { if (!root->IsNull()) { VisitRoot(root); } } void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const - SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_) { + REQUIRES(Locks::heap_bitmap_lock_) + SHARED_REQUIRES(Locks::mutator_lock_) { if (kCheckLocks) { Locks::mutator_lock_->AssertSharedHeld(Thread::Current()); Locks::heap_bitmap_lock_->AssertExclusiveHeld(Thread::Current()); diff --git a/runtime/gc/collector/mark_sweep.h b/runtime/gc/collector/mark_sweep.h index 371bba531d..8f7df78d53 100644 --- a/runtime/gc/collector/mark_sweep.h +++ b/runtime/gc/collector/mark_sweep.h @@ -33,9 +33,9 @@ namespace art { namespace mirror { - class Class; - class Object; - class Reference; +class Class; +class Object; +class Reference; } // namespace mirror class Thread; @@ -46,8 +46,8 @@ namespace gc { class Heap; namespace accounting { - template<typename T> class AtomicStack; - typedef AtomicStack<mirror::Object> ObjectStack; +template<typename T> class AtomicStack; +typedef AtomicStack<mirror::Object> ObjectStack; } // namespace accounting namespace collector { @@ -60,12 +60,14 @@ class MarkSweep : public GarbageCollector { virtual void RunPhases() OVERRIDE REQUIRES(!mark_stack_lock_); void InitializePhase(); - void MarkingPhase() SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_); - void PausePhase() REQUIRES(Locks::mutator_lock_, !mark_stack_lock_); - void ReclaimPhase() SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_); + void MarkingPhase() REQUIRES(!mark_stack_lock_) SHARED_REQUIRES(Locks::mutator_lock_); + void PausePhase() REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_); + void ReclaimPhase() REQUIRES(!mark_stack_lock_) SHARED_REQUIRES(Locks::mutator_lock_); void FinishPhase(); virtual void MarkReachableObjects() - SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_, !mark_stack_lock_); + REQUIRES(Locks::heap_bitmap_lock_) + REQUIRES(!mark_stack_lock_) + SHARED_REQUIRES(Locks::mutator_lock_); bool IsConcurrent() const { return is_concurrent_; @@ -87,20 +89,30 @@ class MarkSweep : public GarbageCollector { // Marks all objects in the root set at the start of a garbage collection. 
void MarkRoots(Thread* self) - REQUIRES(Locks::heap_bitmap_lock_, !mark_stack_lock_) SHARED_REQUIRES(Locks::mutator_lock_); + REQUIRES(Locks::heap_bitmap_lock_) + REQUIRES(!mark_stack_lock_) + SHARED_REQUIRES(Locks::mutator_lock_); void MarkNonThreadRoots() - REQUIRES(Locks::heap_bitmap_lock_, !mark_stack_lock_) SHARED_REQUIRES(Locks::mutator_lock_); + REQUIRES(Locks::heap_bitmap_lock_) + REQUIRES(!mark_stack_lock_) + SHARED_REQUIRES(Locks::mutator_lock_); void MarkConcurrentRoots(VisitRootFlags flags) - REQUIRES(Locks::heap_bitmap_lock_, !mark_stack_lock_) SHARED_REQUIRES(Locks::mutator_lock_); + REQUIRES(Locks::heap_bitmap_lock_) + REQUIRES(!mark_stack_lock_) + SHARED_REQUIRES(Locks::mutator_lock_); void MarkRootsCheckpoint(Thread* self, bool revoke_ros_alloc_thread_local_buffers_at_checkpoint) - REQUIRES(Locks::heap_bitmap_lock_, !mark_stack_lock_) SHARED_REQUIRES(Locks::mutator_lock_); + REQUIRES(Locks::heap_bitmap_lock_) + REQUIRES(!mark_stack_lock_) + SHARED_REQUIRES(Locks::mutator_lock_); // Builds a mark stack and recursively mark until it empties. void RecursiveMark() - REQUIRES(Locks::heap_bitmap_lock_, !mark_stack_lock_) SHARED_REQUIRES(Locks::mutator_lock_); + REQUIRES(Locks::heap_bitmap_lock_) + REQUIRES(!mark_stack_lock_) + SHARED_REQUIRES(Locks::mutator_lock_); // Bind the live bits to the mark bits of bitmaps for spaces that are never collected, ie // the image. Mark that portion of the heap as immune. @@ -108,26 +120,35 @@ class MarkSweep : public GarbageCollector { // Builds a mark stack with objects on dirty cards and recursively mark until it empties. void RecursiveMarkDirtyObjects(bool paused, uint8_t minimum_age) - REQUIRES(Locks::heap_bitmap_lock_, !mark_stack_lock_) SHARED_REQUIRES(Locks::mutator_lock_); + REQUIRES(Locks::heap_bitmap_lock_) + REQUIRES(!mark_stack_lock_) + SHARED_REQUIRES(Locks::mutator_lock_); // Remarks the root set after completing the concurrent mark. void ReMarkRoots() - REQUIRES(Locks::heap_bitmap_lock_, !mark_stack_lock_) SHARED_REQUIRES(Locks::mutator_lock_); + REQUIRES(Locks::heap_bitmap_lock_) + REQUIRES(!mark_stack_lock_) + SHARED_REQUIRES(Locks::mutator_lock_); void ProcessReferences(Thread* self) - SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_); + REQUIRES(!mark_stack_lock_) + SHARED_REQUIRES(Locks::mutator_lock_); // Update and mark references from immune spaces. void UpdateAndMarkModUnion() - SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_); + REQUIRES(!mark_stack_lock_) + SHARED_REQUIRES(Locks::mutator_lock_); // Pre clean cards to reduce how much work is needed in the pause. void PreCleanCards() - REQUIRES(Locks::heap_bitmap_lock_, !mark_stack_lock_) SHARED_REQUIRES(Locks::mutator_lock_); + REQUIRES(Locks::heap_bitmap_lock_) + REQUIRES(!mark_stack_lock_) + SHARED_REQUIRES(Locks::mutator_lock_); // Sweeps unmarked objects to complete the garbage collection. Virtual as by default it sweeps // all allocation spaces. Partial and sticky GCs want to just sweep a subset of the heap. - virtual void Sweep(bool swap_bitmaps) REQUIRES(Locks::heap_bitmap_lock_) + virtual void Sweep(bool swap_bitmaps) + REQUIRES(Locks::heap_bitmap_lock_) SHARED_REQUIRES(Locks::mutator_lock_); // Sweeps unmarked objects to complete the garbage collection. @@ -135,20 +156,27 @@ class MarkSweep : public GarbageCollector { // Sweep only pointers within an array. WARNING: Trashes objects. 
void SweepArray(accounting::ObjectStack* allocation_stack_, bool swap_bitmaps) - SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_); + REQUIRES(Locks::heap_bitmap_lock_) + SHARED_REQUIRES(Locks::mutator_lock_); // Blackens an object. void ScanObject(mirror::Object* obj) - REQUIRES(Locks::heap_bitmap_lock_, !mark_stack_lock_) SHARED_REQUIRES(Locks::mutator_lock_); + REQUIRES(Locks::heap_bitmap_lock_) + REQUIRES(!mark_stack_lock_) + SHARED_REQUIRES(Locks::mutator_lock_); // No thread safety analysis due to lambdas. template<typename MarkVisitor, typename ReferenceVisitor> - void ScanObjectVisit(mirror::Object* obj, const MarkVisitor& visitor, + void ScanObjectVisit(mirror::Object* obj, + const MarkVisitor& visitor, const ReferenceVisitor& ref_visitor) - SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_, !mark_stack_lock_); + REQUIRES(Locks::heap_bitmap_lock_) + REQUIRES(!mark_stack_lock_) + SHARED_REQUIRES(Locks::mutator_lock_); void SweepSystemWeaks(Thread* self) - SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Locks::heap_bitmap_lock_); + REQUIRES(!Locks::heap_bitmap_lock_) + SHARED_REQUIRES(Locks::mutator_lock_); static mirror::Object* VerifySystemWeakIsLiveCallback(mirror::Object* obj, void* arg) SHARED_REQUIRES(Locks::heap_bitmap_lock_, Locks::mutator_lock_); @@ -161,22 +189,36 @@ class MarkSweep : public GarbageCollector { SHARED_REQUIRES(Locks::mutator_lock_, Locks::heap_bitmap_lock_); virtual bool IsMarkedHeapReference(mirror::HeapReference<mirror::Object>* ref) OVERRIDE - SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_); + REQUIRES(Locks::heap_bitmap_lock_) + SHARED_REQUIRES(Locks::mutator_lock_); virtual void VisitRoots(mirror::Object*** roots, size_t count, const RootInfo& info) OVERRIDE - SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_, !mark_stack_lock_); + REQUIRES(Locks::heap_bitmap_lock_) + REQUIRES(!mark_stack_lock_) + SHARED_REQUIRES(Locks::mutator_lock_); - virtual void VisitRoots(mirror::CompressedReference<mirror::Object>** roots, size_t count, + virtual void VisitRoots(mirror::CompressedReference<mirror::Object>** roots, + size_t count, const RootInfo& info) OVERRIDE - SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_, !mark_stack_lock_); + REQUIRES(Locks::heap_bitmap_lock_) + REQUIRES(!mark_stack_lock_) + SHARED_REQUIRES(Locks::mutator_lock_); // Marks an object. 
virtual mirror::Object* MarkObject(mirror::Object* obj) OVERRIDE - SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_, !mark_stack_lock_); + REQUIRES(Locks::heap_bitmap_lock_) + REQUIRES(!mark_stack_lock_) + SHARED_REQUIRES(Locks::mutator_lock_); + void MarkObject(mirror::Object* obj, mirror::Object* holder, MemberOffset offset) - SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_, !mark_stack_lock_); + REQUIRES(Locks::heap_bitmap_lock_) + REQUIRES(!mark_stack_lock_) + SHARED_REQUIRES(Locks::mutator_lock_); + virtual void MarkHeapReference(mirror::HeapReference<mirror::Object>* ref) OVERRIDE - SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_, !mark_stack_lock_); + REQUIRES(Locks::heap_bitmap_lock_) + REQUIRES(!mark_stack_lock_) + SHARED_REQUIRES(Locks::mutator_lock_); Barrier& GetBarrier() { return *gc_barrier_; @@ -191,13 +233,17 @@ class MarkSweep : public GarbageCollector { virtual mirror::Object* IsMarked(mirror::Object* object) OVERRIDE SHARED_REQUIRES(Locks::heap_bitmap_lock_); - void MarkObjectNonNull(mirror::Object* obj, mirror::Object* holder = nullptr, + void MarkObjectNonNull(mirror::Object* obj, + mirror::Object* holder = nullptr, MemberOffset offset = MemberOffset(0)) - SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_, !mark_stack_lock_); + REQUIRES(Locks::heap_bitmap_lock_) + REQUIRES(!mark_stack_lock_) + SHARED_REQUIRES(Locks::mutator_lock_); // Marks an object atomically, safe to use from multiple threads. void MarkObjectNonNullParallel(mirror::Object* obj) - SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_); + REQUIRES(!mark_stack_lock_) + SHARED_REQUIRES(Locks::mutator_lock_); // Returns true if we need to add obj to a mark stack. bool MarkObjectParallel(mirror::Object* obj) NO_THREAD_SAFETY_ANALYSIS; @@ -208,9 +254,12 @@ class MarkSweep : public GarbageCollector { NO_THREAD_SAFETY_ANALYSIS; // Expand mark stack to 2x its current size. - void ExpandMarkStack() REQUIRES(mark_stack_lock_) + void ExpandMarkStack() + REQUIRES(mark_stack_lock_) SHARED_REQUIRES(Locks::mutator_lock_); - void ResizeMarkStack(size_t new_size) REQUIRES(mark_stack_lock_) + + void ResizeMarkStack(size_t new_size) + REQUIRES(mark_stack_lock_) SHARED_REQUIRES(Locks::mutator_lock_); // Returns how many threads we should use for the current GC phase based on if we are paused, @@ -218,24 +267,34 @@ class MarkSweep : public GarbageCollector { size_t GetThreadCount(bool paused) const; // Push a single reference on a mark stack. - void PushOnMarkStack(mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_) - REQUIRES(!mark_stack_lock_); + void PushOnMarkStack(mirror::Object* obj) + REQUIRES(!mark_stack_lock_) + SHARED_REQUIRES(Locks::mutator_lock_); // Blackens objects grayed during a garbage collection. void ScanGrayObjects(bool paused, uint8_t minimum_age) - REQUIRES(Locks::heap_bitmap_lock_, !mark_stack_lock_) SHARED_REQUIRES(Locks::mutator_lock_); + REQUIRES(Locks::heap_bitmap_lock_) + REQUIRES(!mark_stack_lock_) + SHARED_REQUIRES(Locks::mutator_lock_); - virtual void ProcessMarkStack() OVERRIDE REQUIRES(Locks::heap_bitmap_lock_, !mark_stack_lock_) + virtual void ProcessMarkStack() + OVERRIDE + REQUIRES(Locks::heap_bitmap_lock_) + REQUIRES(!mark_stack_lock_) SHARED_REQUIRES(Locks::mutator_lock_) { ProcessMarkStack(false); } // Recursively blackens objects on the mark stack. 
void ProcessMarkStack(bool paused) - REQUIRES(Locks::heap_bitmap_lock_, !mark_stack_lock_) SHARED_REQUIRES(Locks::mutator_lock_); + REQUIRES(Locks::heap_bitmap_lock_) + REQUIRES(!mark_stack_lock_) + SHARED_REQUIRES(Locks::mutator_lock_); void ProcessMarkStackParallel(size_t thread_count) - REQUIRES(Locks::heap_bitmap_lock_, !mark_stack_lock_) SHARED_REQUIRES(Locks::mutator_lock_); + REQUIRES(Locks::heap_bitmap_lock_) + REQUIRES(!mark_stack_lock_) + SHARED_REQUIRES(Locks::mutator_lock_); // Used to Get around thread safety annotations. The call is from MarkingPhase and is guarded by // IsExclusiveHeld. @@ -293,23 +352,15 @@ class MarkSweep : public GarbageCollector { std::unique_ptr<MemMap> sweep_array_free_buffer_mem_map_; private: - friend class AddIfReachesAllocSpaceVisitor; // Used by mod-union table. friend class CardScanTask; friend class CheckBitmapVisitor; friend class CheckReferenceVisitor; friend class CheckpointMarkThreadRoots; - friend class art::gc::Heap; + friend class Heap; friend class FifoMarkStackChunk; friend class MarkObjectVisitor; template<bool kUseFinger> friend class MarkStackTask; friend class MarkSweepMarkObjectSlowPath; - friend class ModUnionCheckReferences; - friend class ModUnionClearCardVisitor; - friend class ModUnionReferenceVisitor; - friend class ModUnionScanImageRootVisitor; - friend class ModUnionTableBitmap; - friend class ModUnionTableReferenceCache; - friend class ModUnionVisitor; friend class VerifyRootMarkedVisitor; friend class VerifyRootVisitor; diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc index ed63ed049f..7f57f30b27 100644 --- a/runtime/gc/collector/semi_space.cc +++ b/runtime/gc/collector/semi_space.cc @@ -248,6 +248,7 @@ void SemiSpace::MarkingPhase() { ReaderMutexLock mu(self_, *Locks::heap_bitmap_lock_); SweepSystemWeaks(); } + Runtime::Current()->GetClassLinker()->CleanupClassLoaders(); // Revoke buffers before measuring how many objects were moved since the TLABs need to be revoked // before they are properly counted. RevokeAllThreadLocalBuffers(); diff --git a/runtime/gc/collector/sticky_mark_sweep.cc b/runtime/gc/collector/sticky_mark_sweep.cc index 5be3db712b..6c32658e43 100644 --- a/runtime/gc/collector/sticky_mark_sweep.cc +++ b/runtime/gc/collector/sticky_mark_sweep.cc @@ -25,8 +25,7 @@ namespace gc { namespace collector { StickyMarkSweep::StickyMarkSweep(Heap* heap, bool is_concurrent, const std::string& name_prefix) - : PartialMarkSweep(heap, is_concurrent, - name_prefix.empty() ? "sticky " : name_prefix) { + : PartialMarkSweep(heap, is_concurrent, name_prefix.empty() ? "sticky " : name_prefix) { cumulative_timings_.SetName(GetName()); } diff --git a/runtime/gc/collector/sticky_mark_sweep.h b/runtime/gc/collector/sticky_mark_sweep.h index e8f0672426..abaf97845d 100644 --- a/runtime/gc/collector/sticky_mark_sweep.h +++ b/runtime/gc/collector/sticky_mark_sweep.h @@ -38,13 +38,15 @@ class StickyMarkSweep FINAL : public PartialMarkSweep { // alloc space will be marked as immune. 
void BindBitmaps() OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_); - void MarkReachableObjects() OVERRIDE - SHARED_REQUIRES(Locks::mutator_lock_) - REQUIRES(Locks::heap_bitmap_lock_); - - void Sweep(bool swap_bitmaps) OVERRIDE - SHARED_REQUIRES(Locks::mutator_lock_) - REQUIRES(Locks::heap_bitmap_lock_); + void MarkReachableObjects() + OVERRIDE + REQUIRES(Locks::heap_bitmap_lock_) + SHARED_REQUIRES(Locks::mutator_lock_); + + void Sweep(bool swap_bitmaps) + OVERRIDE + REQUIRES(Locks::heap_bitmap_lock_) + SHARED_REQUIRES(Locks::mutator_lock_); private: DISALLOW_IMPLICIT_CONSTRUCTORS(StickyMarkSweep); diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc index cfe77135b7..7d664faa40 100644 --- a/runtime/gc/heap.cc +++ b/runtime/gc/heap.cc @@ -1963,6 +1963,10 @@ HomogeneousSpaceCompactResult Heap::PerformHomogeneousSpaceCompact() { GrowForUtilization(semi_space_collector_); LogGC(kGcCauseHomogeneousSpaceCompact, collector); FinishGC(self, collector::kGcTypeFull); + { + ScopedObjectAccess soa(self); + soa.Vm()->UnloadNativeLibraries(); + } return HomogeneousSpaceCompactResult::kSuccess; } @@ -2104,6 +2108,10 @@ void Heap::TransitionCollector(CollectorType collector_type) { DCHECK(collector != nullptr); LogGC(kGcCauseCollectorTransition, collector); FinishGC(self, collector::kGcTypeFull); + { + ScopedObjectAccess soa(self); + soa.Vm()->UnloadNativeLibraries(); + } int32_t after_allocated = num_bytes_allocated_.LoadSequentiallyConsistent(); int32_t delta_allocated = before_allocated - after_allocated; std::string saved_str; @@ -2588,6 +2596,12 @@ collector::GcType Heap::CollectGarbageInternal(collector::GcType gc_type, FinishGC(self, gc_type); // Inform DDMS that a GC completed. Dbg::GcDidFinish(); + // Unload native libraries for class unloading. We do this after calling FinishGC to prevent + // deadlocks in case the JNI_OnUnload function does allocations. + { + ScopedObjectAccess soa(self); + soa.Vm()->UnloadNativeLibraries(); + } return gc_type; } diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc index 3ac80c6642..f783b04b95 100644 --- a/runtime/interpreter/interpreter.cc +++ b/runtime/interpreter/interpreter.cc @@ -21,6 +21,7 @@ #include "mirror/string-inl.h" #include "scoped_thread_state_change.h" #include "ScopedLocalRef.h" +#include "stack.h" #include "unstarted_runtime.h" namespace art { @@ -330,8 +331,9 @@ void EnterInterpreterFromInvoke(Thread* self, ArtMethod* method, Object* receive } // Set up shadow frame with matching number of reference slots to vregs. 
ShadowFrame* last_shadow_frame = self->GetManagedStack()->GetTopShadowFrame(); - void* memory = alloca(ShadowFrame::ComputeSize(num_regs)); - ShadowFrame* shadow_frame(ShadowFrame::Create(num_regs, last_shadow_frame, method, 0, memory)); + ShadowFrameAllocaUniquePtr shadow_frame_unique_ptr = + CREATE_SHADOW_FRAME(num_regs, last_shadow_frame, method, 0); + ShadowFrame* shadow_frame = shadow_frame_unique_ptr.get(); self->PushShadowFrame(shadow_frame); size_t cur_reg = num_regs - num_ins; diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc index 02ec90c78f..ad34c9ad9e 100644 --- a/runtime/interpreter/interpreter_common.cc +++ b/runtime/interpreter/interpreter_common.cc @@ -21,12 +21,16 @@ #include "debugger.h" #include "entrypoints/runtime_asm_entrypoints.h" #include "mirror/array-inl.h" +#include "stack.h" #include "unstarted_runtime.h" #include "verifier/method_verifier.h" namespace art { namespace interpreter { +// All lambda closures have to be a consecutive pair of virtual registers. +static constexpr size_t kLambdaVirtualRegisterWidth = 2; + void ThrowNullPointerExceptionFromInterpreter() { ThrowNullPointerExceptionFromDexPC(); } @@ -483,13 +487,16 @@ void AbortTransactionV(Thread* self, const char* fmt, va_list args) { } // Separate declaration is required solely for the attributes. -template<bool is_range, bool do_assignability_check> SHARED_REQUIRES(Locks::mutator_lock_) +template <bool is_range, + bool do_assignability_check, + size_t kVarArgMax> + SHARED_REQUIRES(Locks::mutator_lock_) static inline bool DoCallCommon(ArtMethod* called_method, Thread* self, ShadowFrame& shadow_frame, JValue* result, uint16_t number_of_inputs, - uint32_t arg[Instruction::kMaxVarArgRegs], + uint32_t (&arg)[kVarArgMax], uint32_t vregC) ALWAYS_INLINE; SHARED_REQUIRES(Locks::mutator_lock_) @@ -509,13 +516,15 @@ static inline bool NeedsInterpreter(Thread* self, ShadowFrame* new_shadow_frame) Dbg::IsForcedInterpreterNeededForCalling(self, target); } -template<bool is_range, bool do_assignability_check> +template <bool is_range, + bool do_assignability_check, + size_t kVarArgMax> static inline bool DoCallCommon(ArtMethod* called_method, Thread* self, ShadowFrame& shadow_frame, JValue* result, uint16_t number_of_inputs, - uint32_t arg[Instruction::kMaxVarArgRegs], + uint32_t (&arg)[kVarArgMax], uint32_t vregC) { bool string_init = false; // Replace calls to String.<init> with equivalent StringFactory call. @@ -560,10 +569,10 @@ static inline bool DoCallCommon(ArtMethod* called_method, number_of_inputs--; // Rewrite the var-args, dropping the 0th argument ("this") - for (uint32_t i = 1; i < Instruction::kMaxVarArgRegs; ++i) { + for (uint32_t i = 1; i < arraysize(arg); ++i) { arg[i - 1] = arg[i]; } - arg[Instruction::kMaxVarArgRegs - 1] = 0; + arg[arraysize(arg) - 1] = 0; // Rewrite the non-var-arg case vregC++; // Skips the 0th vreg in the range ("this"). @@ -576,9 +585,9 @@ static inline bool DoCallCommon(ArtMethod* called_method, // Allocate shadow frame on the stack. 
const char* old_cause = self->StartAssertNoThreadSuspension("DoCallCommon"); - void* memory = alloca(ShadowFrame::ComputeSize(num_regs)); - ShadowFrame* new_shadow_frame(ShadowFrame::Create(num_regs, &shadow_frame, called_method, 0, - memory)); + ShadowFrameAllocaUniquePtr shadow_frame_unique_ptr = + CREATE_SHADOW_FRAME(num_regs, &shadow_frame, called_method, 0); + ShadowFrame* new_shadow_frame = shadow_frame_unique_ptr.get(); // Initialize new shadow frame by copying the registers from the callee shadow frame. if (do_assignability_check) { @@ -669,7 +678,7 @@ static inline bool DoCallCommon(ArtMethod* called_method, AssignRegister(new_shadow_frame, shadow_frame, dest_reg, src_reg); } } else { - DCHECK_LE(number_of_inputs, Instruction::kMaxVarArgRegs); + DCHECK_LE(number_of_inputs, arraysize(arg)); for (; arg_index < number_of_inputs; ++arg_index) { AssignRegister(new_shadow_frame, shadow_frame, first_dest_reg + arg_index, arg[arg_index]); @@ -696,22 +705,24 @@ static inline bool DoCallCommon(ArtMethod* called_method, // new result of the StringFactory. Use the verifier to find this set of registers. ArtMethod* method = shadow_frame.GetMethod(); MethodReference method_ref = method->ToMethodReference(); - SafeMap<uint32_t, std::set<uint32_t>> string_init_map; - SafeMap<uint32_t, std::set<uint32_t>>* string_init_map_ptr; + SafeMap<uint32_t, std::set<uint32_t>>* string_init_map_ptr = nullptr; MethodRefToStringInitRegMap& method_to_string_init_map = Runtime::Current()->GetStringInitMap(); - MethodRefToStringInitRegMap::iterator it; { MutexLock mu(self, *Locks::interpreter_string_init_map_lock_); - it = method_to_string_init_map.find(method_ref); + auto it = method_to_string_init_map.find(method_ref); + if (it != method_to_string_init_map.end()) { + string_init_map_ptr = &it->second; + } } - if (it == method_to_string_init_map.end()) { - string_init_map = std::move(verifier::MethodVerifier::FindStringInitMap(method)); - { - MutexLock mu(self, *Locks::interpreter_string_init_map_lock_); - method_to_string_init_map.Overwrite(method_ref, string_init_map); + if (string_init_map_ptr == nullptr) { + SafeMap<uint32_t, std::set<uint32_t>> string_init_map = + verifier::MethodVerifier::FindStringInitMap(method); + MutexLock mu(self, *Locks::interpreter_string_init_map_lock_); + auto it = method_to_string_init_map.lower_bound(method_ref); + if (it == method_to_string_init_map.end() || + method_to_string_init_map.key_comp()(method_ref, it->first)) { + it = method_to_string_init_map.PutBefore(it, method_ref, std::move(string_init_map)); } - string_init_map_ptr = &string_init_map; - } else { string_init_map_ptr = &it->second; } if (string_init_map_ptr->size() != 0) { @@ -734,12 +745,13 @@ bool DoLambdaCall(ArtMethod* called_method, Thread* self, ShadowFrame& shadow_fr const Instruction* inst, uint16_t inst_data, JValue* result) { const uint4_t num_additional_registers = inst->VRegB_25x(); // Argument word count. - const uint16_t number_of_inputs = num_additional_registers + 1; - // The first input register is always present and is not encoded in the count. + const uint16_t number_of_inputs = num_additional_registers + kLambdaVirtualRegisterWidth; + // The lambda closure register is always present and is not encoded in the count. + // Furthermore, the lambda closure register is always wide, so it counts as 2 inputs. // TODO: find a cleaner way to separate non-range and range information without duplicating // code. - uint32_t arg[Instruction::kMaxVarArgRegs]; // only used in invoke-XXX. 
+ uint32_t arg[Instruction::kMaxVarArgRegs25x]; // only used in invoke-XXX. uint32_t vregC = 0; // only used in invoke-XXX-range. if (is_range) { vregC = inst->VRegC_3rc(); @@ -765,7 +777,7 @@ bool DoCall(ArtMethod* called_method, Thread* self, ShadowFrame& shadow_frame, // TODO: find a cleaner way to separate non-range and range information without duplicating // code. - uint32_t arg[Instruction::kMaxVarArgRegs]; // only used in invoke-XXX. + uint32_t arg[Instruction::kMaxVarArgRegs] = {}; // only used in invoke-XXX. uint32_t vregC = 0; if (is_range) { vregC = inst->VRegC_3rc(); diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h index 7398778d15..f57bddbb4f 100644 --- a/runtime/interpreter/interpreter_common.h +++ b/runtime/interpreter/interpreter_common.h @@ -34,7 +34,12 @@ #include "dex_instruction-inl.h" #include "entrypoints/entrypoint_utils-inl.h" #include "handle_scope-inl.h" +#include "lambda/art_lambda_method.h" #include "lambda/box_table.h" +#include "lambda/closure.h" +#include "lambda/closure_builder-inl.h" +#include "lambda/leaking_allocator.h" +#include "lambda/shorty_field_type.h" #include "mirror/class-inl.h" #include "mirror/method.h" #include "mirror/object-inl.h" @@ -133,32 +138,44 @@ static inline bool IsValidLambdaTargetOrThrow(ArtMethod* called_method) return success; } -// Write out the 'ArtMethod*' into vreg and vreg+1 +// Write out the 'Closure*' into vreg and vreg+1, as if it was a jlong. static inline void WriteLambdaClosureIntoVRegs(ShadowFrame& shadow_frame, - const ArtMethod& called_method, + const lambda::Closure* lambda_closure, uint32_t vreg) { // Split the method into a lo and hi 32 bits so we can encode them into 2 virtual registers. - uint32_t called_method_lo = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(&called_method)); - uint32_t called_method_hi = static_cast<uint32_t>(reinterpret_cast<uint64_t>(&called_method) + uint32_t closure_lo = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(lambda_closure)); + uint32_t closure_hi = static_cast<uint32_t>(reinterpret_cast<uint64_t>(lambda_closure) >> BitSizeOf<uint32_t>()); // Use uint64_t instead of uintptr_t to allow shifting past the max on 32-bit. static_assert(sizeof(uint64_t) >= sizeof(uintptr_t), "Impossible"); - DCHECK_NE(called_method_lo | called_method_hi, 0u); + DCHECK_NE(closure_lo | closure_hi, 0u); - shadow_frame.SetVReg(vreg, called_method_lo); - shadow_frame.SetVReg(vreg + 1, called_method_hi); + shadow_frame.SetVReg(vreg, closure_lo); + shadow_frame.SetVReg(vreg + 1, closure_hi); } // Handles create-lambda instructions. // Returns true on success, otherwise throws an exception and returns false. // (Exceptions are thrown by creating a new exception and then being put in the thread TLS) // +// The closure must be allocated big enough to hold the data, and should not be +// pre-initialized. It is initialized with the actual captured variables as a side-effect, +// although this should be unimportant to the caller since this function also handles storing it to +// the ShadowFrame. +// // As a work-in-progress implementation, this shoves the ArtMethod object corresponding // to the target dex method index into the target register vA and vA + 1. 
template<bool do_access_check> -static inline bool DoCreateLambda(Thread* self, ShadowFrame& shadow_frame, - const Instruction* inst) { +static inline bool DoCreateLambda(Thread* self, + const Instruction* inst, + /*inout*/ShadowFrame& shadow_frame, + /*inout*/lambda::ClosureBuilder* closure_builder, + /*inout*/lambda::Closure* uninitialized_closure) { + DCHECK(closure_builder != nullptr); + DCHECK(uninitialized_closure != nullptr); + DCHECK_ALIGNED(uninitialized_closure, alignof(lambda::Closure)); + /* * create-lambda is opcode 0x21c * - vA is the target register where the closure will be stored into @@ -171,16 +188,69 @@ static inline bool DoCreateLambda(Thread* self, ShadowFrame& shadow_frame, ArtMethod* const called_method = FindMethodFromCode<kStatic, do_access_check>( method_idx, &receiver, sf_method, self); - uint32_t vregA = inst->VRegA_21c(); + uint32_t vreg_dest_closure = inst->VRegA_21c(); if (UNLIKELY(!IsValidLambdaTargetOrThrow(called_method))) { CHECK(self->IsExceptionPending()); - shadow_frame.SetVReg(vregA, 0u); - shadow_frame.SetVReg(vregA + 1, 0u); + shadow_frame.SetVReg(vreg_dest_closure, 0u); + shadow_frame.SetVReg(vreg_dest_closure + 1, 0u); return false; } - WriteLambdaClosureIntoVRegs(shadow_frame, *called_method, vregA); + lambda::ArtLambdaMethod* initialized_lambda_method; + // Initialize the ArtLambdaMethod with the right data. + { + lambda::ArtLambdaMethod* uninitialized_lambda_method = + reinterpret_cast<lambda::ArtLambdaMethod*>( + lambda::LeakingAllocator::AllocateMemory(self, sizeof(lambda::ArtLambdaMethod))); + + std::string captured_variables_shorty = closure_builder->GetCapturedVariableShortyTypes(); + std::string captured_variables_long_type_desc; + + // Synthesize a long type descriptor from the short one. + for (char shorty : captured_variables_shorty) { + lambda::ShortyFieldType shorty_field_type(shorty); + if (shorty_field_type.IsObject()) { + // Not the true type, but good enough until we implement verifier support. + captured_variables_long_type_desc += "Ljava/lang/Object;"; + UNIMPLEMENTED(FATAL) << "create-lambda with an object captured variable"; + } else if (shorty_field_type.IsLambda()) { + // Not the true type, but good enough until we implement verifier support. + captured_variables_long_type_desc += "Ljava/lang/Runnable;"; + UNIMPLEMENTED(FATAL) << "create-lambda with a lambda captured variable"; + } else { + // The primitive types have the same length shorty or not, so this is always correct. + DCHECK(shorty_field_type.IsPrimitive()); + captured_variables_long_type_desc += shorty_field_type; + } + } + + // Copy strings to dynamically allocated storage. This leaks, but that's ok. Fix it later. + // TODO: Strings need to come from the DexFile, so they won't need their own allocations. 
+ char* captured_variables_type_desc = lambda::LeakingAllocator::MakeFlexibleInstance<char>( + self, + captured_variables_long_type_desc.size() + 1); + strcpy(captured_variables_type_desc, captured_variables_long_type_desc.c_str()); + char* captured_variables_shorty_copy = lambda::LeakingAllocator::MakeFlexibleInstance<char>( + self, + captured_variables_shorty.size() + 1); + strcpy(captured_variables_shorty_copy, captured_variables_shorty.c_str()); + + new (uninitialized_lambda_method) lambda::ArtLambdaMethod(called_method, + captured_variables_type_desc, + captured_variables_shorty_copy, + true); // innate lambda + initialized_lambda_method = uninitialized_lambda_method; + } + + // Write all the closure captured variables and the closure header into the closure. + lambda::Closure* initialized_closure; + { + initialized_closure = + closure_builder->CreateInPlace(uninitialized_closure, initialized_lambda_method); + } + + WriteLambdaClosureIntoVRegs(/*inout*/shadow_frame, initialized_closure, vreg_dest_closure); return true; } @@ -189,13 +259,11 @@ static inline bool DoCreateLambda(Thread* self, ShadowFrame& shadow_frame, // Validates that the art method points to a valid lambda function, otherwise throws // an exception and returns null. // (Exceptions are thrown by creating a new exception and then being put in the thread TLS) -static inline ArtMethod* ReadLambdaClosureFromVRegsOrThrow(ShadowFrame& shadow_frame, - uint32_t vreg) +static inline lambda::Closure* ReadLambdaClosureFromVRegsOrThrow(ShadowFrame& shadow_frame, + uint32_t vreg) SHARED_REQUIRES(Locks::mutator_lock_) { - // TODO(iam): Introduce a closure abstraction that will contain the captured variables - // instead of just an ArtMethod. - // This is temporarily using 2 vregs because a native ArtMethod can be up to 64-bit, - // but once proper variable capture is implemented it will only use 1 vreg. + // Lambda closures take up a consecutive pair of 2 virtual registers. + // On 32-bit the high bits are always 0. uint32_t vc_value_lo = shadow_frame.GetVReg(vreg); uint32_t vc_value_hi = shadow_frame.GetVReg(vreg + 1); @@ -204,17 +272,285 @@ static inline ArtMethod* ReadLambdaClosureFromVRegsOrThrow(ShadowFrame& shadow_f // Use uint64_t instead of uintptr_t to allow left-shifting past the max on 32-bit. static_assert(sizeof(uint64_t) >= sizeof(uintptr_t), "Impossible"); - ArtMethod* const called_method = reinterpret_cast<ArtMethod* const>(vc_value_ptr); + lambda::Closure* const lambda_closure = reinterpret_cast<lambda::Closure*>(vc_value_ptr); + DCHECK_ALIGNED(lambda_closure, alignof(lambda::Closure)); // Guard against the user passing a null closure, which is odd but (sadly) semantically valid. - if (UNLIKELY(called_method == nullptr)) { + if (UNLIKELY(lambda_closure == nullptr)) { ThrowNullPointerExceptionFromInterpreter(); return nullptr; - } else if (UNLIKELY(!IsValidLambdaTargetOrThrow(called_method))) { + } else if (UNLIKELY(!IsValidLambdaTargetOrThrow(lambda_closure->GetTargetMethod()))) { + // Sanity check against data corruption. return nullptr; } - return called_method; + return lambda_closure; +} + +// Forward declaration for lock annotations. See below for documentation. +template <bool do_access_check> +static inline const char* GetStringDataByDexStringIndexOrThrow(ShadowFrame& shadow_frame, + uint32_t string_idx) + SHARED_REQUIRES(Locks::mutator_lock_); + +// Find the c-string data corresponding to a dex file's string index. +// Otherwise, returns null if not found and throws a VerifyError. 
+// +// Note that with do_access_check=false, we never return null because the verifier +// must guard against invalid string indices. +// (Exceptions are thrown by creating a new exception and then being put in the thread TLS) +template <bool do_access_check> +static inline const char* GetStringDataByDexStringIndexOrThrow(ShadowFrame& shadow_frame, + uint32_t string_idx) { + ArtMethod* method = shadow_frame.GetMethod(); + const DexFile* dex_file = method->GetDexFile(); + + mirror::Class* declaring_class = method->GetDeclaringClass(); + if (!do_access_check) { + // MethodVerifier refuses methods with string_idx out of bounds. + DCHECK_LT(string_idx, declaring_class->GetDexCache()->NumStrings()); + } else { + // Access checks enabled: perform the string index bounds check ourselves. + if (string_idx >= dex_file->GetHeader().string_ids_size_) { + ThrowVerifyError(declaring_class, "String index '%" PRIu32 "' out of bounds", + string_idx); + return nullptr; + } + } + + const char* type_string = dex_file->StringDataByIdx(string_idx); + + if (UNLIKELY(type_string == nullptr)) { + CHECK_EQ(false, do_access_check) + << " verifier should've caught invalid string index " << string_idx; + CHECK_EQ(true, do_access_check) + << " string idx size check should've caught invalid string index " << string_idx; + } + + return type_string; +} + +// Handles capture-variable instructions. +// Returns true on success, otherwise throws an exception and returns false. +// (Exceptions are thrown by creating a new exception and then being put in the thread TLS) +template<bool do_access_check> +static inline bool DoCaptureVariable(Thread* self, + const Instruction* inst, + /*inout*/ShadowFrame& shadow_frame, + /*inout*/lambda::ClosureBuilder* closure_builder) { + DCHECK(closure_builder != nullptr); + using lambda::ShortyFieldType; + /* + * capture-variable is opcode 0xf6, fmt 0x21c + * - vA is the source register of the variable that will be captured + * - vB is the string ID of the variable's type that will be captured + */ + const uint32_t source_vreg = inst->VRegA_21c(); + const uint32_t string_idx = inst->VRegB_21c(); + // TODO: this should be a proper [type id] instead of a [string ID] pointing to a type. + + const char* type_string = GetStringDataByDexStringIndexOrThrow<do_access_check>(shadow_frame, + string_idx); + if (UNLIKELY(type_string == nullptr)) { + CHECK(self->IsExceptionPending()); + return false; + } + + char type_first_letter = type_string[0]; + ShortyFieldType shorty_type; + if (do_access_check && + UNLIKELY(!ShortyFieldType::MaybeCreate(type_first_letter, /*out*/&shorty_type))) { // NOLINT: [whitespace/comma] [3] + ThrowVerifyError(shadow_frame.GetMethod()->GetDeclaringClass(), + "capture-variable vB must be a valid type"); + return false; + } else { + // Already verified that the type is valid. + shorty_type = ShortyFieldType(type_first_letter); + } + + const size_t captured_variable_count = closure_builder->GetCaptureCount(); + + // Note: types are specified explicitly so that the closure is packed tightly.
+ switch (shorty_type) { + case ShortyFieldType::kBoolean: { + uint32_t primitive_narrow_value = shadow_frame.GetVReg(source_vreg); + closure_builder->CaptureVariablePrimitive<bool>(primitive_narrow_value); + break; + } + case ShortyFieldType::kByte: { + uint32_t primitive_narrow_value = shadow_frame.GetVReg(source_vreg); + closure_builder->CaptureVariablePrimitive<int8_t>(primitive_narrow_value); + break; + } + case ShortyFieldType::kChar: { + uint32_t primitive_narrow_value = shadow_frame.GetVReg(source_vreg); + closure_builder->CaptureVariablePrimitive<uint16_t>(primitive_narrow_value); + break; + } + case ShortyFieldType::kShort: { + uint32_t primitive_narrow_value = shadow_frame.GetVReg(source_vreg); + closure_builder->CaptureVariablePrimitive<int16_t>(primitive_narrow_value); + break; + } + case ShortyFieldType::kInt: { + uint32_t primitive_narrow_value = shadow_frame.GetVReg(source_vreg); + closure_builder->CaptureVariablePrimitive<int32_t>(primitive_narrow_value); + break; + } + case ShortyFieldType::kDouble: { + closure_builder->CaptureVariablePrimitive(shadow_frame.GetVRegDouble(source_vreg)); + break; + } + case ShortyFieldType::kFloat: { + closure_builder->CaptureVariablePrimitive(shadow_frame.GetVRegFloat(source_vreg)); + break; + } + case ShortyFieldType::kLambda: { + UNIMPLEMENTED(FATAL) << " capture-variable with type kLambda"; + // TODO: Capturing lambdas recursively will be done at a later time. + UNREACHABLE(); + } + case ShortyFieldType::kLong: { + closure_builder->CaptureVariablePrimitive(shadow_frame.GetVRegLong(source_vreg)); + break; + } + case ShortyFieldType::kObject: { + closure_builder->CaptureVariableObject(shadow_frame.GetVRegReference(source_vreg)); + UNIMPLEMENTED(FATAL) << " capture-variable with type kObject"; + // TODO: finish implementing this. disabled for now since we can't track lambda refs for GC. + UNREACHABLE(); + } + + default: + LOG(FATAL) << "Invalid shorty type value " << shorty_type; + UNREACHABLE(); + } + + DCHECK_EQ(captured_variable_count + 1, closure_builder->GetCaptureCount()); + + return true; +} + +// Handles liberate-variable instructions. +// Returns true on success, otherwise throws an exception and returns false. +// (Exceptions are thrown by creating a new exception and then being put in the thread TLS) +template<bool do_access_check> +static inline bool DoLiberateVariable(Thread* self, + const Instruction* inst, + size_t captured_variable_index, + /*inout*/ShadowFrame& shadow_frame) { + using lambda::ShortyFieldType; + /* + * liberate-variable is opcode 0xf7, fmt 0x22c + * - vA is the destination register + * - vB is the register with the lambda closure in it + * - vC is the string ID which needs to be a valid field type descriptor + */ + + const uint32_t dest_vreg = inst->VRegA_22c(); + const uint32_t closure_vreg = inst->VRegB_22c(); + const uint32_t string_idx = inst->VRegC_22c(); + // TODO: this should be a proper [type id] instead of a [string ID] pointing to a type. + + + // Synthesize a long type descriptor from a shorty type descriptor list. + // TODO: Fix the dex encoding to contain the long and short type descriptors.
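  // For reference, and hedged since the final encoding is still a TODO above: a shorty descriptor
  // stores one character per captured variable, while the "long" descriptor spells reference types
  // out in full dex field-descriptor syntax. Assuming the usual dex conventions:
  //
  //   const char* shorty_descriptor = "IJL";                   // int, long, some reference type
  //   const char* long_descriptor   = "IJLjava/lang/Object;";  // the reference's class is recoverable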
+ const char* type_string = GetStringDataByDexStringIndexOrThrow<do_access_check>(shadow_frame, + string_idx); + if (UNLIKELY(do_access_check && type_string == nullptr)) { + CHECK(self->IsExceptionPending()); + shadow_frame.SetVReg(dest_vreg, 0); + return false; + } + + char type_first_letter = type_string[0]; + ShortyFieldType shorty_type; + if (do_access_check && + UNLIKELY(!ShortyFieldType::MaybeCreate(type_first_letter, /*out*/&shorty_type))) { // NOLINT: [whitespace/comma] [3] + ThrowVerifyError(shadow_frame.GetMethod()->GetDeclaringClass(), + "liberate-variable vC must be a valid type"); + shadow_frame.SetVReg(dest_vreg, 0); + return false; + } else { + // Already verified that the type is valid. + shorty_type = ShortyFieldType(type_first_letter); + } + + // Check for closure being null *after* the type check. + // This way we can access the type info in case we fail later, to know how many vregs to clear. + const lambda::Closure* lambda_closure = + ReadLambdaClosureFromVRegsOrThrow(/*inout*/shadow_frame, closure_vreg); + + // Failed lambda target runtime check, an exception was raised. + if (UNLIKELY(lambda_closure == nullptr)) { + CHECK(self->IsExceptionPending()); + + // Clear the destination vreg(s) to be safe. + shadow_frame.SetVReg(dest_vreg, 0); + if (shorty_type.IsPrimitiveWide() || shorty_type.IsLambda()) { + shadow_frame.SetVReg(dest_vreg + 1, 0); + } + return false; + } + + if (do_access_check && + UNLIKELY(captured_variable_index >= lambda_closure->GetNumberOfCapturedVariables())) { + ThrowVerifyError(shadow_frame.GetMethod()->GetDeclaringClass(), + "liberate-variable captured variable index %zu out of bounds", + lambda_closure->GetNumberOfCapturedVariables()); + // Clear the destination vreg(s) to be safe. + shadow_frame.SetVReg(dest_vreg, 0); + if (shorty_type.IsPrimitiveWide() || shorty_type.IsLambda()) { + shadow_frame.SetVReg(dest_vreg + 1, 0); + } + return false; + } + + // Verify that the runtime type of the captured-variable matches the requested dex type. + if (do_access_check) { + ShortyFieldType actual_type = lambda_closure->GetCapturedShortyType(captured_variable_index); + if (actual_type != shorty_type) { + ThrowVerifyError(shadow_frame.GetMethod()->GetDeclaringClass(), + "cannot liberate-variable of runtime type '%c' to dex type '%c'", + static_cast<char>(actual_type), + static_cast<char>(shorty_type)); + + shadow_frame.SetVReg(dest_vreg, 0); + if (shorty_type.IsPrimitiveWide() || shorty_type.IsLambda()) { + shadow_frame.SetVReg(dest_vreg + 1, 0); + } + return false; + } + + if (actual_type.IsLambda() || actual_type.IsObject()) { + UNIMPLEMENTED(FATAL) << "liberate-variable type checks needs to " + << "parse full type descriptor for objects and lambdas"; + } + } + + // Unpack the captured variable from the closure into the correct type, then save it to the vreg. 
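  // The vreg clearing above and the SetVRegLong below both follow from wide values occupying a
  // consecutive pair of 32-bit vregs, the same layout ReadLambdaClosureFromVRegsOrThrow reads the
  // closure pointer back out of. A self-contained sketch of that split (hypothetical helper, not
  // ART code):
  //
  //   void StoreWide(uint32_t* vregs, uint32_t v, uint64_t value) {
  //     vregs[v]     = static_cast<uint32_t>(value);         // low 32 bits
  //     vregs[v + 1] = static_cast<uint32_t>(value >> 32);   // high 32 bits (zero on 32-bit targets)
  //   }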
+ if (shorty_type.IsPrimitiveNarrow()) { + uint32_t primitive_narrow_value = + lambda_closure->GetCapturedPrimitiveNarrow(captured_variable_index); + shadow_frame.SetVReg(dest_vreg, primitive_narrow_value); + } else if (shorty_type.IsPrimitiveWide()) { + uint64_t primitive_wide_value = + lambda_closure->GetCapturedPrimitiveWide(captured_variable_index); + shadow_frame.SetVRegLong(dest_vreg, static_cast<int64_t>(primitive_wide_value)); + } else if (shorty_type.IsObject()) { + mirror::Object* unpacked_object = + lambda_closure->GetCapturedObject(captured_variable_index); + shadow_frame.SetVRegReference(dest_vreg, unpacked_object); + + UNIMPLEMENTED(FATAL) << "liberate-variable cannot unpack objects yet"; + } else if (shorty_type.IsLambda()) { + UNIMPLEMENTED(FATAL) << "liberate-variable cannot unpack lambdas yet"; + } else { + LOG(FATAL) << "unreachable"; + UNREACHABLE(); + } + + return true; } template<bool do_access_check> @@ -229,22 +565,24 @@ static inline bool DoInvokeLambda(Thread* self, ShadowFrame& shadow_frame, const * * - reading var-args for 0x25 gets us vD,vE,vF,vG (but not vB) */ - uint32_t vC = inst->VRegC_25x(); - ArtMethod* const called_method = ReadLambdaClosureFromVRegsOrThrow(shadow_frame, vC); + uint32_t vreg_closure = inst->VRegC_25x(); + const lambda::Closure* lambda_closure = + ReadLambdaClosureFromVRegsOrThrow(shadow_frame, vreg_closure); // Failed lambda target runtime check, an exception was raised. - if (UNLIKELY(called_method == nullptr)) { + if (UNLIKELY(lambda_closure == nullptr)) { CHECK(self->IsExceptionPending()); result->SetJ(0); return false; } + ArtMethod* const called_method = lambda_closure->GetTargetMethod(); // Invoke a non-range lambda return DoLambdaCall<false, do_access_check>(called_method, self, shadow_frame, inst, inst_data, result); } -// Handles invoke-XXX/range instructions. +// Handles invoke-XXX/range instructions (other than invoke-lambda[-range]). // Returns true on success, otherwise throws an exception and returns false. template<InvokeType type, bool is_range, bool do_access_check> static inline bool DoInvoke(Thread* self, ShadowFrame& shadow_frame, const Instruction* inst, @@ -521,17 +859,17 @@ static inline bool DoBoxLambda(Thread* self, ShadowFrame& shadow_frame, const In uint32_t vreg_target_object = inst->VRegA_22x(inst_data); uint32_t vreg_source_closure = inst->VRegB_22x(); - ArtMethod* closure_method = ReadLambdaClosureFromVRegsOrThrow(shadow_frame, - vreg_source_closure); + lambda::Closure* lambda_closure = ReadLambdaClosureFromVRegsOrThrow(shadow_frame, + vreg_source_closure); // Failed lambda target runtime check, an exception was raised. - if (UNLIKELY(closure_method == nullptr)) { + if (UNLIKELY(lambda_closure == nullptr)) { CHECK(self->IsExceptionPending()); return false; } mirror::Object* closure_as_object = - Runtime::Current()->GetLambdaBoxTable()->BoxLambda(closure_method); + Runtime::Current()->GetLambdaBoxTable()->BoxLambda(lambda_closure); // Failed to box the lambda, an exception was raised. if (UNLIKELY(closure_as_object == nullptr)) { @@ -564,16 +902,16 @@ static inline bool DoUnboxLambda(Thread* self, return false; } - ArtMethod* unboxed_closure = nullptr; + lambda::Closure* unboxed_closure = nullptr; // Raise an exception if unboxing fails. 
if (!Runtime::Current()->GetLambdaBoxTable()->UnboxLambda(boxed_closure_object, - &unboxed_closure)) { + /*out*/&unboxed_closure)) { CHECK(self->IsExceptionPending()); return false; } DCHECK(unboxed_closure != nullptr); - WriteLambdaClosureIntoVRegs(shadow_frame, *unboxed_closure, vreg_target_closure); + WriteLambdaClosureIntoVRegs(/*inout*/shadow_frame, unboxed_closure, vreg_target_closure); return true; } @@ -650,10 +988,13 @@ EXPLICIT_DO_INVOKE_VIRTUAL_QUICK_TEMPLATE_DECL(true); // invoke-virtual-quick- #undef EXPLICIT_INSTANTIATION_DO_INVOKE_VIRTUAL_QUICK // Explicitly instantiate all DoCreateLambda functions. -#define EXPLICIT_DO_CREATE_LAMBDA_DECL(_do_check) \ -template SHARED_REQUIRES(Locks::mutator_lock_) \ -bool DoCreateLambda<_do_check>(Thread* self, ShadowFrame& shadow_frame, \ - const Instruction* inst) +#define EXPLICIT_DO_CREATE_LAMBDA_DECL(_do_check) \ +template SHARED_REQUIRES(Locks::mutator_lock_) \ +bool DoCreateLambda<_do_check>(Thread* self, \ + const Instruction* inst, \ + /*inout*/ShadowFrame& shadow_frame, \ + /*inout*/lambda::ClosureBuilder* closure_builder, \ + /*inout*/lambda::Closure* uninitialized_closure); EXPLICIT_DO_CREATE_LAMBDA_DECL(false); // create-lambda EXPLICIT_DO_CREATE_LAMBDA_DECL(true); // create-lambda @@ -689,7 +1030,29 @@ EXPLICIT_DO_UNBOX_LAMBDA_DECL(false); // unbox-lambda EXPLICIT_DO_UNBOX_LAMBDA_DECL(true); // unbox-lambda #undef EXPLICIT_DO_BOX_LAMBDA_DECL +// Explicitly instantiate all DoCaptureVariable functions. +#define EXPLICIT_DO_CAPTURE_VARIABLE_DECL(_do_check) \ +template SHARED_REQUIRES(Locks::mutator_lock_) \ +bool DoCaptureVariable<_do_check>(Thread* self, \ + const Instruction* inst, \ + ShadowFrame& shadow_frame, \ + lambda::ClosureBuilder* closure_builder); + +EXPLICIT_DO_CAPTURE_VARIABLE_DECL(false); // capture-variable +EXPLICIT_DO_CAPTURE_VARIABLE_DECL(true); // capture-variable +#undef EXPLICIT_DO_CREATE_LAMBDA_DECL +// Explicitly instantiate all DoLiberateVariable functions. +#define EXPLICIT_DO_LIBERATE_VARIABLE_DECL(_do_check) \ +template SHARED_REQUIRES(Locks::mutator_lock_) \ +bool DoLiberateVariable<_do_check>(Thread* self, \ + const Instruction* inst, \ + size_t captured_variable_index, \ + ShadowFrame& shadow_frame); \ + +EXPLICIT_DO_LIBERATE_VARIABLE_DECL(false); // liberate-variable +EXPLICIT_DO_LIBERATE_VARIABLE_DECL(true); // liberate-variable +#undef EXPLICIT_DO_LIBERATE_LAMBDA_DECL } // namespace interpreter } // namespace art diff --git a/runtime/interpreter/interpreter_goto_table_impl.cc b/runtime/interpreter/interpreter_goto_table_impl.cc index 72e2ba0e7b..9677d79de3 100644 --- a/runtime/interpreter/interpreter_goto_table_impl.cc +++ b/runtime/interpreter/interpreter_goto_table_impl.cc @@ -17,9 +17,13 @@ #if !defined(__clang__) // Clang 3.4 fails to build the goto interpreter implementation. + +#include "base/stl_util.h" // MakeUnique #include "interpreter_common.h" #include "safe_math.h" +#include <memory> // std::unique_ptr + namespace art { namespace interpreter { @@ -179,6 +183,9 @@ JValue ExecuteGotoImpl(Thread* self, const DexFile::CodeItem* code_item, ShadowF } } + std::unique_ptr<lambda::ClosureBuilder> lambda_closure_builder; + size_t lambda_captured_variable_index = 0; + // Jump to first instruction. 
ADVANCE(0); UNREACHABLE_CODE_CHECK(); @@ -2412,7 +2419,20 @@ JValue ExecuteGotoImpl(Thread* self, const DexFile::CodeItem* code_item, ShadowF HANDLE_INSTRUCTION_END(); HANDLE_EXPERIMENTAL_INSTRUCTION_START(CREATE_LAMBDA) { - bool success = DoCreateLambda<true>(self, shadow_frame, inst); + if (lambda_closure_builder == nullptr) { + // DoCreateLambda always needs a ClosureBuilder, even if it has 0 captured variables. + lambda_closure_builder = MakeUnique<lambda::ClosureBuilder>(); + } + + // TODO: these allocations should not leak, and the lambda method should not be local. + lambda::Closure* lambda_closure = + reinterpret_cast<lambda::Closure*>(alloca(lambda_closure_builder->GetSize())); + bool success = DoCreateLambda<do_access_check>(self, + inst, + /*inout*/shadow_frame, + /*inout*/lambda_closure_builder.get(), + /*inout*/lambda_closure); + lambda_closure_builder.reset(nullptr); // reset state of variables captured POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2); } HANDLE_EXPERIMENTAL_INSTRUCTION_END(); @@ -2429,6 +2449,31 @@ JValue ExecuteGotoImpl(Thread* self, const DexFile::CodeItem* code_item, ShadowF } HANDLE_EXPERIMENTAL_INSTRUCTION_END(); + HANDLE_EXPERIMENTAL_INSTRUCTION_START(CAPTURE_VARIABLE) { + if (lambda_closure_builder == nullptr) { + lambda_closure_builder = MakeUnique<lambda::ClosureBuilder>(); + } + + bool success = DoCaptureVariable<do_access_check>(self, + inst, + /*inout*/shadow_frame, + /*inout*/lambda_closure_builder.get()); + + POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2); + } + HANDLE_EXPERIMENTAL_INSTRUCTION_END(); + + HANDLE_EXPERIMENTAL_INSTRUCTION_START(LIBERATE_VARIABLE) { + bool success = DoLiberateVariable<do_access_check>(self, + inst, + lambda_captured_variable_index, + /*inout*/shadow_frame); + // Temporarily only allow sequences of 'liberate-variable, liberate-variable, ...' + lambda_captured_variable_index++; + POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, 2); + } + HANDLE_EXPERIMENTAL_INSTRUCTION_END(); + HANDLE_INSTRUCTION_START(UNUSED_3E) UnexpectedOpcode(inst, shadow_frame); HANDLE_INSTRUCTION_END(); @@ -2465,14 +2510,6 @@ JValue ExecuteGotoImpl(Thread* self, const DexFile::CodeItem* code_item, ShadowF UnexpectedOpcode(inst, shadow_frame); HANDLE_INSTRUCTION_END(); - HANDLE_INSTRUCTION_START(UNUSED_F5) - UnexpectedOpcode(inst, shadow_frame); - HANDLE_INSTRUCTION_END(); - - HANDLE_INSTRUCTION_START(UNUSED_F7) - UnexpectedOpcode(inst, shadow_frame); - HANDLE_INSTRUCTION_END(); - HANDLE_INSTRUCTION_START(UNUSED_FA) UnexpectedOpcode(inst, shadow_frame); HANDLE_INSTRUCTION_END(); diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc index b5cc11e070..083dfb5267 100644 --- a/runtime/interpreter/interpreter_switch_impl.cc +++ b/runtime/interpreter/interpreter_switch_impl.cc @@ -14,9 +14,12 @@ * limitations under the License. */ +#include "base/stl_util.h" // MakeUnique #include "interpreter_common.h" #include "safe_math.h" +#include <memory> // std::unique_ptr + namespace art { namespace interpreter { @@ -82,6 +85,11 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, const uint16_t* const insns = code_item->insns_; const Instruction* inst = Instruction::At(insns + dex_pc); uint16_t inst_data; + + // TODO: collapse capture-variable+create-lambda into one opcode, then we won't need + // to keep this live for the scope of the entire function call. 
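  // Rough sketch of the dex-level sequence that the two locals declared just below exist for; the
  // exact operand layouts are in the DoCaptureVariable/DoCreateLambda handlers earlier in this
  // change, so only the shape of the sequence is shown here:
  //
  //   capture-variable ...   // first occurrence lazily creates lambda_closure_builder
  //   capture-variable ...   // each occurrence appends one captured value to the builder
  //   create-lambda    ...   // closure built in place into alloca(builder->GetSize()),
  //                          // then the builder is reset so the next lambda starts clean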
+ std::unique_ptr<lambda::ClosureBuilder> lambda_closure_builder; + size_t lambda_captured_variable_index = 0; while (true) { dex_pc = inst->GetDexPc(insns); shadow_frame.SetDexPC(dex_pc); @@ -2235,19 +2243,63 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx); break; } + case Instruction::CAPTURE_VARIABLE: { + if (!IsExperimentalInstructionEnabled(inst)) { + UnexpectedOpcode(inst, shadow_frame); + } + + if (lambda_closure_builder == nullptr) { + lambda_closure_builder = MakeUnique<lambda::ClosureBuilder>(); + } + + PREAMBLE(); + bool success = DoCaptureVariable<do_access_check>(self, + inst, + /*inout*/shadow_frame, + /*inout*/lambda_closure_builder.get()); + POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx); + break; + } case Instruction::CREATE_LAMBDA: { if (!IsExperimentalInstructionEnabled(inst)) { UnexpectedOpcode(inst, shadow_frame); } PREAMBLE(); - bool success = DoCreateLambda<do_access_check>(self, shadow_frame, inst); + + if (lambda_closure_builder == nullptr) { + // DoCreateLambda always needs a ClosureBuilder, even if it has 0 captured variables. + lambda_closure_builder = MakeUnique<lambda::ClosureBuilder>(); + } + + // TODO: these allocations should not leak, and the lambda method should not be local. + lambda::Closure* lambda_closure = + reinterpret_cast<lambda::Closure*>(alloca(lambda_closure_builder->GetSize())); + bool success = DoCreateLambda<do_access_check>(self, + inst, + /*inout*/shadow_frame, + /*inout*/lambda_closure_builder.get(), + /*inout*/lambda_closure); + lambda_closure_builder.reset(nullptr); // reset state of variables captured + POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx); + break; + } + case Instruction::LIBERATE_VARIABLE: { + if (!IsExperimentalInstructionEnabled(inst)) { + UnexpectedOpcode(inst, shadow_frame); + } + + PREAMBLE(); + bool success = DoLiberateVariable<do_access_check>(self, + inst, + lambda_captured_variable_index, + /*inout*/shadow_frame); + // Temporarily only allow sequences of 'liberate-variable, liberate-variable, ...' + lambda_captured_variable_index++; POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_2xx); break; } - case Instruction::UNUSED_F4: - case Instruction::UNUSED_F5: - case Instruction::UNUSED_F7: { + case Instruction::UNUSED_F4: { if (!IsExperimentalInstructionEnabled(inst)) { UnexpectedOpcode(inst, shadow_frame); } diff --git a/runtime/java_vm_ext.cc b/runtime/java_vm_ext.cc index 531e03926a..b5e28e9314 100644 --- a/runtime/java_vm_ext.cc +++ b/runtime/java_vm_ext.cc @@ -60,7 +60,7 @@ class SharedLibrary { : path_(path), handle_(handle), needs_native_bridge_(false), - class_loader_(env->NewGlobalRef(class_loader)), + class_loader_(env->NewWeakGlobalRef(class_loader)), jni_on_load_lock_("JNI_OnLoad lock"), jni_on_load_cond_("JNI_OnLoad condition variable", jni_on_load_lock_), jni_on_load_thread_id_(self->GetThreadId()), @@ -70,11 +70,11 @@ class SharedLibrary { ~SharedLibrary() { Thread* self = Thread::Current(); if (self != nullptr) { - self->GetJniEnv()->DeleteGlobalRef(class_loader_); + self->GetJniEnv()->DeleteWeakGlobalRef(class_loader_); } } - jobject GetClassLoader() const { + jweak GetClassLoader() const { return class_loader_; } @@ -131,7 +131,13 @@ class SharedLibrary { return needs_native_bridge_; } - void* FindSymbol(const std::string& symbol_name) { + void* FindSymbol(const std::string& symbol_name, const char* shorty = nullptr) { + return NeedsNativeBridge() + ? 
FindSymbolWithNativeBridge(symbol_name.c_str(), shorty) + : FindSymbolWithoutNativeBridge(symbol_name.c_str()); + } + + void* FindSymbolWithoutNativeBridge(const std::string& symbol_name) { CHECK(!NeedsNativeBridge()); return dlsym(handle_, symbol_name.c_str()); @@ -160,9 +166,9 @@ class SharedLibrary { // True if a native bridge is required. bool needs_native_bridge_; - // The ClassLoader this library is associated with, a global JNI reference that is + // The ClassLoader this library is associated with, a weak global JNI reference that is // created/deleted with the scope of the library. - const jobject class_loader_; + const jweak class_loader_; // Guards remaining items. Mutex jni_on_load_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER; @@ -184,7 +190,10 @@ class Libraries { STLDeleteValues(&libraries_); } - void Dump(std::ostream& os) const { + // NO_THREAD_SAFETY_ANALYSIS since this may be called from Dumpable. Dumpable can't be annotated + // properly due to the template. The caller should be holding the jni_libraries_lock_. + void Dump(std::ostream& os) const NO_THREAD_SAFETY_ANALYSIS { + Locks::jni_libraries_lock_->AssertHeld(Thread::Current()); bool first = true; for (const auto& library : libraries_) { if (!first) { @@ -195,16 +204,17 @@ class Libraries { } } - size_t size() const { + size_t size() const REQUIRES(Locks::jni_libraries_lock_) { return libraries_.size(); } - SharedLibrary* Get(const std::string& path) { + SharedLibrary* Get(const std::string& path) REQUIRES(Locks::jni_libraries_lock_) { auto it = libraries_.find(path); return (it == libraries_.end()) ? nullptr : it->second; } - void Put(const std::string& path, SharedLibrary* library) { + void Put(const std::string& path, SharedLibrary* library) + REQUIRES(Locks::jni_libraries_lock_) { libraries_.Put(path, library); } @@ -217,24 +227,18 @@ class Libraries { const mirror::ClassLoader* declaring_class_loader = m->GetDeclaringClass()->GetClassLoader(); ScopedObjectAccessUnchecked soa(Thread::Current()); for (const auto& lib : libraries_) { - SharedLibrary* library = lib.second; + SharedLibrary* const library = lib.second; if (soa.Decode<mirror::ClassLoader*>(library->GetClassLoader()) != declaring_class_loader) { // We only search libraries loaded by the appropriate ClassLoader. continue; } // Try the short name then the long name... - void* fn; - if (library->NeedsNativeBridge()) { - const char* shorty = m->GetShorty(); - fn = library->FindSymbolWithNativeBridge(jni_short_name, shorty); - if (fn == nullptr) { - fn = library->FindSymbolWithNativeBridge(jni_long_name, shorty); - } - } else { - fn = library->FindSymbol(jni_short_name); - if (fn == nullptr) { - fn = library->FindSymbol(jni_long_name); - } + const char* shorty = library->NeedsNativeBridge() + ? m->GetShorty() + : nullptr; + void* fn = library->FindSymbol(jni_short_name, shorty); + if (fn == nullptr) { + fn = library->FindSymbol(jni_long_name, shorty); } if (fn != nullptr) { VLOG(jni) << "[Found native code for " << PrettyMethod(m) @@ -249,11 +253,50 @@ class Libraries { return nullptr; } + // Unload native libraries with cleared class loaders. 
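  // The unloading described above is only possible because class_loader_ is now a *weak* global
  // reference; a strong global would keep the ClassLoader (and therefore the library) reachable
  // forever. Standard JNI calls, shown out of context as a sketch:
  //
  //   jweak loader_ref = env->NewWeakGlobalRef(class_loader);   // does not keep the loader alive
  //   ...
  //   env->DeleteWeakGlobalRef(loader_ref);                     // still must be deleted explicitly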
+ void UnloadNativeLibraries() + REQUIRES(!Locks::jni_libraries_lock_) + SHARED_REQUIRES(Locks::mutator_lock_) { + ScopedObjectAccessUnchecked soa(Thread::Current()); + typedef void (*JNI_OnUnloadFn)(JavaVM*, void*); + std::vector<JNI_OnUnloadFn> unload_functions; + { + MutexLock mu(soa.Self(), *Locks::jni_libraries_lock_); + for (auto it = libraries_.begin(); it != libraries_.end(); ) { + SharedLibrary* const library = it->second; + // If class loader is null then it was unloaded, call JNI_OnUnload. + const jweak class_loader = library->GetClassLoader(); + // If class_loader is a null jobject then it is the boot class loader. We should not unload + // the native libraries of the boot class loader. + if (class_loader != nullptr && + soa.Decode<mirror::ClassLoader*>(class_loader) == nullptr) { + void* const sym = library->FindSymbol("JNI_OnUnload", nullptr); + if (sym == nullptr) { + VLOG(jni) << "[No JNI_OnUnload found in \"" << library->GetPath() << "\"]"; + } else { + VLOG(jni) << "[JNI_OnUnload found for \"" << library->GetPath() << "\"]"; + JNI_OnUnloadFn jni_on_unload = reinterpret_cast<JNI_OnUnloadFn>(sym); + unload_functions.push_back(jni_on_unload); + } + delete library; + it = libraries_.erase(it); + } else { + ++it; + } + } + } + // Do this without holding the jni libraries lock to prevent possible deadlocks. + for (JNI_OnUnloadFn fn : unload_functions) { + VLOG(jni) << "Calling JNI_OnUnload"; + (*fn)(soa.Vm(), nullptr); + } + } + private: - AllocationTrackingSafeMap<std::string, SharedLibrary*, kAllocatorTagJNILibraries> libraries_; + AllocationTrackingSafeMap<std::string, SharedLibrary*, kAllocatorTagJNILibraries> libraries_ + GUARDED_BY(Locks::jni_libraries_lock_); }; - class JII { public: static jint DestroyJavaVM(JavaVM* vm) { @@ -641,6 +684,10 @@ void JavaVMExt::DumpReferenceTables(std::ostream& os) { } } +void JavaVMExt::UnloadNativeLibraries() { + libraries_.get()->UnloadNativeLibraries(); +} + bool JavaVMExt::LoadNativeLibrary(JNIEnv* env, const std::string& path, jobject class_loader, std::string* error_msg) { error_msg->clear(); @@ -738,10 +785,8 @@ bool JavaVMExt::LoadNativeLibrary(JNIEnv* env, const std::string& path, jobject void* sym; if (needs_native_bridge) { library->SetNeedsNativeBridge(); - sym = library->FindSymbolWithNativeBridge("JNI_OnLoad", nullptr); - } else { - sym = dlsym(handle, "JNI_OnLoad"); } + sym = library->FindSymbol("JNI_OnLoad", nullptr); if (sym == nullptr) { VLOG(jni) << "[No JNI_OnLoad found in \"" << path << "\"]"; was_successful = true; diff --git a/runtime/java_vm_ext.h b/runtime/java_vm_ext.h index b539bbdba3..c1fbdc0389 100644 --- a/runtime/java_vm_ext.h +++ b/runtime/java_vm_ext.h @@ -88,6 +88,11 @@ class JavaVMExt : public JavaVM { bool LoadNativeLibrary(JNIEnv* env, const std::string& path, jobject javaLoader, std::string* error_msg); + // Unload native libraries with cleared class loaders. + void UnloadNativeLibraries() + REQUIRES(!Locks::jni_libraries_lock_) + SHARED_REQUIRES(Locks::mutator_lock_); + /** * Returns a pointer to the code for the native method 'm', found * using dlsym(3) on every native library that's been loaded so far. @@ -184,7 +189,9 @@ class JavaVMExt : public JavaVM { // Not guarded by globals_lock since we sometimes use SynchronizedGet in Thread::DecodeJObject. IndirectReferenceTable globals_; - std::unique_ptr<Libraries> libraries_ GUARDED_BY(Locks::jni_libraries_lock_); + // No lock annotation since UnloadNativeLibraries is called on libraries_ but locks the + // jni_libraries_lock_ internally. 
+ std::unique_ptr<Libraries> libraries_; // Used by -Xcheck:jni. const JNIInvokeInterface* const unchecked_functions_; diff --git a/runtime/jit/jit.h b/runtime/jit/jit.h index 643bc23da3..e73ba82278 100644 --- a/runtime/jit/jit.h +++ b/runtime/jit/jit.h @@ -67,6 +67,9 @@ class Jit { void DumpInfo(std::ostream& os); // Add a timing logger to cumulative_timings_. void AddTimingLogger(const TimingLogger& logger); + JitInstrumentationCache* GetInstrumentationCache() const { + return instrumentation_cache_.get(); + } private: Jit(); diff --git a/runtime/jit/jit_code_cache_test.cc b/runtime/jit/jit_code_cache_test.cc index a6cbb710af..c76dc1110a 100644 --- a/runtime/jit/jit_code_cache_test.cc +++ b/runtime/jit/jit_code_cache_test.cc @@ -49,8 +49,11 @@ TEST_F(JitCodeCacheTest, TestCoverage) { ASSERT_TRUE(reserved_code != nullptr); ASSERT_TRUE(code_cache->ContainsCodePtr(reserved_code)); ASSERT_EQ(code_cache->NumMethods(), 1u); - ClassLinker* const cl = Runtime::Current()->GetClassLinker(); - ArtMethod* method = &cl->AllocArtMethodArray(soa.Self(), 1)->At(0); + Runtime* const runtime = Runtime::Current(); + ClassLinker* const class_linker = runtime->GetClassLinker(); + ArtMethod* method = &class_linker->AllocArtMethodArray(soa.Self(), + runtime->GetLinearAlloc(), + 1)->At(0); ASSERT_FALSE(code_cache->ContainsMethod(method)); method->SetEntryPointFromQuickCompiledCode(reserved_code); ASSERT_TRUE(code_cache->ContainsMethod(method)); diff --git a/runtime/jit/jit_instrumentation.cc b/runtime/jit/jit_instrumentation.cc index d437dd5d56..e9c16c1aa4 100644 --- a/runtime/jit/jit_instrumentation.cc +++ b/runtime/jit/jit_instrumentation.cc @@ -24,11 +24,21 @@ namespace art { namespace jit { -class JitCompileTask : public Task { +class JitCompileTask FINAL : public Task { public: - explicit JitCompileTask(ArtMethod* method) : method_(method) {} + explicit JitCompileTask(ArtMethod* method) : method_(method) { + ScopedObjectAccess soa(Thread::Current()); + // Add a global ref to the class to prevent class unloading until compilation is done. + klass_ = soa.Vm()->AddGlobalRef(soa.Self(), method_->GetDeclaringClass()); + CHECK(klass_ != nullptr); + } + + ~JitCompileTask() { + ScopedObjectAccess soa(Thread::Current()); + soa.Vm()->DeleteGlobalRef(soa.Self(), klass_); + } - virtual void Run(Thread* self) OVERRIDE { + void Run(Thread* self) OVERRIDE { ScopedObjectAccess soa(self); VLOG(jit) << "JitCompileTask compiling method " << PrettyMethod(method_); if (!Runtime::Current()->GetJit()->CompileMethod(method_, self)) { @@ -36,12 +46,13 @@ class JitCompileTask : public Task { } } - virtual void Finalize() OVERRIDE { + void Finalize() OVERRIDE { delete this; } private: ArtMethod* const method_; + jobject klass_; DISALLOW_IMPLICIT_CONSTRUCTORS(JitCompileTask); }; @@ -104,5 +115,9 @@ void JitInstrumentationListener::InvokeVirtualOrInterface(Thread* thread, } } +void JitInstrumentationCache::WaitForCompilationToFinish(Thread* self) { + thread_pool_->Wait(self, false, false); +} + } // namespace jit } // namespace art diff --git a/runtime/jit/jit_instrumentation.h b/runtime/jit/jit_instrumentation.h index 6fdef6585d..9eb464b841 100644 --- a/runtime/jit/jit_instrumentation.h +++ b/runtime/jit/jit_instrumentation.h @@ -50,6 +50,8 @@ class JitInstrumentationCache { SHARED_REQUIRES(Locks::mutator_lock_); void CreateThreadPool(); void DeleteThreadPool(); + // Wait until there is no more pending compilation tasks. 
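  // A plausible call site for the method declared below, given that each JitCompileTask now pins
  // its declaring class with a global ref until the task finishes; the real callers are outside
  // this hunk, so this is only a sketch:
  //
  //   jit::Jit* jit = Runtime::Current()->GetJit();
  //   if (jit != nullptr) {
  //     jit->GetInstrumentationCache()->WaitForCompilationToFinish(self);
  //   }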
+ void WaitForCompilationToFinish(Thread* self); private: size_t hot_method_threshold_; diff --git a/runtime/lambda/art_lambda_method.h b/runtime/lambda/art_lambda_method.h index 892d8c6f6b..ea13eb7af6 100644 --- a/runtime/lambda/art_lambda_method.h +++ b/runtime/lambda/art_lambda_method.h @@ -35,7 +35,7 @@ class ArtLambdaMethod { // (Ownership of strings is retained by the caller and the lifetime should exceed this class). ArtLambdaMethod(ArtMethod* target_method, const char* captured_variables_type_descriptor, - const char* captured_variables_shorty_, + const char* captured_variables_shorty, bool innate_lambda = true); // Get the target method for this lambda that would be used by the invoke-lambda dex instruction. diff --git a/runtime/lambda/box_table.cc b/runtime/lambda/box_table.cc index 26575fd995..8eef10bbad 100644 --- a/runtime/lambda/box_table.cc +++ b/runtime/lambda/box_table.cc @@ -18,6 +18,8 @@ #include "base/mutex.h" #include "common_throws.h" #include "gc_root-inl.h" +#include "lambda/closure.h" +#include "lambda/leaking_allocator.h" #include "mirror/method.h" #include "mirror/object-inl.h" #include "thread.h" @@ -26,11 +28,53 @@ namespace art { namespace lambda { +// Temporarily represent the lambda Closure as its raw bytes in an array. +// TODO: Generate a proxy class for the closure when boxing the first time. +using BoxedClosurePointerType = mirror::ByteArray*; + +static mirror::Class* GetBoxedClosureClass() SHARED_REQUIRES(Locks::mutator_lock_) { + return mirror::ByteArray::GetArrayClass(); +} + +namespace { + // Convenience functions to allocating/deleting box table copies of the closures. + struct ClosureAllocator { + // Deletes a Closure that was allocated through ::Allocate. + static void Delete(Closure* ptr) { + delete[] reinterpret_cast<char*>(ptr); + } + + // Returns a well-aligned pointer to a newly allocated Closure on the 'new' heap. + static Closure* Allocate(size_t size) { + DCHECK_GE(size, sizeof(Closure)); + + // TODO: Maybe point to the interior of the boxed closure object after we add proxy support? + Closure* closure = reinterpret_cast<Closure*>(new char[size]); + DCHECK_ALIGNED(closure, alignof(Closure)); + return closure; + } + }; +} // namespace BoxTable::BoxTable() : allow_new_weaks_(true), new_weaks_condition_("lambda box table allowed weaks", *Locks::lambda_table_lock_) {} +BoxTable::~BoxTable() { + // Free all the copies of our closures. + for (auto map_iterator = map_.begin(); map_iterator != map_.end(); ++map_iterator) { + std::pair<UnorderedMapKeyType, ValueType>& key_value_pair = *map_iterator; + + Closure* closure = key_value_pair.first; + + // Remove from the map first, so that it doesn't try to access dangling pointer. + map_iterator = map_.Erase(map_iterator); + + // Safe to delete, no dangling pointers. + ClosureAllocator::Delete(closure); + } +} + mirror::Object* BoxTable::BoxLambda(const ClosureType& closure) { Thread* self = Thread::Current(); @@ -58,22 +102,29 @@ mirror::Object* BoxTable::BoxLambda(const ClosureType& closure) { // Release the lambda table lock here, so that thread suspension is allowed. - // Convert the ArtMethod into a java.lang.reflect.Method which will serve + // Convert the Closure into a managed byte[] which will serve // as the temporary 'boxed' version of the lambda. This is good enough // to check all the basic object identities that a boxed lambda must retain. + // It's also good enough to contain all the captured primitive variables. // TODO: Boxing an innate lambda (i.e. 
made with create-lambda) should make a proxy class // TODO: Boxing a learned lambda (i.e. made with unbox-lambda) should return the original object - mirror::Method* method_as_object = - mirror::Method::CreateFromArtMethod(self, closure); + BoxedClosurePointerType closure_as_array_object = + mirror::ByteArray::Alloc(self, closure->GetSize()); + // There are no thread suspension points after this, so we don't need to put it into a handle. - if (UNLIKELY(method_as_object == nullptr)) { + if (UNLIKELY(closure_as_array_object == nullptr)) { // Most likely an OOM has occurred. CHECK(self->IsExceptionPending()); return nullptr; } + // Write the raw closure data into the byte[]. + closure->CopyTo(closure_as_array_object->GetRawData(sizeof(uint8_t), // component size + 0 /*index*/), // index + closure_as_array_object->GetLength()); + // The method has been successfully boxed into an object, now insert it into the hash map. { MutexLock mu(self, *Locks::lambda_table_lock_); @@ -87,38 +138,56 @@ mirror::Object* BoxTable::BoxLambda(const ClosureType& closure) { return value.Read(); } - // Otherwise we should insert it into the hash map in this thread. - map_.Insert(std::make_pair(closure, ValueType(method_as_object))); + // Otherwise we need to insert it into the hash map in this thread. + + // Make a copy for the box table to keep, in case the closure gets collected from the stack. + // TODO: GC may need to sweep for roots in the box table's copy of the closure. + Closure* closure_table_copy = ClosureAllocator::Allocate(closure->GetSize()); + closure->CopyTo(closure_table_copy, closure->GetSize()); + + // The closure_table_copy needs to be deleted by us manually when we erase it from the map. + + // Actually insert into the table. + map_.Insert({closure_table_copy, ValueType(closure_as_array_object)}); } - return method_as_object; + return closure_as_array_object; } bool BoxTable::UnboxLambda(mirror::Object* object, ClosureType* out_closure) { DCHECK(object != nullptr); *out_closure = nullptr; + Thread* self = Thread::Current(); + // Note that we do not need to access lambda_table_lock_ here // since we don't need to look at the map. mirror::Object* boxed_closure_object = object; - // Raise ClassCastException if object is not instanceof java.lang.reflect.Method - if (UNLIKELY(!boxed_closure_object->InstanceOf(mirror::Method::StaticClass()))) { - ThrowClassCastException(mirror::Method::StaticClass(), boxed_closure_object->GetClass()); + // Raise ClassCastException if object is not instanceof byte[] + if (UNLIKELY(!boxed_closure_object->InstanceOf(GetBoxedClosureClass()))) { + ThrowClassCastException(GetBoxedClosureClass(), boxed_closure_object->GetClass()); return false; } // TODO(iam): We must check that the closure object extends/implements the type - // specified in [type id]. This is not currently implemented since it's always a Method. + // specified in [type id]. This is not currently implemented since it's always a byte[]. // If we got this far, the inputs are valid. - // Write out the java.lang.reflect.Method's embedded ArtMethod* into the vreg target. - mirror::AbstractMethod* boxed_closure_as_method = - down_cast<mirror::AbstractMethod*>(boxed_closure_object); + // Shuffle the byte[] back into a raw closure, then allocate it, copy, and return it. 
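  // The round trip implied by the comment above, using only calls that appear in this file; the
  // boxed form is nothing more than the closure's raw bytes stored in a managed byte[]. Here
  // 'array' and 'unboxed_copy' are stand-in names for the locals used in BoxLambda/UnboxLambda:
  //
  //   // box:   native closure  ->  managed byte[]
  //   closure->CopyTo(array->GetRawData(sizeof(uint8_t), 0), array->GetLength());
  //   // unbox: managed byte[]  ->  native copy of the closure (leaked for now, see below)
  //   memcpy(unboxed_copy, array->GetData(), array->GetLength());
  //   DCHECK_EQ(unboxed_copy->GetSize(), static_cast<size_t>(array->GetLength()));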
+ BoxedClosurePointerType boxed_closure_as_array = + down_cast<BoxedClosurePointerType>(boxed_closure_object); + + const int8_t* unaligned_interior_closure = boxed_closure_as_array->GetData(); - ArtMethod* unboxed_closure = boxed_closure_as_method->GetArtMethod(); - DCHECK(unboxed_closure != nullptr); + // Allocate a copy that can "escape" and copy the closure data into that. + Closure* unboxed_closure = + LeakingAllocator::MakeFlexibleInstance<Closure>(self, boxed_closure_as_array->GetLength()); + // TODO: don't just memcpy the closure, it's unsafe when we add references to the mix. + memcpy(unboxed_closure, unaligned_interior_closure, boxed_closure_as_array->GetLength()); + + DCHECK_EQ(unboxed_closure->GetSize(), static_cast<size_t>(boxed_closure_as_array->GetLength())); *out_closure = unboxed_closure; return true; @@ -127,7 +196,7 @@ bool BoxTable::UnboxLambda(mirror::Object* object, ClosureType* out_closure) { BoxTable::ValueType BoxTable::FindBoxedLambda(const ClosureType& closure) const { auto map_iterator = map_.Find(closure); if (map_iterator != map_.end()) { - const std::pair<ClosureType, ValueType>& key_value_pair = *map_iterator; + const std::pair<UnorderedMapKeyType, ValueType>& key_value_pair = *map_iterator; const ValueType& value = key_value_pair.second; DCHECK(!value.IsNull()); // Never store null boxes. @@ -157,7 +226,7 @@ void BoxTable::SweepWeakBoxedLambdas(IsMarkedVisitor* visitor) { */ std::vector<ClosureType> remove_list; for (auto map_iterator = map_.begin(); map_iterator != map_.end(); ) { - std::pair<ClosureType, ValueType>& key_value_pair = *map_iterator; + std::pair<UnorderedMapKeyType, ValueType>& key_value_pair = *map_iterator; const ValueType& old_value = key_value_pair.second; @@ -166,10 +235,15 @@ void BoxTable::SweepWeakBoxedLambdas(IsMarkedVisitor* visitor) { mirror::Object* new_value = visitor->IsMarked(old_value_raw); if (new_value == nullptr) { - const ClosureType& closure = key_value_pair.first; // The object has been swept away. + const ClosureType& closure = key_value_pair.first; + // Delete the entry from the map. - map_iterator = map_.Erase(map_.Find(closure)); + map_iterator = map_.Erase(map_iterator); + + // Clean up the memory by deleting the closure. + ClosureAllocator::Delete(closure); + } else { // The object has been moved. // Update the map. @@ -208,16 +282,33 @@ void BoxTable::BroadcastForNewWeakBoxedLambdas() { new_weaks_condition_.Broadcast(self); } -bool BoxTable::EqualsFn::operator()(const ClosureType& lhs, const ClosureType& rhs) const { +void BoxTable::EmptyFn::MakeEmpty(std::pair<UnorderedMapKeyType, ValueType>& item) const { + item.first = nullptr; + + Locks::mutator_lock_->AssertSharedHeld(Thread::Current()); + item.second = ValueType(); // Also clear the GC root. +} + +bool BoxTable::EmptyFn::IsEmpty(const std::pair<UnorderedMapKeyType, ValueType>& item) const { + return item.first == nullptr; +} + +bool BoxTable::EqualsFn::operator()(const UnorderedMapKeyType& lhs, + const UnorderedMapKeyType& rhs) const { // Nothing needs this right now, but leave this assertion for later when // we need to look at the references inside of the closure. 
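  // EqualsFn here and HashFn just below are the usual art::HashMap pair; the invariant the box
  // table depends on (and which will need revisiting once closures hold movable references):
  //
  //   if (lhs->ReferenceEquals(rhs)) {
  //     DCHECK_EQ(lhs->GetHashCode(), rhs->GetHashCode());   // equal closures must hash equally
  //   }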
- if (kIsDebugBuild) { - Locks::mutator_lock_->AssertSharedHeld(Thread::Current()); - } + Locks::mutator_lock_->AssertSharedHeld(Thread::Current()); + + return lhs->ReferenceEquals(rhs); +} + +size_t BoxTable::HashFn::operator()(const UnorderedMapKeyType& key) const { + const lambda::Closure* closure = key; + DCHECK_ALIGNED(closure, alignof(lambda::Closure)); - // TODO: Need rework to use read barriers once closures have references inside of them that can - // move. Until then, it's safe to just compare the data inside of it directly. - return lhs == rhs; + // Need to hold mutator_lock_ before calling into Closure::GetHashCode. + Locks::mutator_lock_->AssertSharedHeld(Thread::Current()); + return closure->GetHashCode(); } } // namespace lambda diff --git a/runtime/lambda/box_table.h b/runtime/lambda/box_table.h index 9ffda6658f..adb733271e 100644 --- a/runtime/lambda/box_table.h +++ b/runtime/lambda/box_table.h @@ -34,6 +34,7 @@ class Object; // forward declaration } // namespace mirror namespace lambda { +struct Closure; // forward declaration /* * Store a table of boxed lambdas. This is required to maintain object referential equality @@ -44,7 +45,7 @@ namespace lambda { */ class BoxTable FINAL { public: - using ClosureType = art::ArtMethod*; + using ClosureType = art::lambda::Closure*; // Boxes a closure into an object. Returns null and throws an exception on failure. mirror::Object* BoxLambda(const ClosureType& closure) @@ -72,10 +73,9 @@ class BoxTable FINAL { REQUIRES(!Locks::lambda_table_lock_); BoxTable(); - ~BoxTable() = default; + ~BoxTable(); private: - // FIXME: This needs to be a GcRoot. // Explanation: // - After all threads are suspended (exclusive mutator lock), // the concurrent-copying GC can move objects from the "from" space to the "to" space. @@ -97,30 +97,30 @@ class BoxTable FINAL { void BlockUntilWeaksAllowed() SHARED_REQUIRES(Locks::lambda_table_lock_); + // Wrap the Closure into a unique_ptr so that the HashMap can delete its memory automatically. 
+ using UnorderedMapKeyType = ClosureType; + + // EmptyFn implementation for art::HashMap struct EmptyFn { - void MakeEmpty(std::pair<ClosureType, ValueType>& item) const { - item.first = nullptr; - } - bool IsEmpty(const std::pair<ClosureType, ValueType>& item) const { - return item.first == nullptr; - } + void MakeEmpty(std::pair<UnorderedMapKeyType, ValueType>& item) const + NO_THREAD_SAFETY_ANALYSIS; // SHARED_REQUIRES(Locks::mutator_lock_) + + bool IsEmpty(const std::pair<UnorderedMapKeyType, ValueType>& item) const; }; // HashFn implementation for art::HashMap struct HashFn { - size_t operator()(const ClosureType& key) const { - // TODO(iam): Rewrite hash function when ClosureType is no longer an ArtMethod* - return static_cast<size_t>(reinterpret_cast<uintptr_t>(key)); - } + size_t operator()(const UnorderedMapKeyType& key) const + NO_THREAD_SAFETY_ANALYSIS; // SHARED_REQUIRES(Locks::mutator_lock_) }; // EqualsFn implementation for art::HashMap struct EqualsFn { - bool operator()(const ClosureType& lhs, const ClosureType& rhs) const; + bool operator()(const UnorderedMapKeyType& lhs, const UnorderedMapKeyType& rhs) const + NO_THREAD_SAFETY_ANALYSIS; // SHARED_REQUIRES(Locks::mutator_lock_) }; - using UnorderedMap = art::HashMap<ClosureType, + using UnorderedMap = art::HashMap<UnorderedMapKeyType, ValueType, EmptyFn, HashFn, diff --git a/runtime/lambda/closure.cc b/runtime/lambda/closure.cc index 95a17c660c..179e4ee7f2 100644 --- a/runtime/lambda/closure.cc +++ b/runtime/lambda/closure.cc @@ -124,6 +124,55 @@ void Closure::CopyTo(void* target, size_t target_size) const { memcpy(target, this, GetSize()); } +ArtMethod* Closure::GetTargetMethod() const { + return const_cast<ArtMethod*>(lambda_info_->GetArtMethod()); +} + +uint32_t Closure::GetHashCode() const { + // Start with a non-zero constant, a prime number. + uint32_t result = 17; + + // Include the ArtMethod in the hash. + { + uintptr_t method = reinterpret_cast<uintptr_t>(GetTargetMethod()); + result = 31 * result + Low32Bits(method); + if (sizeof(method) == sizeof(uint64_t)) { + result = 31 * result + High32Bits(method); + } + } + + // Include a hash for each captured variable. + for (size_t i = 0; i < GetCapturedVariablesSize(); ++i) { + // TODO: not safe for GC-able values since the address can move and the hash code would change. + uint8_t captured_variable_raw_value; + CopyUnsafeAtOffset<uint8_t>(i, /*out*/&captured_variable_raw_value); // NOLINT: [whitespace/comma] [3] + + result = 31 * result + captured_variable_raw_value; + } + + // TODO: Fix above loop to work for objects and lambdas. + static_assert(kClosureSupportsGarbageCollection == false, + "Need to update above loop to read the hash code from the " + "objects and lambdas recursively"); + + return result; +} + +bool Closure::ReferenceEquals(const Closure* other) const { + DCHECK(other != nullptr); + + // TODO: Need rework to use read barriers once closures have references inside of them that can + // move. Until then, it's safe to just compare the data inside of it directly. + static_assert(kClosureSupportsReferences == false, + "Unsafe to use memcmp in read barrier collector"); + + if (GetSize() != other->GetSize()) { + return false; + } + + return memcmp(this, other, GetSize()) == 0; +} + size_t Closure::GetNumberOfCapturedVariables() const { // TODO: refactor into art_lambda_method.h. Parsing should only be required here as a DCHECK.
VariableInfo variable_info = diff --git a/runtime/lambda/closure.h b/runtime/lambda/closure.h index 60d117e9e2..31ff1944d2 100644 --- a/runtime/lambda/closure.h +++ b/runtime/lambda/closure.h @@ -49,6 +49,19 @@ struct PACKED(sizeof(ArtLambdaMethod*)) Closure { // The target_size must be at least as large as GetSize(). void CopyTo(void* target, size_t target_size) const; + // Get the target method, i.e. the method that will be dispatched into with invoke-lambda. + ArtMethod* GetTargetMethod() const; + + // Calculates the hash code. Value is recomputed each time. + uint32_t GetHashCode() const SHARED_REQUIRES(Locks::mutator_lock_); + + // Is this the same closure as other? e.g. same target method, same variables captured. + // + // Determines whether the two Closures are interchangeable instances. + // Does *not* call Object#equals recursively. If two Closures compare ReferenceEquals true that + // means that they are interchangeable values (usually for the purpose of boxing/unboxing). + bool ReferenceEquals(const Closure* other) const SHARED_REQUIRES(Locks::mutator_lock_); + // How many variables were captured? size_t GetNumberOfCapturedVariables() const; diff --git a/runtime/lambda/closure_builder-inl.h b/runtime/lambda/closure_builder-inl.h index 41a803baf2..3cec21f3ba 100644 --- a/runtime/lambda/closure_builder-inl.h +++ b/runtime/lambda/closure_builder-inl.h @@ -35,6 +35,8 @@ void ClosureBuilder::CaptureVariablePrimitive(T value) { values_.push_back(value_storage); size_ += sizeof(T); + + shorty_types_ += kShortyType; } } // namespace lambda diff --git a/runtime/lambda/closure_builder.cc b/runtime/lambda/closure_builder.cc index 9c37db8fcc..739e965238 100644 --- a/runtime/lambda/closure_builder.cc +++ b/runtime/lambda/closure_builder.cc @@ -64,6 +64,8 @@ void ClosureBuilder::CaptureVariableObject(mirror::Object* object) { UNIMPLEMENTED(FATAL) << "can't yet safely capture objects with read barrier"; } } + + shorty_types_ += ShortyFieldType::kObject; } void ClosureBuilder::CaptureVariableLambda(Closure* closure) { @@ -78,6 +80,8 @@ void ClosureBuilder::CaptureVariableLambda(Closure* closure) { // A closure may be sized dynamically, so always query it for the true size. 
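  // The shorty_types_ updates added in this change keep exactly one character per captured value;
  // WriteValues() below DCHECKs that string against the target method's descriptor. A sketch of
  // the bookkeeping (the values are arbitrary):
  //
  //   ClosureBuilder builder;
  //   builder.CaptureVariablePrimitive<int32_t>(42);     // shorty_types_ becomes "I"
  //   builder.CaptureVariablePrimitive<int64_t>(1234);   // shorty_types_ becomes "IJ"
  //   DCHECK_EQ(builder.GetCaptureCount(), builder.GetCapturedVariableShortyTypes().size());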
size_ += closure->GetSize(); + + shorty_types_ += ShortyFieldType::kLambda; } size_t ClosureBuilder::GetSize() const { @@ -85,9 +89,15 @@ size_t ClosureBuilder::GetSize() const { } size_t ClosureBuilder::GetCaptureCount() const { + DCHECK_EQ(values_.size(), shorty_types_.size()); return values_.size(); } +const std::string& ClosureBuilder::GetCapturedVariableShortyTypes() const { + DCHECK_EQ(values_.size(), shorty_types_.size()); + return shorty_types_; +} + Closure* ClosureBuilder::CreateInPlace(void* memory, ArtLambdaMethod* target_method) const { DCHECK(memory != nullptr); DCHECK(target_method != nullptr); @@ -138,11 +148,14 @@ size_t ClosureBuilder::WriteValues(ArtLambdaMethod* target_method, size_t variables_size) const { size_t total_size = header_size; const char* shorty_types = target_method->GetCapturedVariablesShortyTypeDescriptor(); + DCHECK_STREQ(shorty_types, shorty_types_.c_str()); size_t variables_offset = 0; size_t remaining_size = variables_size; const size_t shorty_count = target_method->GetNumberOfCapturedVariables(); + DCHECK_EQ(shorty_count, GetCaptureCount()); + for (size_t i = 0; i < shorty_count; ++i) { ShortyFieldType shorty{shorty_types[i]}; // NOLINT [readability/braces] [4] diff --git a/runtime/lambda/closure_builder.h b/runtime/lambda/closure_builder.h index 542e12afaa..23eb484529 100644 --- a/runtime/lambda/closure_builder.h +++ b/runtime/lambda/closure_builder.h @@ -40,13 +40,12 @@ class ArtLambdaMethod; // forward declaration // // The mutator lock must be held for the duration of the lifetime of this object, // since it needs to temporarily store heap references into an internal list. -class ClosureBuilder : ValueObject { +class ClosureBuilder { public: using ShortyTypeEnum = decltype(ShortyFieldType::kByte); - // Mark this primitive value to be captured as the specified type. - template <typename T, ShortyTypeEnum kShortyType> + template <typename T, ShortyTypeEnum kShortyType = ShortyFieldTypeSelectEnum<T>::value> void CaptureVariablePrimitive(T value); // Mark this object reference to be captured. @@ -63,6 +62,9 @@ class ClosureBuilder : ValueObject { // Returns how many variables have been captured so far. size_t GetCaptureCount() const; + // Get the list of captured variables' shorty field types. + const std::string& GetCapturedVariableShortyTypes() const; + // Creates a closure in-place and writes out the data into 'memory'. // Memory must be at least 'GetSize' bytes large. // All previously marked data to be captured is now written out. @@ -93,6 +95,7 @@ class ClosureBuilder : ValueObject { size_t size_ = kInitialSize; bool is_dynamic_size_ = false; std::vector<ShortyFieldTypeTraits::MaxType> values_; + std::string shorty_types_; }; } // namespace lambda diff --git a/runtime/lambda/leaking_allocator.cc b/runtime/lambda/leaking_allocator.cc new file mode 100644 index 0000000000..4910732a6c --- /dev/null +++ b/runtime/lambda/leaking_allocator.cc @@ -0,0 +1,30 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "lambda/leaking_allocator.h" +#include "linear_alloc.h" +#include "runtime.h" + +namespace art { +namespace lambda { + +void* LeakingAllocator::AllocateMemory(Thread* self, size_t byte_size) { + // TODO: use GetAllocatorForClassLoader to allocate lambda ArtMethod data. + return Runtime::Current()->GetLinearAlloc()->Alloc(self, byte_size); +} + +} // namespace lambda +} // namespace art diff --git a/runtime/lambda/leaking_allocator.h b/runtime/lambda/leaking_allocator.h new file mode 100644 index 0000000000..c3222d0485 --- /dev/null +++ b/runtime/lambda/leaking_allocator.h @@ -0,0 +1,55 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef ART_RUNTIME_LAMBDA_LEAKING_ALLOCATOR_H_ +#define ART_RUNTIME_LAMBDA_LEAKING_ALLOCATOR_H_ + +#include <utility> // std::forward + +namespace art { +class Thread; // forward declaration + +namespace lambda { + +// Temporary class to centralize all the leaking allocations. +// Allocations made through this class are never freed, but it is a placeholder +// that means that the calling code needs to be rewritten to properly: +// +// (a) Have a lifetime scoped to some other entity. +// (b) Not be allocated over and over again if it was already allocated once (immutable data). +// +// TODO: do all of the above a/b for each callsite, and delete this class. +class LeakingAllocator { + public: + // Allocate byte_size bytes worth of memory. Never freed. + static void* AllocateMemory(Thread* self, size_t byte_size); + + // Make a new instance of T, flexibly sized, in-place at newly allocated memory. Never freed. + template <typename T, typename... Args> + static T* MakeFlexibleInstance(Thread* self, size_t byte_size, Args&&... args) { + return new (AllocateMemory(self, byte_size)) T(std::forward<Args>(args)...); + } + + // Make a new instance of T in-place at newly allocated memory. Never freed. + template <typename T, typename... Args> + static T* MakeInstance(Thread* self, Args&&... 
args) { + return new (AllocateMemory(self, sizeof(T))) T(std::forward<Args>(args)...); + } +}; + +} // namespace lambda +} // namespace art + +#endif // ART_RUNTIME_LAMBDA_LEAKING_ALLOCATOR_H_ diff --git a/runtime/mirror/class_loader.h b/runtime/mirror/class_loader.h index f27b6155ce..c2a65d62e2 100644 --- a/runtime/mirror/class_loader.h +++ b/runtime/mirror/class_loader.h @@ -35,18 +35,31 @@ class MANAGED ClassLoader : public Object { static constexpr uint32_t InstanceSize() { return sizeof(ClassLoader); } + ClassLoader* GetParent() SHARED_REQUIRES(Locks::mutator_lock_) { return GetFieldObject<ClassLoader>(OFFSET_OF_OBJECT_MEMBER(ClassLoader, parent_)); } + ClassTable* GetClassTable() SHARED_REQUIRES(Locks::mutator_lock_) { return reinterpret_cast<ClassTable*>( GetField64(OFFSET_OF_OBJECT_MEMBER(ClassLoader, class_table_))); } + void SetClassTable(ClassTable* class_table) SHARED_REQUIRES(Locks::mutator_lock_) { SetField64<false>(OFFSET_OF_OBJECT_MEMBER(ClassLoader, class_table_), reinterpret_cast<uint64_t>(class_table)); } + LinearAlloc* GetAllocator() SHARED_REQUIRES(Locks::mutator_lock_) { + return reinterpret_cast<LinearAlloc*>( + GetField64(OFFSET_OF_OBJECT_MEMBER(ClassLoader, allocator_))); + } + + void SetAllocator(LinearAlloc* allocator) SHARED_REQUIRES(Locks::mutator_lock_) { + SetField64<false>(OFFSET_OF_OBJECT_MEMBER(ClassLoader, allocator_), + reinterpret_cast<uint64_t>(allocator)); + } + private: // Visit instance fields of the class loader as well as its associated classes. // Null class loader is handled by ClassLinker::VisitClassRoots. @@ -61,6 +74,7 @@ class MANAGED ClassLoader : public Object { HeapReference<Object> proxyCache_; // Native pointer to class table, need to zero this out when image writing. uint32_t padding_ ATTRIBUTE_UNUSED; + uint64_t allocator_; uint64_t class_table_; friend struct art::ClassLoaderOffsets; // for verifying offset information diff --git a/runtime/mirror/throwable.cc b/runtime/mirror/throwable.cc index e8633def48..e215994fb7 100644 --- a/runtime/mirror/throwable.cc +++ b/runtime/mirror/throwable.cc @@ -71,18 +71,14 @@ bool Throwable::IsCheckedException() { int32_t Throwable::GetStackDepth() { Object* stack_state = GetStackState(); - if (stack_state == nullptr) { + if (stack_state == nullptr || !stack_state->IsObjectArray()) { return -1; } - if (!stack_state->IsIntArray() && !stack_state->IsLongArray()) { - return -1; - } - mirror::PointerArray* method_trace = down_cast<mirror::PointerArray*>(stack_state->AsArray()); - int32_t array_len = method_trace->GetLength(); - // The format is [method pointers][pcs] so the depth is half the length (see method - // BuildInternalStackTraceVisitor::Init). - CHECK_EQ(array_len % 2, 0); - return array_len / 2; + mirror::ObjectArray<mirror::Object>* const trace = stack_state->AsObjectArray<mirror::Object>(); + const int32_t array_len = trace->GetLength(); + DCHECK_GT(array_len, 0); + // See method BuildInternalStackTraceVisitor::Init for the format. 
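  // Layout behind the "array_len - 1" below and the Get(0) lookup in Dump(), as far as this hunk
  // shows it; what the trailing elements hold is not visible here (presumably one reference per
  // frame, kept so the frame's class stays reachable), so treat this as a sketch:
  //
  //   trace[0]      : PointerArray = [method_0 .. method_{n-1}, dex_pc_0 .. dex_pc_{n-1}]
  //   trace[1 .. n] : one entry per frame   =>   depth == trace->GetLength() - 1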
+ return array_len - 1; } std::string Throwable::Dump() { @@ -95,18 +91,22 @@ std::string Throwable::Dump() { result += "\n"; Object* stack_state = GetStackState(); // check stack state isn't missing or corrupt - if (stack_state != nullptr && - (stack_state->IsIntArray() || stack_state->IsLongArray())) { + if (stack_state != nullptr && stack_state->IsObjectArray()) { + mirror::ObjectArray<mirror::Object>* object_array = + stack_state->AsObjectArray<mirror::Object>(); // Decode the internal stack trace into the depth and method trace - // Format is [method pointers][pcs] - auto* method_trace = down_cast<mirror::PointerArray*>(stack_state->AsArray()); - auto array_len = method_trace->GetLength(); + // See method BuildInternalStackTraceVisitor::Init for the format. + DCHECK_GT(object_array->GetLength(), 0); + mirror::Object* methods_and_dex_pcs = object_array->Get(0); + DCHECK(methods_and_dex_pcs->IsIntArray() || methods_and_dex_pcs->IsLongArray()); + mirror::PointerArray* method_trace = down_cast<mirror::PointerArray*>(methods_and_dex_pcs); + const int32_t array_len = method_trace->GetLength(); CHECK_EQ(array_len % 2, 0); const auto depth = array_len / 2; if (depth == 0) { result += "(Throwable with empty stack trace)"; } else { - auto ptr_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize(); + const size_t ptr_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize(); for (int32_t i = 0; i < depth; ++i) { ArtMethod* method = method_trace->GetElementPtrSize<ArtMethod*>(i, ptr_size); uintptr_t dex_pc = method_trace->GetElementPtrSize<uintptr_t>(i + depth, ptr_size); diff --git a/runtime/quick_exception_handler.cc b/runtime/quick_exception_handler.cc index 5c13e13f90..63f43cf3b2 100644 --- a/runtime/quick_exception_handler.cc +++ b/runtime/quick_exception_handler.cc @@ -20,6 +20,7 @@ #include "art_method-inl.h" #include "dex_instruction.h" #include "entrypoints/entrypoint_utils.h" +#include "entrypoints/quick/quick_entrypoints_enum.h" #include "entrypoints/runtime_asm_entrypoints.h" #include "handle_scope-inl.h" #include "mirror/class-inl.h" @@ -36,8 +37,9 @@ QuickExceptionHandler::QuickExceptionHandler(Thread* self, bool is_deoptimizatio : self_(self), context_(self->GetLongJumpContext()), is_deoptimization_(is_deoptimization), method_tracing_active_(is_deoptimization || Runtime::Current()->GetInstrumentation()->AreExitStubsInstalled()), - handler_quick_frame_(nullptr), handler_quick_frame_pc_(0), handler_method_(nullptr), - handler_dex_pc_(0), clear_exception_(false), handler_frame_depth_(kInvalidFrameDepth) { + handler_quick_frame_(nullptr), handler_quick_frame_pc_(0), handler_quick_arg0_(0), + handler_method_(nullptr), handler_dex_pc_(0), clear_exception_(false), + handler_frame_depth_(kInvalidFrameDepth) { } // Finds catch handler. @@ -260,19 +262,25 @@ void QuickExceptionHandler::SetCatchEnvironmentForOptimizedHandler(StackVisitor* // Prepares deoptimization. 
class DeoptimizeStackVisitor FINAL : public StackVisitor { public: - DeoptimizeStackVisitor(Thread* self, Context* context, QuickExceptionHandler* exception_handler) + DeoptimizeStackVisitor(Thread* self, + Context* context, + QuickExceptionHandler* exception_handler, + bool single_frame) SHARED_REQUIRES(Locks::mutator_lock_) : StackVisitor(self, context, StackVisitor::StackWalkKind::kIncludeInlinedFrames), exception_handler_(exception_handler), prev_shadow_frame_(nullptr), - stacked_shadow_frame_pushed_(false) { + stacked_shadow_frame_pushed_(false), + single_frame_deopt_(single_frame), + single_frame_done_(false) { } bool VisitFrame() OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) { exception_handler_->SetHandlerFrameDepth(GetFrameDepth()); ArtMethod* method = GetMethod(); - if (method == nullptr) { - // This is the upcall, we remember the frame and last pc so that we may long jump to them. + if (method == nullptr || single_frame_done_) { + // This is the upcall (or the next full frame in single-frame deopt), we remember the frame + // and last pc so that we may long jump to them. exception_handler_->SetHandlerQuickFramePc(GetCurrentQuickFramePc()); exception_handler_->SetHandlerQuickFrame(GetCurrentQuickFrame()); if (!stacked_shadow_frame_pushed_) { @@ -295,7 +303,13 @@ class DeoptimizeStackVisitor FINAL : public StackVisitor { CHECK_EQ(GetFrameDepth(), 1U); return true; } else { - return HandleDeoptimization(method); + HandleDeoptimization(method); + if (single_frame_deopt_ && !IsInInlinedFrame()) { + // Single-frame deopt ends at the first non-inlined frame and needs to store that method. + exception_handler_->SetHandlerQuickArg0(reinterpret_cast<uintptr_t>(method)); + single_frame_done_ = true; + } + return true; } } @@ -304,7 +318,7 @@ class DeoptimizeStackVisitor FINAL : public StackVisitor { return static_cast<VRegKind>(kinds.at(reg * 2)); } - bool HandleDeoptimization(ArtMethod* m) SHARED_REQUIRES(Locks::mutator_lock_) { + void HandleDeoptimization(ArtMethod* m) SHARED_REQUIRES(Locks::mutator_lock_) { const DexFile::CodeItem* code_item = m->GetCodeItem(); CHECK(code_item != nullptr) << "No code item for " << PrettyMethod(m); uint16_t num_regs = code_item->registers_size_; @@ -448,16 +462,20 @@ class DeoptimizeStackVisitor FINAL : public StackVisitor { // Will be popped after the long jump after DeoptimizeStack(), // right before interpreter::EnterInterpreterFromDeoptimize(). stacked_shadow_frame_pushed_ = true; - GetThread()->PushStackedShadowFrame(new_frame, - StackedShadowFrameType::kDeoptimizationShadowFrame); + GetThread()->PushStackedShadowFrame( + new_frame, + single_frame_deopt_ + ? 
StackedShadowFrameType::kSingleFrameDeoptimizationShadowFrame + : StackedShadowFrameType::kDeoptimizationShadowFrame); } prev_shadow_frame_ = new_frame; - return true; } QuickExceptionHandler* const exception_handler_; ShadowFrame* prev_shadow_frame_; bool stacked_shadow_frame_pushed_; + const bool single_frame_deopt_; + bool single_frame_done_; DISALLOW_COPY_AND_ASSIGN(DeoptimizeStackVisitor); }; @@ -468,13 +486,46 @@ void QuickExceptionHandler::DeoptimizeStack() { self_->DumpStack(LOG(INFO) << "Deoptimizing: "); } - DeoptimizeStackVisitor visitor(self_, context_, this); + DeoptimizeStackVisitor visitor(self_, context_, this, false); visitor.WalkStack(true); // Restore deoptimization exception self_->SetException(Thread::GetDeoptimizationException()); } +void QuickExceptionHandler::DeoptimizeSingleFrame() { + DCHECK(is_deoptimization_); + + if (VLOG_IS_ON(deopt) || kDebugExceptionDelivery) { + LOG(INFO) << "Single-frame deopting:"; + DumpFramesWithType(self_, true); + } + + DeoptimizeStackVisitor visitor(self_, context_, this, true); + visitor.WalkStack(true); + + // PC needs to be of the quick-to-interpreter bridge. + int32_t offset; + #ifdef __LP64__ + offset = GetThreadOffset<8>(kQuickQuickToInterpreterBridge).Int32Value(); + #else + offset = GetThreadOffset<4>(kQuickQuickToInterpreterBridge).Int32Value(); + #endif + handler_quick_frame_pc_ = *reinterpret_cast<uintptr_t*>( + reinterpret_cast<uint8_t*>(self_) + offset); +} + +void QuickExceptionHandler::DeoptimizeSingleFrameArchDependentFixup() { + // Architecture-dependent work. This is to get the LR right for x86 and x86-64. + + if (kRuntimeISA == InstructionSet::kX86 || kRuntimeISA == InstructionSet::kX86_64) { + // On x86, the return address is on the stack, so just reuse it. Otherwise we would have to + // change how longjump works. + handler_quick_frame_ = reinterpret_cast<ArtMethod**>( + reinterpret_cast<uintptr_t>(handler_quick_frame_) - sizeof(void*)); + } +} + // Unwinds all instrumentation stack frame prior to catch handler or upcall. class InstrumentationStackVisitor : public StackVisitor { public: @@ -529,15 +580,67 @@ void QuickExceptionHandler::UpdateInstrumentationStack() { } } -void QuickExceptionHandler::DoLongJump() { +void QuickExceptionHandler::DoLongJump(bool smash_caller_saves) { // Place context back on thread so it will be available when we continue. self_->ReleaseLongJumpContext(context_); context_->SetSP(reinterpret_cast<uintptr_t>(handler_quick_frame_)); CHECK_NE(handler_quick_frame_pc_, 0u); context_->SetPC(handler_quick_frame_pc_); - context_->SmashCallerSaves(); + context_->SetArg0(handler_quick_arg0_); + if (smash_caller_saves) { + context_->SmashCallerSaves(); + } context_->DoLongJump(); UNREACHABLE(); } +// Prints out methods with their type of frame. 
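// (Legend for the dump produced by the visitor below: "S" marks a frame executed in the
// interpreter through a shadow frame, "Q" a compiled quick frame, a trailing "i" an inlined
// quick frame, "R" a runtime method and "N <transition>" a transition/upcall frame, the last
// two only when details are requested. This is what helps verify that a single-frame deopt
// really converted only one frame.)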
+class DumpFramesWithTypeStackVisitor FINAL : public StackVisitor { + public: + DumpFramesWithTypeStackVisitor(Thread* self, bool show_details = false) + SHARED_REQUIRES(Locks::mutator_lock_) + : StackVisitor(self, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames), + show_details_(show_details) {} + + bool VisitFrame() OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) { + ArtMethod* method = GetMethod(); + if (show_details_) { + LOG(INFO) << "|> pc = " << std::hex << GetCurrentQuickFramePc(); + LOG(INFO) << "|> addr = " << std::hex << reinterpret_cast<uintptr_t>(GetCurrentQuickFrame()); + if (GetCurrentQuickFrame() != nullptr && method != nullptr) { + LOG(INFO) << "|> ret = " << std::hex << GetReturnPc(); + } + } + if (method == nullptr) { + // Transition, do go on, we want to unwind over bridges, all the way. + if (show_details_) { + LOG(INFO) << "N <transition>"; + } + return true; + } else if (method->IsRuntimeMethod()) { + if (show_details_) { + LOG(INFO) << "R " << PrettyMethod(method, true); + } + return true; + } else { + bool is_shadow = GetCurrentShadowFrame() != nullptr; + LOG(INFO) << (is_shadow ? "S" : "Q") + << ((!is_shadow && IsInInlinedFrame()) ? "i" : " ") + << " " + << PrettyMethod(method, true); + return true; // Go on. + } + } + + private: + bool show_details_; + + DISALLOW_COPY_AND_ASSIGN(DumpFramesWithTypeStackVisitor); +}; + +void QuickExceptionHandler::DumpFramesWithType(Thread* self, bool details) { + DumpFramesWithTypeStackVisitor visitor(self, details); + visitor.WalkStack(true); +} + } // namespace art diff --git a/runtime/quick_exception_handler.h b/runtime/quick_exception_handler.h index 2e05c7e1e5..89d6a25128 100644 --- a/runtime/quick_exception_handler.h +++ b/runtime/quick_exception_handler.h @@ -49,6 +49,9 @@ class QuickExceptionHandler { // Deoptimize the stack to the upcall. For every compiled frame, we create a "copy" // shadow frame that will be executed with the interpreter. void DeoptimizeStack() SHARED_REQUIRES(Locks::mutator_lock_); + void DeoptimizeSingleFrame() SHARED_REQUIRES(Locks::mutator_lock_); + void DeoptimizeSingleFrameArchDependentFixup() SHARED_REQUIRES(Locks::mutator_lock_); + // Update the instrumentation stack by removing all methods that will be unwound // by the exception being thrown. void UpdateInstrumentationStack() SHARED_REQUIRES(Locks::mutator_lock_); @@ -58,7 +61,7 @@ class QuickExceptionHandler { SHARED_REQUIRES(Locks::mutator_lock_); // Long jump either to a catch handler or to the upcall. - NO_RETURN void DoLongJump() SHARED_REQUIRES(Locks::mutator_lock_); + NO_RETURN void DoLongJump(bool smash_caller_saves = true) SHARED_REQUIRES(Locks::mutator_lock_); void SetHandlerQuickFrame(ArtMethod** handler_quick_frame) { handler_quick_frame_ = handler_quick_frame; @@ -68,6 +71,10 @@ class QuickExceptionHandler { handler_quick_frame_pc_ = handler_quick_frame_pc; } + void SetHandlerQuickArg0(uintptr_t handler_quick_arg0) { + handler_quick_arg0_ = handler_quick_arg0; + } + ArtMethod* GetHandlerMethod() const { return handler_method_; } @@ -92,6 +99,11 @@ class QuickExceptionHandler { handler_frame_depth_ = frame_depth; } + // Walk the stack frames of the given thread, printing out non-runtime methods with their types + // of frames. Helps to verify that single-frame deopt really only deopted one frame. 
+ static void DumpFramesWithType(Thread* self, bool details = false) + SHARED_REQUIRES(Locks::mutator_lock_); + private: Thread* const self_; Context* const context_; @@ -103,6 +115,8 @@ class QuickExceptionHandler { ArtMethod** handler_quick_frame_; // PC to branch to for the handler. uintptr_t handler_quick_frame_pc_; + // The value for argument 0. + uintptr_t handler_quick_arg0_; // The handler method to report to the debugger. ArtMethod* handler_method_; // The handler's dex PC, zero implies an uncaught exception. diff --git a/runtime/runtime.cc b/runtime/runtime.cc index 6b144cf48b..1f447d076b 100644 --- a/runtime/runtime.cc +++ b/runtime/runtime.cc @@ -274,9 +274,6 @@ Runtime::~Runtime() { VLOG(jit) << "Deleting jit"; jit_.reset(nullptr); } - linear_alloc_.reset(); - arena_pool_.reset(); - low_4gb_arena_pool_.reset(); // Shutdown the fault manager if it was initialized. fault_manager.Shutdown(); @@ -290,7 +287,13 @@ Runtime::~Runtime() { Thread::Shutdown(); QuasiAtomic::Shutdown(); verifier::MethodVerifier::Shutdown(); + + // Destroy allocators before shutting down the MemMap because they may use it. + linear_alloc_.reset(); + low_4gb_arena_pool_.reset(); + arena_pool_.reset(); MemMap::Shutdown(); + // TODO: acquire a static mutex on Runtime to avoid racing. CHECK(instance_ == nullptr || instance_ == this); instance_ = nullptr; @@ -941,13 +944,11 @@ bool Runtime::Init(const RuntimeOptions& raw_options, bool ignore_unrecognized) // can't be trimmed as easily. const bool use_malloc = IsAotCompiler(); arena_pool_.reset(new ArenaPool(use_malloc, false)); - if (IsCompiler() && Is64BitInstructionSet(kRuntimeISA)) { + if (IsAotCompiler() && Is64BitInstructionSet(kRuntimeISA)) { // 4gb, no malloc. Explanation in header. low_4gb_arena_pool_.reset(new ArenaPool(false, true)); - linear_alloc_.reset(new LinearAlloc(low_4gb_arena_pool_.get())); - } else { - linear_alloc_.reset(new LinearAlloc(arena_pool_.get())); } + linear_alloc_.reset(CreateLinearAlloc()); BlockSignals(); InitPlatformSignalHandlers(); @@ -1788,4 +1789,13 @@ bool Runtime::IsVerificationSoftFail() const { return verify_ == verifier::VerifyMode::kSoftFail; } +LinearAlloc* Runtime::CreateLinearAlloc() { + // For 64 bit compilers, it needs to be in low 4GB in the case where we are cross compiling for a + // 32 bit target. In this case, we have 32 bit pointers in the dex cache arrays which can't hold + // when we have 64 bit ArtMethod pointers. + return (IsAotCompiler() && Is64BitInstructionSet(kRuntimeISA)) + ? new LinearAlloc(low_4gb_arena_pool_.get()) + : new LinearAlloc(arena_pool_.get()); +} + } // namespace art diff --git a/runtime/runtime.h b/runtime/runtime.h index a35eac1af8..6154c34ec5 100644 --- a/runtime/runtime.h +++ b/runtime/runtime.h @@ -570,6 +570,9 @@ class Runtime { // Called from class linker. void SetSentinel(mirror::Object* sentinel) SHARED_REQUIRES(Locks::mutator_lock_); + // Create a normal LinearAlloc or low 4gb version if we are 64 bit AOT compiler. + LinearAlloc* CreateLinearAlloc(); + private: static void InitPlatformSignalHandlers(); diff --git a/runtime/safe_map.h b/runtime/safe_map.h index 402c7e9cb5..7ac17b60d6 100644 --- a/runtime/safe_map.h +++ b/runtime/safe_map.h @@ -92,6 +92,11 @@ class SafeMap { DCHECK(result.second); // Check we didn't accidentally overwrite an existing value. return result.first; } + iterator Put(const K& k, const V&& v) { + std::pair<iterator, bool> result = map_.emplace(k, std::move(v)); + DCHECK(result.second); // Check we didn't accidentally overwrite an existing value. 
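The Put overload above takes the value as an rvalue reference and forwards it with std::move into std::map::emplace, so callers handing over a temporary avoid a copy. A standalone illustration of the emplace-with-move pattern (hypothetical value type, not ART code); note that because the parameter is declared const V&&, the std::move inside the overload will in practice still select the copy constructor for most value types, since a const rvalue cannot bind to a non-const V&&:

#include <cassert>
#include <map>
#include <string>
#include <utility>
#include <vector>

int main() {
  std::map<int, std::vector<std::string>> m;
  std::vector<std::string> value(1000, "x");
  // emplace + std::move hands the vector's buffer to the map instead of copying it.
  auto result = m.emplace(42, std::move(value));
  assert(result.second);  // the key was not already present
  // 'value' is now in a moved-from (typically empty) state.
  return 0;
}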
+ return result.first; + } // Used to insert a new mapping at a known position for better performance. iterator PutBefore(iterator pos, const K& k, const V& v) { @@ -100,16 +105,23 @@ class SafeMap { DCHECK(pos == map_.begin() || map_.key_comp()((--iterator(pos))->first, k)); return map_.emplace_hint(pos, k, v); } + iterator PutBefore(iterator pos, const K& k, const V&& v) { + // Check that we're using the correct position and the key is not in the map. + DCHECK(pos == map_.end() || map_.key_comp()(k, pos->first)); + DCHECK(pos == map_.begin() || map_.key_comp()((--iterator(pos))->first, k)); + return map_.emplace_hint(pos, k, std::move(v)); + } // Used to insert a new mapping or overwrite an existing mapping. Note that if the value type // of this container is a pointer, any overwritten pointer will be lost and if this container - // was the owner, you have a leak. - void Overwrite(const K& k, const V& v) { + // was the owner, you have a leak. Returns iterator pointing to the new or overwritten entry. + iterator Overwrite(const K& k, const V& v) { std::pair<iterator, bool> result = map_.insert(std::make_pair(k, v)); if (!result.second) { // Already there - update the value for the existing key result.first->second = v; } + return result.first; } bool Equals(const Self& rhs) const { diff --git a/runtime/stack.cc b/runtime/stack.cc index d739743151..7f72f8ab61 100644 --- a/runtime/stack.cc +++ b/runtime/stack.cc @@ -840,23 +840,30 @@ void StackVisitor::SanityCheckFrame() const { } else { CHECK(declaring_class == nullptr); } - auto* runtime = Runtime::Current(); - auto* la = runtime->GetLinearAlloc(); - if (!la->Contains(method)) { - // Check image space. - bool in_image = false; - for (auto& space : runtime->GetHeap()->GetContinuousSpaces()) { - if (space->IsImageSpace()) { - auto* image_space = space->AsImageSpace(); - const auto& header = image_space->GetImageHeader(); - const auto* methods = &header.GetMethodsSection(); - if (methods->Contains(reinterpret_cast<const uint8_t*>(method) - image_space->Begin())) { - in_image = true; - break; + Runtime* const runtime = Runtime::Current(); + LinearAlloc* const linear_alloc = runtime->GetLinearAlloc(); + if (!linear_alloc->Contains(method)) { + // Check class linker linear allocs. + mirror::Class* klass = method->GetDeclaringClass(); + LinearAlloc* const class_linear_alloc = (klass != nullptr) + ? ClassLinker::GetAllocatorForClassLoader(klass->GetClassLoader()) + : linear_alloc; + if (!class_linear_alloc->Contains(method)) { + // Check image space. + bool in_image = false; + for (auto& space : runtime->GetHeap()->GetContinuousSpaces()) { + if (space->IsImageSpace()) { + auto* image_space = space->AsImageSpace(); + const auto& header = image_space->GetImageHeader(); + const auto* methods = &header.GetMethodsSection(); + if (methods->Contains(reinterpret_cast<const uint8_t*>(method) - image_space->Begin())) { + in_image = true; + break; + } } } + CHECK(in_image) << PrettyMethod(method) << " not in linear alloc or image"; } - CHECK(in_image) << PrettyMethod(method) << " not in linear alloc or image"; } if (cur_quick_frame_ != nullptr) { method->AssertPcIsWithinQuickCode(cur_quick_frame_pc_); diff --git a/runtime/stack.h b/runtime/stack.h index b805239836..292c745090 100644 --- a/runtime/stack.h +++ b/runtime/stack.h @@ -62,6 +62,10 @@ template<class MirrorType> class MANAGED StackReference : public mirror::CompressedReference<MirrorType> { }; +// Forward declaration. Just calls the destructor. 
+struct ShadowFrameDeleter; +using ShadowFrameAllocaUniquePtr = std::unique_ptr<ShadowFrame, ShadowFrameDeleter>; + // ShadowFrame has 2 possible layouts: // - interpreter - separate VRegs and reference arrays. References are in the reference array. // - JNI - just VRegs, but where every VReg holds a reference. @@ -77,21 +81,26 @@ class ShadowFrame { static ShadowFrame* CreateDeoptimizedFrame(uint32_t num_vregs, ShadowFrame* link, ArtMethod* method, uint32_t dex_pc) { uint8_t* memory = new uint8_t[ComputeSize(num_vregs)]; - return Create(num_vregs, link, method, dex_pc, memory); + return CreateShadowFrameImpl(num_vregs, link, method, dex_pc, memory); } // Delete a ShadowFrame allocated on the heap for deoptimization. static void DeleteDeoptimizedFrame(ShadowFrame* sf) { + sf->~ShadowFrame(); // Explicitly destruct. uint8_t* memory = reinterpret_cast<uint8_t*>(sf); delete[] memory; } - // Create ShadowFrame for interpreter using provided memory. - static ShadowFrame* Create(uint32_t num_vregs, ShadowFrame* link, - ArtMethod* method, uint32_t dex_pc, void* memory) { - ShadowFrame* sf = new (memory) ShadowFrame(num_vregs, link, method, dex_pc, true); - return sf; - } + // Create a shadow frame in a fresh alloca. This needs to be in the context of the caller. + // Inlining doesn't work, the compiler will still undo the alloca. So this needs to be a macro. +#define CREATE_SHADOW_FRAME(num_vregs, link, method, dex_pc) ({ \ + size_t frame_size = ShadowFrame::ComputeSize(num_vregs); \ + void* alloca_mem = alloca(frame_size); \ + ShadowFrameAllocaUniquePtr( \ + ShadowFrame::CreateShadowFrameImpl((num_vregs), (link), (method), (dex_pc), \ + (alloca_mem))); \ + }) + ~ShadowFrame() {} // TODO(iam): Clean references array up since they're always there, @@ -283,6 +292,15 @@ class ShadowFrame { return OFFSETOF_MEMBER(ShadowFrame, vregs_); } + // Create ShadowFrame for interpreter using provided memory. 
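The CREATE_SHADOW_FRAME macro above must be a macro (a statement expression) rather than a helper function because memory obtained with alloca() is released as soon as the function that called alloca returns; a helper, inlined or not, cannot be relied on to keep it alive. A self-contained sketch of the same alloca-plus-placement-new pattern with simplified, hypothetical names (Frame standing in for ShadowFrame, GCC/Clang assumed for the statement expression):

#include <alloca.h>   // Linux/Android; other platforms declare alloca elsewhere
#include <cstdio>
#include <memory>
#include <new>

struct Frame {
  explicit Frame(int id) : id(id) {}
  ~Frame() { std::printf("destroying frame %d\n", id); }
  int id;
};

// The deleter only runs the destructor: the memory itself is stack memory and is
// reclaimed automatically when the owning function returns.
struct FrameDeleter {
  void operator()(Frame* f) const { if (f != nullptr) f->~Frame(); }
};
using FrameAllocaPtr = std::unique_ptr<Frame, FrameDeleter>;

// Must be a macro: alloca() memory lives only as long as the calling function's frame,
// so a helper function would hand back memory that is already gone.
#define CREATE_FRAME(id)                                  \
  ({                                                      \
    void* alloca_mem = alloca(sizeof(Frame));             \
    FrameAllocaPtr(new (alloca_mem) Frame(id));           \
  })

int main() {
  FrameAllocaPtr frame = CREATE_FRAME(7);   // lives in main()'s stack frame
  std::printf("frame %d is alive\n", frame->id);
  return 0;                                 // ~Frame() runs via FrameDeleter
}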
+ static ShadowFrame* CreateShadowFrameImpl(uint32_t num_vregs, + ShadowFrame* link, + ArtMethod* method, + uint32_t dex_pc, + void* memory) { + return new (memory) ShadowFrame(num_vregs, link, method, dex_pc, true); + } + private: ShadowFrame(uint32_t num_vregs, ShadowFrame* link, ArtMethod* method, uint32_t dex_pc, bool has_reference_array) @@ -326,6 +344,14 @@ class ShadowFrame { DISALLOW_IMPLICIT_CONSTRUCTORS(ShadowFrame); }; +struct ShadowFrameDeleter { + inline void operator()(ShadowFrame* frame) { + if (frame != nullptr) { + frame->~ShadowFrame(); + } + } +}; + class JavaFrameRootInfo : public RootInfo { public: JavaFrameRootInfo(uint32_t thread_id, const StackVisitor* stack_visitor, size_t vreg) diff --git a/runtime/thread.cc b/runtime/thread.cc index 5bf895ef80..65f71efc06 100644 --- a/runtime/thread.cc +++ b/runtime/thread.cc @@ -250,10 +250,16 @@ void Thread::PushStackedShadowFrame(ShadowFrame* sf, StackedShadowFrameType type tlsPtr_.stacked_shadow_frame_record = record; } -ShadowFrame* Thread::PopStackedShadowFrame(StackedShadowFrameType type) { +ShadowFrame* Thread::PopStackedShadowFrame(StackedShadowFrameType type, bool must_be_present) { StackedShadowFrameRecord* record = tlsPtr_.stacked_shadow_frame_record; - DCHECK(record != nullptr); - DCHECK_EQ(record->GetType(), type); + if (must_be_present) { + DCHECK(record != nullptr); + DCHECK_EQ(record->GetType(), type); + } else { + if (record == nullptr || record->GetType() != type) { + return nullptr; + } + } tlsPtr_.stacked_shadow_frame_record = record->GetLink(); ShadowFrame* shadow_frame = record->GetShadowFrame(); delete record; @@ -1960,15 +1966,32 @@ class BuildInternalStackTraceVisitor : public StackVisitor { pointer_size_(Runtime::Current()->GetClassLinker()->GetImagePointerSize()) {} bool Init(int depth) SHARED_REQUIRES(Locks::mutator_lock_) ACQUIRE(Roles::uninterruptible_) { - // Allocate method trace with format [method pointers][pcs]. - auto* cl = Runtime::Current()->GetClassLinker(); - trace_ = cl->AllocPointerArray(self_, depth * 2); + // Allocate method trace as an object array where the first element is a pointer array that + // contains the ArtMethod pointers and dex PCs. The rest of the elements are the declaring + // class of the ArtMethod pointers. + ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); + StackHandleScope<1> hs(self_); + mirror::Class* array_class = class_linker->GetClassRoot(ClassLinker::kObjectArrayClass); + // The first element is the methods and dex pc array, the other elements are declaring classes + // for the methods to ensure classes in the stack trace don't get unloaded. + Handle<mirror::ObjectArray<mirror::Object>> trace( + hs.NewHandle( + mirror::ObjectArray<mirror::Object>::Alloc(hs.Self(), array_class, depth + 1))); + if (trace.Get() == nullptr) { + // Acquire uninterruptible_ in all paths. + self_->StartAssertNoThreadSuspension("Building internal stack trace"); + self_->AssertPendingOOMException(); + return false; + } + mirror::PointerArray* methods_and_pcs = class_linker->AllocPointerArray(self_, depth * 2); const char* last_no_suspend_cause = self_->StartAssertNoThreadSuspension("Building internal stack trace"); - if (trace_ == nullptr) { + if (methods_and_pcs == nullptr) { self_->AssertPendingOOMException(); return false; } + trace->Set(0, methods_and_pcs); + trace_ = trace.Get(); // If We are called from native, use non-transactional mode. 
CHECK(last_no_suspend_cause == nullptr) << last_no_suspend_cause; return true; @@ -1990,16 +2013,24 @@ class BuildInternalStackTraceVisitor : public StackVisitor { if (m->IsRuntimeMethod()) { return true; // Ignore runtime frames (in particular callee save). } - trace_->SetElementPtrSize<kTransactionActive>( - count_, m, pointer_size_); - trace_->SetElementPtrSize<kTransactionActive>( - trace_->GetLength() / 2 + count_, m->IsProxyMethod() ? DexFile::kDexNoIndex : GetDexPc(), - pointer_size_); + mirror::PointerArray* trace_methods_and_pcs = GetTraceMethodsAndPCs(); + trace_methods_and_pcs->SetElementPtrSize<kTransactionActive>(count_, m, pointer_size_); + trace_methods_and_pcs->SetElementPtrSize<kTransactionActive>( + trace_methods_and_pcs->GetLength() / 2 + count_, + m->IsProxyMethod() ? DexFile::kDexNoIndex : GetDexPc(), + pointer_size_); + // Save the declaring class of the method to ensure that the declaring classes of the methods + // do not get unloaded while the stack trace is live. + trace_->Set(count_ + 1, m->GetDeclaringClass()); ++count_; return true; } - mirror::PointerArray* GetInternalStackTrace() const { + mirror::PointerArray* GetTraceMethodsAndPCs() const SHARED_REQUIRES(Locks::mutator_lock_) { + return down_cast<mirror::PointerArray*>(trace_->Get(0)); + } + + mirror::ObjectArray<mirror::Object>* GetInternalStackTrace() const { return trace_; } @@ -2009,8 +2040,11 @@ class BuildInternalStackTraceVisitor : public StackVisitor { int32_t skip_depth_; // Current position down stack trace. uint32_t count_; - // An array of the methods on the stack, the last entries are the dex PCs. - mirror::PointerArray* trace_; + // An object array where the first element is a pointer array that contains the ArtMethod + // pointers on the stack and dex PCs. The rest of the elements are the declaring + // class of the ArtMethod pointers. trace_[i+1] contains the declaring class of the ArtMethod of + // the i'th frame. + mirror::ObjectArray<mirror::Object>* trace_; // For cross compilation. const size_t pointer_size_; @@ -2033,11 +2067,12 @@ jobject Thread::CreateInternalStackTrace(const ScopedObjectAccessAlreadyRunnable return nullptr; // Allocation failed. } build_trace_visitor.WalkStack(); - mirror::PointerArray* trace = build_trace_visitor.GetInternalStackTrace(); + mirror::ObjectArray<mirror::Object>* trace = build_trace_visitor.GetInternalStackTrace(); if (kIsDebugBuild) { - // Second half is dex PCs. - for (uint32_t i = 0; i < static_cast<uint32_t>(trace->GetLength() / 2); ++i) { - auto* method = trace->GetElementPtrSize<ArtMethod*>( + mirror::PointerArray* trace_methods = build_trace_visitor.GetTraceMethodsAndPCs(); + // Second half of trace_methods is dex PCs. 
+ for (uint32_t i = 0; i < static_cast<uint32_t>(trace_methods->GetLength() / 2); ++i) { + auto* method = trace_methods->GetElementPtrSize<ArtMethod*>( i, Runtime::Current()->GetClassLinker()->GetImagePointerSize()); CHECK(method != nullptr); } @@ -2056,12 +2091,16 @@ bool Thread::IsExceptionThrownByCurrentMethod(mirror::Throwable* exception) cons } jobjectArray Thread::InternalStackTraceToStackTraceElementArray( - const ScopedObjectAccessAlreadyRunnable& soa, jobject internal, jobjectArray output_array, + const ScopedObjectAccessAlreadyRunnable& soa, + jobject internal, + jobjectArray output_array, int* stack_depth) { - // Decode the internal stack trace into the depth, method trace and PC trace - int32_t depth = soa.Decode<mirror::PointerArray*>(internal)->GetLength() / 2; + // Decode the internal stack trace into the depth, method trace and PC trace. + // Subtract one for the methods and PC trace. + int32_t depth = soa.Decode<mirror::Array*>(internal)->GetLength() - 1; + DCHECK_GE(depth, 0); - auto* cl = Runtime::Current()->GetClassLinker(); + ClassLinker* const class_linker = Runtime::Current()->GetClassLinker(); jobjectArray result; @@ -2075,7 +2114,7 @@ jobjectArray Thread::InternalStackTraceToStackTraceElementArray( } else { // Create java_trace array and place in local reference table mirror::ObjectArray<mirror::StackTraceElement>* java_traces = - cl->AllocStackTraceElementArray(soa.Self(), depth); + class_linker->AllocStackTraceElementArray(soa.Self(), depth); if (java_traces == nullptr) { return nullptr; } @@ -2087,7 +2126,12 @@ jobjectArray Thread::InternalStackTraceToStackTraceElementArray( } for (int32_t i = 0; i < depth; ++i) { - auto* method_trace = soa.Decode<mirror::PointerArray*>(internal); + mirror::ObjectArray<mirror::Object>* decoded_traces = + soa.Decode<mirror::Object*>(internal)->AsObjectArray<mirror::Object>(); + // Methods and dex PC trace is element 0. + DCHECK(decoded_traces->Get(0)->IsIntArray() || decoded_traces->Get(0)->IsLongArray()); + mirror::PointerArray* const method_trace = + down_cast<mirror::PointerArray*>(decoded_traces->Get(0)); // Prepare parameters for StackTraceElement(String cls, String method, String file, int line) ArtMethod* method = method_trace->GetElementPtrSize<ArtMethod*>(i, sizeof(void*)); uint32_t dex_pc = method_trace->GetElementPtrSize<uint32_t>( diff --git a/runtime/thread.h b/runtime/thread.h index 11f2e285a1..d21644d179 100644 --- a/runtime/thread.h +++ b/runtime/thread.h @@ -108,7 +108,8 @@ enum ThreadFlag { enum class StackedShadowFrameType { kShadowFrameUnderConstruction, - kDeoptimizationShadowFrame + kDeoptimizationShadowFrame, + kSingleFrameDeoptimizationShadowFrame }; static constexpr size_t kNumRosAllocThreadLocalSizeBrackets = 34; @@ -843,7 +844,7 @@ class Thread { void AssertHasDeoptimizationContext() SHARED_REQUIRES(Locks::mutator_lock_); void PushStackedShadowFrame(ShadowFrame* sf, StackedShadowFrameType type); - ShadowFrame* PopStackedShadowFrame(StackedShadowFrameType type); + ShadowFrame* PopStackedShadowFrame(StackedShadowFrameType type, bool must_be_present = true); // For debugger, find the shadow frame that corresponds to a frame id. // Or return null if there is none. 
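The new must_be_present parameter on PopStackedShadowFrame lets a caller probe for a stacked shadow frame that may never have been pushed, instead of DCHECK-failing; presumably this is what the single-frame deoptimization return path needs. A hypothetical call site (not part of this change):

ShadowFrame* frame = self->PopStackedShadowFrame(
    StackedShadowFrameType::kSingleFrameDeoptimizationShadowFrame, /* must_be_present */ false);
if (frame == nullptr) {
  // No single-frame deopt record was on top; take the ordinary return path.
}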
diff --git a/runtime/thread_pool.cc b/runtime/thread_pool.cc index d8f80fa690..0527d3ae14 100644 --- a/runtime/thread_pool.cc +++ b/runtime/thread_pool.cc @@ -16,7 +16,9 @@ #include "thread_pool.h" +#include "base/bit_utils.h" #include "base/casts.h" +#include "base/logging.h" #include "base/stl_util.h" #include "base/time_utils.h" #include "runtime.h" @@ -30,10 +32,15 @@ ThreadPoolWorker::ThreadPoolWorker(ThreadPool* thread_pool, const std::string& n size_t stack_size) : thread_pool_(thread_pool), name_(name) { + // Add an inaccessible page to catch stack overflow. + stack_size += kPageSize; std::string error_msg; stack_.reset(MemMap::MapAnonymous(name.c_str(), nullptr, stack_size, PROT_READ | PROT_WRITE, false, false, &error_msg)); CHECK(stack_.get() != nullptr) << error_msg; + CHECK_ALIGNED(stack_->Begin(), kPageSize); + int mprotect_result = mprotect(stack_->Begin(), kPageSize, PROT_NONE); + CHECK_EQ(mprotect_result, 0) << "Failed to mprotect() bottom page of thread pool worker stack."; const char* reason = "new thread pool worker thread"; pthread_attr_t attr; CHECK_PTHREAD_CALL(pthread_attr_init, (&attr), reason); @@ -92,7 +99,8 @@ ThreadPool::ThreadPool(const char* name, size_t num_threads) while (GetThreadCount() < num_threads) { const std::string worker_name = StringPrintf("%s worker thread %zu", name_.c_str(), GetThreadCount()); - threads_.push_back(new ThreadPoolWorker(this, worker_name, ThreadPoolWorker::kDefaultStackSize)); + threads_.push_back( + new ThreadPoolWorker(this, worker_name, ThreadPoolWorker::kDefaultStackSize)); } // Wait for all of the threads to attach. creation_barier_.Wait(self); diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc index 3d4f04c70c..eed3e22a72 100644 --- a/runtime/verifier/method_verifier.cc +++ b/runtime/verifier/method_verifier.cc @@ -548,7 +548,8 @@ SafeMap<uint32_t, std::set<uint32_t>> MethodVerifier::FindStringInitMap(ArtMetho MethodVerifier verifier(self, m->GetDexFile(), dex_cache, class_loader, &m->GetClassDef(), m->GetCodeItem(), m->GetDexMethodIndex(), m, m->GetAccessFlags(), true, true, false, true); - return verifier.FindStringInitMap(); + // Avoid copying: The map is moved out of the verifier before the verifier is destroyed. 
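The thread_pool.cc hunk above grows each worker stack by one page and mprotect()s that bottom page to PROT_NONE, so running off the end of the stack faults immediately instead of silently corrupting whatever is mapped below. A standalone POSIX sketch of the same guard-page idea (plain mmap/mprotect rather than ART's MemMap; sizes are arbitrary):

#include <cstddef>
#include <sys/mman.h>
#include <unistd.h>

int main() {
  const size_t page = static_cast<size_t>(sysconf(_SC_PAGESIZE));
  const size_t stack_size = 64 * 1024 + page;  // requested stack plus one guard page
  void* stack = mmap(nullptr, stack_size, PROT_READ | PROT_WRITE,
                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (stack == MAP_FAILED) return 1;
  // Stacks grow downwards, so the guard goes at the lowest page: touching it raises
  // SIGSEGV instead of letting the thread scribble over adjacent mappings.
  if (mprotect(stack, page, PROT_NONE) != 0) return 1;
  // A real user would now hand [stack + page, stack + stack_size) to
  // pthread_attr_setstack() before spawning the worker thread.
  munmap(stack, stack_size);
  return 0;
}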
+ return std::move(verifier.FindStringInitMap()); } SafeMap<uint32_t, std::set<uint32_t>>& MethodVerifier::FindStringInitMap() { @@ -1007,6 +1008,9 @@ bool MethodVerifier::VerifyInstruction(const Instruction* inst, uint32_t code_of case Instruction::kVerifyRegCWide: result = result && CheckWideRegisterIndex(inst->VRegC()); break; + case Instruction::kVerifyRegCString: + result = result && CheckStringIndex(inst->VRegC()); + break; } switch (inst->GetVerifyExtraFlags()) { case Instruction::kVerifyArrayData: @@ -1299,17 +1303,17 @@ bool MethodVerifier::CheckSwitchTargets(uint32_t cur_offset) { return false; } + bool is_packed_switch = (*insns & 0xff) == Instruction::PACKED_SWITCH; + uint32_t switch_count = switch_insns[1]; - int32_t keys_offset, targets_offset; + int32_t targets_offset; uint16_t expected_signature; - if ((*insns & 0xff) == Instruction::PACKED_SWITCH) { + if (is_packed_switch) { /* 0=sig, 1=count, 2/3=firstKey */ targets_offset = 4; - keys_offset = -1; expected_signature = Instruction::kPackedSwitchSignature; } else { /* 0=sig, 1=count, 2..count*2 = keys */ - keys_offset = 2; targets_offset = 2 + 2 * switch_count; expected_signature = Instruction::kSparseSwitchSignature; } @@ -1328,19 +1332,33 @@ bool MethodVerifier::CheckSwitchTargets(uint32_t cur_offset) { << ", count " << insn_count; return false; } - /* for a sparse switch, verify the keys are in ascending order */ - if (keys_offset > 0 && switch_count > 1) { - int32_t last_key = switch_insns[keys_offset] | (switch_insns[keys_offset + 1] << 16); - for (uint32_t targ = 1; targ < switch_count; targ++) { - int32_t key = - static_cast<int32_t>(switch_insns[keys_offset + targ * 2]) | - static_cast<int32_t>(switch_insns[keys_offset + targ * 2 + 1] << 16); - if (key <= last_key) { - Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invalid packed switch: last key=" << last_key - << ", this=" << key; + + constexpr int32_t keys_offset = 2; + if (switch_count > 1) { + if (is_packed_switch) { + /* for a packed switch, verify that keys do not overflow int32 */ + int32_t first_key = switch_insns[keys_offset] | (switch_insns[keys_offset + 1] << 16); + int32_t max_first_key = + std::numeric_limits<int32_t>::max() - (static_cast<int32_t>(switch_count) - 1); + if (first_key > max_first_key) { + Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invalid packed switch: first_key=" << first_key + << ", switch_count=" << switch_count; return false; } - last_key = key; + } else { + /* for a sparse switch, verify the keys are in ascending order */ + int32_t last_key = switch_insns[keys_offset] | (switch_insns[keys_offset + 1] << 16); + for (uint32_t targ = 1; targ < switch_count; targ++) { + int32_t key = + static_cast<int32_t>(switch_insns[keys_offset + targ * 2]) | + static_cast<int32_t>(switch_insns[keys_offset + targ * 2 + 1] << 16); + if (key <= last_key) { + Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invalid sparse switch: last key=" << last_key + << ", this=" << key; + return false; + } + last_key = key; + } } } /* verify each switch target */ @@ -3148,6 +3166,13 @@ bool MethodVerifier::CodeFlowVerifyInstruction(uint32_t* start_guess) { Fail(VERIFY_ERROR_FORCE_INTERPRETER); // TODO(iam): implement invoke-lambda verification break; } + case Instruction::CAPTURE_VARIABLE: { + // Don't bother verifying, instead the interpreter will take the slow path with access checks. + // If the code would've normally hard-failed, then the interpreter will throw the + // appropriate verification errors at runtime. 
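The packed-switch check added above rejects tables whose keys would run past INT32_MAX; it compares first_key against INT32_MAX - (switch_count - 1) rather than computing first_key + switch_count - 1, so the test itself can never overflow. A standalone sketch of that overflow-safe comparison (hypothetical helper name):

#include <cassert>
#include <cstdint>
#include <limits>

// True if the keys first_key, first_key + 1, ..., first_key + count - 1 of a packed switch
// all fit in int32_t. The comparison is arranged so no intermediate sum can overflow; in dex,
// count comes from a 16-bit field of the packed-switch payload.
bool PackedSwitchKeysFit(int32_t first_key, uint32_t count) {
  if (count <= 1) {
    return true;
  }
  const int32_t max_first_key =
      std::numeric_limits<int32_t>::max() - (static_cast<int32_t>(count) - 1);
  return first_key <= max_first_key;
}

int main() {
  assert(PackedSwitchKeysFit(0, 10));
  assert(!PackedSwitchKeysFit(std::numeric_limits<int32_t>::max(), 2));
  return 0;
}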
+ Fail(VERIFY_ERROR_FORCE_INTERPRETER); // TODO(iam): implement capture-variable verification + break; + } case Instruction::CREATE_LAMBDA: { // Don't bother verifying, instead the interpreter will take the slow path with access checks. // If the code would've normally hard-failed, then the interpreter will throw the @@ -3155,10 +3180,15 @@ bool MethodVerifier::CodeFlowVerifyInstruction(uint32_t* start_guess) { Fail(VERIFY_ERROR_FORCE_INTERPRETER); // TODO(iam): implement create-lambda verification break; } + case Instruction::LIBERATE_VARIABLE: { + // Don't bother verifying, instead the interpreter will take the slow path with access checks. + // If the code would've normally hard-failed, then the interpreter will throw the + // appropriate verification errors at runtime. + Fail(VERIFY_ERROR_FORCE_INTERPRETER); // TODO(iam): implement liberate-variable verification + break; + } - case Instruction::UNUSED_F4: - case Instruction::UNUSED_F5: - case Instruction::UNUSED_F7: { + case Instruction::UNUSED_F4: { DCHECK(false); // TODO(iam): Implement opcodes for lambdas // Conservatively fail verification on release builds. Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Unexpected opcode " << inst->DumpString(dex_file_); diff --git a/test/004-JniTest/expected.txt b/test/004-JniTest/expected.txt index 49d9cc0d5a..86ab37e1e5 100644 --- a/test/004-JniTest/expected.txt +++ b/test/004-JniTest/expected.txt @@ -1,3 +1,4 @@ +JNI_OnLoad called Super.<init> Super.<init> Subclass.<init> diff --git a/test/004-JniTest/jni_test.cc b/test/004-JniTest/jni_test.cc index db0dd32771..be7888b04a 100644 --- a/test/004-JniTest/jni_test.cc +++ b/test/004-JniTest/jni_test.cc @@ -15,8 +15,9 @@ */ #include <assert.h> -#include <stdio.h> +#include <iostream> #include <pthread.h> +#include <stdio.h> #include <vector> #include "jni.h" @@ -27,13 +28,21 @@ static JavaVM* jvm = nullptr; -extern "C" JNIEXPORT jint JNI_OnLoad(JavaVM *vm, void *) { +extern "C" JNIEXPORT jint JNI_OnLoad(JavaVM *vm, void*) { assert(vm != nullptr); assert(jvm == nullptr); jvm = vm; + std::cout << "JNI_OnLoad called" << std::endl; return JNI_VERSION_1_6; } +extern "C" JNIEXPORT void JNI_OnUnload(JavaVM*, void*) { + // std::cout since LOG(INFO) adds extra stuff like pid. + std::cout << "JNI_OnUnload called" << std::endl; + // Clear jvm for assert in test 004-JniTest. 
+ jvm = nullptr; +} + static void* AttachHelper(void* arg) { assert(jvm != nullptr); diff --git a/test/004-ReferenceMap/expected.txt b/test/004-ReferenceMap/expected.txt index e69de29bb2..6a5618ebc6 100644 --- a/test/004-ReferenceMap/expected.txt +++ b/test/004-ReferenceMap/expected.txt @@ -0,0 +1 @@ +JNI_OnLoad called diff --git a/test/004-SignalTest/expected.txt b/test/004-SignalTest/expected.txt index fd5ec00067..b3a0e1cbe0 100644 --- a/test/004-SignalTest/expected.txt +++ b/test/004-SignalTest/expected.txt @@ -1,3 +1,4 @@ +JNI_OnLoad called init signal test Caught NullPointerException Caught StackOverflowError diff --git a/test/004-StackWalk/expected.txt b/test/004-StackWalk/expected.txt index bde00246a3..5af68cd85d 100644 --- a/test/004-StackWalk/expected.txt +++ b/test/004-StackWalk/expected.txt @@ -1,3 +1,4 @@ +JNI_OnLoad called 1st call 172001234567891011121314151617181920652310201919 2nd call diff --git a/test/004-UnsafeTest/expected.txt b/test/004-UnsafeTest/expected.txt index e69de29bb2..6a5618ebc6 100644 --- a/test/004-UnsafeTest/expected.txt +++ b/test/004-UnsafeTest/expected.txt @@ -0,0 +1 @@ +JNI_OnLoad called diff --git a/test/044-proxy/expected.txt b/test/044-proxy/expected.txt index f86948ad6c..052c8faf1b 100644 --- a/test/044-proxy/expected.txt +++ b/test/044-proxy/expected.txt @@ -93,4 +93,5 @@ Invocation of public abstract java.lang.String NarrowingTest$I2.foo() Got expected exception Proxy narrowed invocation return type passed 5.8 +JNI_OnLoad called callback diff --git a/test/051-thread/expected.txt b/test/051-thread/expected.txt index 54e34af3aa..c6cd4f8bea 100644 --- a/test/051-thread/expected.txt +++ b/test/051-thread/expected.txt @@ -1,3 +1,4 @@ +JNI_OnLoad called thread test starting testThreadCapacity thread count: 512 testThreadDaemons starting thread 'TestDaemonThread' diff --git a/test/088-monitor-verification/expected.txt b/test/088-monitor-verification/expected.txt index 13b8c73970..f252f6f2ee 100644 --- a/test/088-monitor-verification/expected.txt +++ b/test/088-monitor-verification/expected.txt @@ -1,3 +1,4 @@ +JNI_OnLoad called recursiveSync ok nestedMayThrow ok constantLock ok diff --git a/test/115-native-bridge/expected.txt b/test/115-native-bridge/expected.txt index 372ecd0484..b003307ab7 100644 --- a/test/115-native-bridge/expected.txt +++ b/test/115-native-bridge/expected.txt @@ -17,6 +17,7 @@ Test ART callbacks: all JNI function number is 11. name:testSignal, signature:()I, shorty:I. name:testZeroLengthByteBuffers, signature:()V, shorty:V. trampoline_JNI_OnLoad called! +JNI_OnLoad called Getting trampoline for Java_Main_testFindClassOnAttachedNativeThread with shorty V. trampoline_Java_Main_testFindClassOnAttachedNativeThread called! Getting trampoline for Java_Main_testFindFieldOnAttachedNativeThreadNative with shorty V. diff --git a/test/116-nodex2oat/expected.txt b/test/116-nodex2oat/expected.txt index 05b1c2f387..157dfc4ea4 100644 --- a/test/116-nodex2oat/expected.txt +++ b/test/116-nodex2oat/expected.txt @@ -1,6 +1,9 @@ Run -Xnodex2oat +JNI_OnLoad called Has oat is false, is dex2oat enabled is false. Run -Xdex2oat +JNI_OnLoad called Has oat is true, is dex2oat enabled is true. Run default +JNI_OnLoad called Has oat is true, is dex2oat enabled is true. 
diff --git a/test/117-nopatchoat/expected.txt b/test/117-nopatchoat/expected.txt index 5cc02d1662..0cd4715d09 100644 --- a/test/117-nopatchoat/expected.txt +++ b/test/117-nopatchoat/expected.txt @@ -1,9 +1,12 @@ Run without dex2oat/patchoat +JNI_OnLoad called dex2oat & patchoat are disabled, has oat is true, has executable oat is expected. This is a function call Run with dexoat/patchoat +JNI_OnLoad called dex2oat & patchoat are enabled, has oat is true, has executable oat is expected. This is a function call Run default +JNI_OnLoad called dex2oat & patchoat are enabled, has oat is true, has executable oat is expected. This is a function call diff --git a/test/117-nopatchoat/nopatchoat.cc b/test/117-nopatchoat/nopatchoat.cc index 7eac412681..3e533ad62e 100644 --- a/test/117-nopatchoat/nopatchoat.cc +++ b/test/117-nopatchoat/nopatchoat.cc @@ -16,7 +16,10 @@ #include "class_linker.h" #include "dex_file-inl.h" +#include "gc/heap.h" +#include "gc/space/image_space.h" #include "mirror/class-inl.h" +#include "runtime.h" #include "scoped_thread_state_change.h" #include "thread.h" @@ -31,6 +34,11 @@ class NoPatchoatTest { return dex_file.GetOatDexFile(); } + static bool isRelocationDeltaZero() { + gc::space::ImageSpace* space = Runtime::Current()->GetHeap()->GetImageSpace(); + return space != nullptr && space->GetImageHeader().GetPatchDelta() == 0; + } + static bool hasExecutableOat(jclass cls) { const OatFile::OatDexFile* oat_dex_file = getOatDexFile(cls); @@ -49,6 +57,10 @@ class NoPatchoatTest { } }; +extern "C" JNIEXPORT jboolean JNICALL Java_Main_isRelocationDeltaZero(JNIEnv*, jclass) { + return NoPatchoatTest::isRelocationDeltaZero(); +} + extern "C" JNIEXPORT jboolean JNICALL Java_Main_hasExecutableOat(JNIEnv*, jclass cls) { return NoPatchoatTest::hasExecutableOat(cls); } diff --git a/test/117-nopatchoat/run b/test/117-nopatchoat/run index c749c74345..c634900218 100755 --- a/test/117-nopatchoat/run +++ b/test/117-nopatchoat/run @@ -36,8 +36,6 @@ fi # Make sure we can run without relocation echo "Run without dex2oat/patchoat" -# /bin/false is actually not even there for either, so the exec will fail. -# Unfortunately there is no equivalent to /bin/false in android. ${RUN} ${flags} --runtime-option -Xnodex2oat # Make sure we can run with the oat file. diff --git a/test/117-nopatchoat/src/Main.java b/test/117-nopatchoat/src/Main.java index 223e12084d..5cca309847 100644 --- a/test/117-nopatchoat/src/Main.java +++ b/test/117-nopatchoat/src/Main.java @@ -18,9 +18,13 @@ public class Main { public static void main(String[] args) { System.loadLibrary(args[0]); + // With a relocationDelta of 0, the runtime has no way to determine if the oat file in + // ANDROID_DATA has been relocated, since a non-relocated oat file always has a 0 delta. + // Hitting this condition should be rare and ideally we would prevent it from happening but + // there is no way to do so without major changes to the run-test framework. boolean executable_correct = (isPic() ? - hasExecutableOat() == true : - hasExecutableOat() == isDex2OatEnabled()); + hasExecutableOat() == true : + hasExecutableOat() == (isDex2OatEnabled() || isRelocationDeltaZero())); System.out.println( "dex2oat & patchoat are " + ((isDex2OatEnabled()) ? 
"enabled" : "disabled") + @@ -50,4 +54,6 @@ public class Main { private native static boolean hasOat(); private native static boolean hasExecutableOat(); + + private native static boolean isRelocationDeltaZero(); } diff --git a/test/118-noimage-dex2oat/expected.txt b/test/118-noimage-dex2oat/expected.txt index 0103e899f6..166481e96a 100644 --- a/test/118-noimage-dex2oat/expected.txt +++ b/test/118-noimage-dex2oat/expected.txt @@ -1,11 +1,14 @@ Run -Xnoimage-dex2oat +JNI_OnLoad called Has image is false, is image dex2oat enabled is false, is BOOTCLASSPATH on disk is false. testB18485243 PASS Run -Xnoimage-dex2oat -Xno-dex-file-fallback Failed to initialize runtime (check log for details) Run -Ximage-dex2oat +JNI_OnLoad called Has image is true, is image dex2oat enabled is true, is BOOTCLASSPATH on disk is true. testB18485243 PASS Run default +JNI_OnLoad called Has image is true, is image dex2oat enabled is true, is BOOTCLASSPATH on disk is true. testB18485243 PASS diff --git a/test/119-noimage-patchoat/expected.txt b/test/119-noimage-patchoat/expected.txt index ed136621c3..9b9db58fcd 100644 --- a/test/119-noimage-patchoat/expected.txt +++ b/test/119-noimage-patchoat/expected.txt @@ -1,8 +1,11 @@ Run -Xnoimage-dex2oat -Xpatchoat:/system/bin/false +JNI_OnLoad called Has image is false, is image dex2oat enabled is false. Run -Xnoimage-dex2oat -Xpatchoat:/system/bin/false -Xno-dex-file-fallback Failed to initialize runtime (check log for details) Run -Ximage-dex2oat +JNI_OnLoad called Has image is true, is image dex2oat enabled is true. Run default +JNI_OnLoad called Has image is true, is image dex2oat enabled is true. diff --git a/test/137-cfi/expected.txt b/test/137-cfi/expected.txt index e69de29bb2..6a5618ebc6 100644 --- a/test/137-cfi/expected.txt +++ b/test/137-cfi/expected.txt @@ -0,0 +1 @@ +JNI_OnLoad called diff --git a/test/139-register-natives/expected.txt b/test/139-register-natives/expected.txt index e69de29bb2..6a5618ebc6 100644 --- a/test/139-register-natives/expected.txt +++ b/test/139-register-natives/expected.txt @@ -0,0 +1 @@ +JNI_OnLoad called diff --git a/test/141-class-unload/expected.txt b/test/141-class-unload/expected.txt new file mode 100644 index 0000000000..53d7abecaf --- /dev/null +++ b/test/141-class-unload/expected.txt @@ -0,0 +1,23 @@ +1 +2 +JNI_OnLoad called +JNI_OnUnload called +1 +2 +JNI_OnLoad called +JNI_OnUnload called +null +null +JNI_OnLoad called +JNI_OnUnload called +null +loader null false +loader null false +JNI_OnLoad called +JNI_OnUnload called +null +1 +2 +JNI_OnLoad called +class null false test +JNI_OnUnload called diff --git a/test/141-class-unload/info.txt b/test/141-class-unload/info.txt new file mode 100644 index 0000000000..d8dd381dc7 --- /dev/null +++ b/test/141-class-unload/info.txt @@ -0,0 +1 @@ +Test that classes get freed after they are no longer reachable. diff --git a/test/141-class-unload/jni_unload.cc b/test/141-class-unload/jni_unload.cc new file mode 100644 index 0000000000..d913efe53e --- /dev/null +++ b/test/141-class-unload/jni_unload.cc @@ -0,0 +1,37 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "jni.h" + +#include <iostream> + +#include "jit/jit.h" +#include "jit/jit_instrumentation.h" +#include "runtime.h" +#include "thread-inl.h" + +namespace art { +namespace { + +extern "C" JNIEXPORT void JNICALL Java_IntHolder_waitForCompilation(JNIEnv*, jclass) { + jit::Jit* jit = Runtime::Current()->GetJit(); + if (jit != nullptr) { + jit->GetInstrumentationCache()->WaitForCompilationToFinish(Thread::Current()); + } +} + +} // namespace +} // namespace art diff --git a/test/141-class-unload/src-ex/IntHolder.java b/test/141-class-unload/src-ex/IntHolder.java new file mode 100644 index 0000000000..feff0d2ba1 --- /dev/null +++ b/test/141-class-unload/src-ex/IntHolder.java @@ -0,0 +1,43 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Simple class that holds a static int for testing that class unloading works +// and re-runs the class initializer. +public class IntHolder { + private static int value = 1; + + public static void setValue(int newValue) { + value = newValue; + } + + public static int getValue() { + return value; + } + + public static void runGC() { + Runtime.getRuntime().gc(); + } + + public static void loadLibrary(String name) { + System.loadLibrary(name); + } + + public static native void waitForCompilation(); + + public static Throwable generateStackTrace() { + return new Exception("test"); + } +} diff --git a/test/141-class-unload/src/Main.java b/test/141-class-unload/src/Main.java new file mode 100644 index 0000000000..3cc43accbe --- /dev/null +++ b/test/141-class-unload/src/Main.java @@ -0,0 +1,166 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.lang.ref.WeakReference; +import java.lang.reflect.Constructor; +import java.lang.reflect.Method; + +public class Main { + static final String DEX_FILE = System.getenv("DEX_LOCATION") + "/141-class-unload-ex.jar"; + static String nativeLibraryName; + + public static void main(String[] args) throws Exception { + nativeLibraryName = args[0]; + Class pathClassLoader = Class.forName("dalvik.system.PathClassLoader"); + if (pathClassLoader == null) { + throw new AssertionError("Couldn't find path class loader class"); + } + Constructor constructor = + pathClassLoader.getDeclaredConstructor(String.class, ClassLoader.class); + try { + testUnloadClass(constructor); + testUnloadLoader(constructor); + // Test that we don't unload if we have a Method keeping the class live. + testNoUnloadInvoke(constructor); + // Test that we don't unload if we have an instance. + testNoUnloadInstance(constructor); + // Test JNI_OnLoad and JNI_OnUnload. + testLoadAndUnloadLibrary(constructor); + // Test that stack traces keep the classes live. + testStackTrace(constructor); + // Stress test to make sure we dont leak memory. + stressTest(constructor); + } catch (Exception e) { + System.out.println(e); + } + } + + private static void stressTest(Constructor constructor) throws Exception { + for (int i = 0; i <= 100; ++i) { + setUpUnloadLoader(constructor, false); + if (i % 10 == 0) { + Runtime.getRuntime().gc(); + } + } + } + + private static void testUnloadClass(Constructor constructor) throws Exception { + WeakReference<Class> klass = setUpUnloadClass(constructor); + // No strong refernces to class loader, should get unloaded. + Runtime.getRuntime().gc(); + WeakReference<Class> klass2 = setUpUnloadClass(constructor); + Runtime.getRuntime().gc(); + // If the weak reference is cleared, then it was unloaded. + System.out.println(klass.get()); + System.out.println(klass2.get()); + } + + private static void testUnloadLoader(Constructor constructor) + throws Exception { + WeakReference<ClassLoader> loader = setUpUnloadLoader(constructor, true); + // No strong refernces to class loader, should get unloaded. + Runtime.getRuntime().gc(); + // If the weak reference is cleared, then it was unloaded. + System.out.println(loader.get()); + } + + private static void testStackTrace(Constructor constructor) throws Exception { + WeakReference<Class> klass = setUpUnloadClass(constructor); + Method stackTraceMethod = klass.get().getDeclaredMethod("generateStackTrace"); + Throwable throwable = (Throwable) stackTraceMethod.invoke(klass.get()); + stackTraceMethod = null; + Runtime.getRuntime().gc(); + boolean isNull = klass.get() == null; + System.out.println("class null " + isNull + " " + throwable.getMessage()); + } + + private static void testLoadAndUnloadLibrary(Constructor constructor) throws Exception { + WeakReference<ClassLoader> loader = setUpLoadLibrary(constructor); + // No strong refernces to class loader, should get unloaded. + Runtime.getRuntime().gc(); + // If the weak reference is cleared, then it was unloaded. 
+ System.out.println(loader.get()); + } + + private static void testNoUnloadInvoke(Constructor constructor) throws Exception { + WeakReference<ClassLoader> loader = + new WeakReference((ClassLoader) constructor.newInstance( + DEX_FILE, ClassLoader.getSystemClassLoader())); + WeakReference<Class> intHolder = new WeakReference(loader.get().loadClass("IntHolder")); + intHolder.get().getDeclaredMethod("runGC").invoke(intHolder.get()); + boolean isNull = loader.get() == null; + System.out.println("loader null " + isNull); + } + + private static void testNoUnloadInstance(Constructor constructor) throws Exception { + WeakReference<ClassLoader> loader = + new WeakReference((ClassLoader) constructor.newInstance( + DEX_FILE, ClassLoader.getSystemClassLoader())); + WeakReference<Class> intHolder = new WeakReference(loader.get().loadClass("IntHolder")); + Object o = intHolder.get().newInstance(); + Runtime.getRuntime().gc(); + boolean isNull = loader.get() == null; + System.out.println("loader null " + isNull); + } + + private static WeakReference<Class> setUpUnloadClass(Constructor constructor) throws Exception { + ClassLoader loader = (ClassLoader) constructor.newInstance( + DEX_FILE, ClassLoader.getSystemClassLoader()); + Class intHolder = loader.loadClass("IntHolder"); + Method getValue = intHolder.getDeclaredMethod("getValue"); + Method setValue = intHolder.getDeclaredMethod("setValue", Integer.TYPE); + // Make sure we don't accidentally preserve the value in the int holder, the class + // initializer should be re-run. + System.out.println((int) getValue.invoke(intHolder)); + setValue.invoke(intHolder, 2); + System.out.println((int) getValue.invoke(intHolder)); + waitForCompilation(intHolder); + return new WeakReference(intHolder); + } + + private static WeakReference<ClassLoader> setUpUnloadLoader(Constructor constructor, + boolean waitForCompilation) + throws Exception { + ClassLoader loader = (ClassLoader) constructor.newInstance( + DEX_FILE, ClassLoader.getSystemClassLoader()); + Class intHolder = loader.loadClass("IntHolder"); + Method setValue = intHolder.getDeclaredMethod("setValue", Integer.TYPE); + setValue.invoke(intHolder, 2); + if (waitForCompilation) { + waitForCompilation(intHolder); + } + return new WeakReference(loader); + } + + private static void waitForCompilation(Class intHolder) throws Exception { + // Load the native library so that we can call waitForCompilation. + Method loadLibrary = intHolder.getDeclaredMethod("loadLibrary", String.class); + loadLibrary.invoke(intHolder, nativeLibraryName); + // Wait for JIT compilation to finish since the async threads may prevent unloading. + Method waitForCompilation = intHolder.getDeclaredMethod("waitForCompilation"); + waitForCompilation.invoke(intHolder); + } + + private static WeakReference<ClassLoader> setUpLoadLibrary(Constructor constructor) + throws Exception { + ClassLoader loader = (ClassLoader) constructor.newInstance( + DEX_FILE, ClassLoader.getSystemClassLoader()); + Class intHolder = loader.loadClass("IntHolder"); + Method loadLibrary = intHolder.getDeclaredMethod("loadLibrary", String.class); + loadLibrary.invoke(intHolder, nativeLibraryName); + return new WeakReference(loader); + } +} diff --git a/test/142-classloader2/expected.txt b/test/142-classloader2/expected.txt new file mode 100644 index 0000000000..86f5e220e2 --- /dev/null +++ b/test/142-classloader2/expected.txt @@ -0,0 +1 @@ +Everything OK. 
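The 141-class-unload tests above all reduce to the same pattern: construct a PathClassLoader reflectively, keep only a WeakReference to it (or to a class it defined), run a GC, and report whether the referent was cleared; any strong reference (a Method object, an instance, a live stack trace) is expected to keep the loader alive. A condensed sketch of that pattern (hypothetical helper, simplified error handling):

import java.lang.ref.WeakReference;
import java.lang.reflect.Constructor;

class UnloadCheck {
    static boolean loaderWasUnloaded(Constructor<?> pathClassLoaderCtor, String dexFile)
            throws Exception {
        WeakReference<ClassLoader> ref = new WeakReference<>(
                (ClassLoader) pathClassLoaderCtor.newInstance(
                        dexFile, ClassLoader.getSystemClassLoader()));
        // No strong reference to the loader (or anything it loaded) survives this point.
        Runtime.getRuntime().gc();
        // The tests rely on a single explicit GC being enough on ART to clear the weak
        // reference once nothing else keeps the loader and its classes alive.
        return ref.get() == null;
    }
}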
diff --git a/test/142-classloader2/info.txt b/test/142-classloader2/info.txt new file mode 100644 index 0000000000..eb821a8ddb --- /dev/null +++ b/test/142-classloader2/info.txt @@ -0,0 +1 @@ +Check sub-classing of PathClassLoader. diff --git a/test/142-classloader2/smali/MyPathClassLoader.smali b/test/142-classloader2/smali/MyPathClassLoader.smali new file mode 100644 index 0000000000..553abd46c9 --- /dev/null +++ b/test/142-classloader2/smali/MyPathClassLoader.smali @@ -0,0 +1,13 @@ +# Simple subclass of PathClassLoader with methods overridden. +# We need to use smali right now to subclass a libcore class, see b/24304298. + +.class public LMyPathClassLoader; + +.super Ldalvik/system/PathClassLoader; + +# Simple forwarding constructor. +.method public constructor <init>(Ljava/lang/String;Ljava/lang/ClassLoader;)V + .registers 3 + invoke-direct {p0, p1, p2}, Ldalvik/system/PathClassLoader;-><init>(Ljava/lang/String;Ljava/lang/ClassLoader;)V + return-void +.end method diff --git a/test/142-classloader2/src-ex/A.java b/test/142-classloader2/src-ex/A.java new file mode 100644 index 0000000000..d5fa1f9df7 --- /dev/null +++ b/test/142-classloader2/src-ex/A.java @@ -0,0 +1,22 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Identical class to the main src, except with a different value, so we can distinguish them. + */ +public class A { + public static String value = "Ex-A"; +} diff --git a/test/142-classloader2/src/A.java b/test/142-classloader2/src/A.java new file mode 100644 index 0000000000..532df51878 --- /dev/null +++ b/test/142-classloader2/src/A.java @@ -0,0 +1,22 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Main class, with a simple value. + */ +public class A { + public static String value = "Src-A"; +} diff --git a/test/142-classloader2/src/Main.java b/test/142-classloader2/src/Main.java new file mode 100644 index 0000000000..86c61ebc3a --- /dev/null +++ b/test/142-classloader2/src/Main.java @@ -0,0 +1,76 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.lang.reflect.Constructor; +import java.lang.reflect.Field; + +/** + * PathClassLoader test. + */ +public class Main { + + private static ClassLoader createClassLoader(String dexPath, ClassLoader parent) { + try { + Class<?> myClassLoaderClass = Class.forName("MyPathClassLoader"); + Constructor constructor = myClassLoaderClass.getConstructor(String.class, + ClassLoader.class); + return (ClassLoader)constructor.newInstance(dexPath, parent); + } catch (Exception e) { + // Oops, not available?!?! + throw new RuntimeException(e); + } + } + + /** + * Main entry point. + */ + public static void main(String[] args) throws Exception { + // Check the class-path for the second file. We'll use that one as the source of the + // new classloader. + String cp = System.getProperty("java.class.path"); + if (cp.split(System.getProperty("path.separator")).length != 1) { + throw new IllegalStateException("Didn't find exactly one classpath element in " + cp); + } + if (!cp.endsWith("classloader2.jar")) { + throw new IllegalStateException("Don't understand classpath " + cp); + } + cp = cp.replace("classloader2.jar", "classloader2-ex.jar"); + + ClassLoader myClassLoader = createClassLoader( + cp, ClassLoader.getSystemClassLoader().getParent()); + + // Now load our test class. + Class<?> srcClass = A.class; + Class<?> exClass = myClassLoader.loadClass("A"); + + // First check: classes should be different. + if (srcClass == exClass) { + throw new IllegalStateException("Loaded class instances are the same"); + } + + // Secondary checks: get the static field values and make sure they aren't the same. + String srcValue = (String)srcClass.getDeclaredField("value").get(null); + if (!"Src-A".equals(srcValue)) { + throw new IllegalStateException("Expected Src-A, found " + srcValue); + } + String exValue = (String)exClass.getDeclaredField("value").get(null); + if (!"Ex-A".equals(exValue)) { + throw new IllegalStateException("Expected Ex-A, found " + exValue); + } + + System.out.println("Everything OK."); + } +} diff --git a/test/449-checker-bce/src/Main.java b/test/449-checker-bce/src/Main.java index a746664160..f06c250dc7 100644 --- a/test/449-checker-bce/src/Main.java +++ b/test/449-checker-bce/src/Main.java @@ -249,6 +249,25 @@ public class Main { array[Integer.MAX_VALUE - 998] = 1; } + /// CHECK-START: void Main.constantIndexing6(int[]) BCE (before) + /// CHECK: BoundsCheck + /// CHECK: ArraySet + /// CHECK: BoundsCheck + /// CHECK: ArraySet + + /// CHECK-START: void Main.constantIndexing6(int[]) BCE (after) + /// CHECK: Deoptimize + + static void constantIndexing6(int[] array) { + array[3] = 1; + array[4] = 1; + } + + // A helper into which the actual throwing function should be inlined. + static void constantIndexingForward6(int[] array) { + constantIndexing6(array); + } + /// CHECK-START: void Main.loopPattern1(int[]) BCE (before) /// CHECK: BoundsCheck /// CHECK: ArraySet @@ -602,7 +621,12 @@ public class Main { // This will cause AIOOBE. constantIndexing2(new int[3]); } catch (ArrayIndexOutOfBoundsException e) { - return 99; + try { + // This will cause AIOOBE.
+ constantIndexingForward6(new int[3]); + } catch (ArrayIndexOutOfBoundsException e2) { + return 99; + } } return 0; } diff --git a/test/454-get-vreg/expected.txt b/test/454-get-vreg/expected.txt index e69de29bb2..6a5618ebc6 100644 --- a/test/454-get-vreg/expected.txt +++ b/test/454-get-vreg/expected.txt @@ -0,0 +1 @@ +JNI_OnLoad called diff --git a/test/455-set-vreg/expected.txt b/test/455-set-vreg/expected.txt index e69de29bb2..6a5618ebc6 100644 --- a/test/455-set-vreg/expected.txt +++ b/test/455-set-vreg/expected.txt @@ -0,0 +1 @@ +JNI_OnLoad called diff --git a/test/457-regs/expected.txt b/test/457-regs/expected.txt index e69de29bb2..6a5618ebc6 100644 --- a/test/457-regs/expected.txt +++ b/test/457-regs/expected.txt @@ -0,0 +1 @@ +JNI_OnLoad called diff --git a/test/458-checker-instruction-simplification/src/Main.java b/test/458-checker-instruction-simplification/src/Main.java index a14200e7ce..c32d34aa6f 100644 --- a/test/458-checker-instruction-simplification/src/Main.java +++ b/test/458-checker-instruction-simplification/src/Main.java @@ -84,6 +84,172 @@ public class Main { return arg & -1; } + /// CHECK-START: int Main.UShr28And15(int) instruction_simplifier (before) + /// CHECK-DAG: <<Arg:i\d+>> ParameterValue + /// CHECK-DAG: <<Const28:i\d+>> IntConstant 28 + /// CHECK-DAG: <<Const15:i\d+>> IntConstant 15 + /// CHECK-DAG: <<UShr:i\d+>> UShr [<<Arg>>,<<Const28>>] + /// CHECK-DAG: <<And:i\d+>> And [<<UShr>>,<<Const15>>] + /// CHECK-DAG: Return [<<And>>] + + /// CHECK-START: int Main.UShr28And15(int) instruction_simplifier (after) + /// CHECK-DAG: <<Arg:i\d+>> ParameterValue + /// CHECK-DAG: <<Const28:i\d+>> IntConstant 28 + /// CHECK-DAG: <<UShr:i\d+>> UShr [<<Arg>>,<<Const28>>] + /// CHECK-DAG: Return [<<UShr>>] + + /// CHECK-START: int Main.UShr28And15(int) instruction_simplifier (after) + /// CHECK-NOT: And + + public static int UShr28And15(int arg) { + return (arg >>> 28) & 15; + } + + /// CHECK-START: long Main.UShr60And15(long) instruction_simplifier (before) + /// CHECK-DAG: <<Arg:j\d+>> ParameterValue + /// CHECK-DAG: <<Const60:i\d+>> IntConstant 60 + /// CHECK-DAG: <<Const15:j\d+>> LongConstant 15 + /// CHECK-DAG: <<UShr:j\d+>> UShr [<<Arg>>,<<Const60>>] + /// CHECK-DAG: <<And:j\d+>> And [<<UShr>>,<<Const15>>] + /// CHECK-DAG: Return [<<And>>] + + /// CHECK-START: long Main.UShr60And15(long) instruction_simplifier (after) + /// CHECK-DAG: <<Arg:j\d+>> ParameterValue + /// CHECK-DAG: <<Const60:i\d+>> IntConstant 60 + /// CHECK-DAG: <<UShr:j\d+>> UShr [<<Arg>>,<<Const60>>] + /// CHECK-DAG: Return [<<UShr>>] + + /// CHECK-START: long Main.UShr60And15(long) instruction_simplifier (after) + /// CHECK-NOT: And + + public static long UShr60And15(long arg) { + return (arg >>> 60) & 15; + } + + /// CHECK-START: int Main.UShr28And7(int) instruction_simplifier (before) + /// CHECK-DAG: <<Arg:i\d+>> ParameterValue + /// CHECK-DAG: <<Const28:i\d+>> IntConstant 28 + /// CHECK-DAG: <<Const7:i\d+>> IntConstant 7 + /// CHECK-DAG: <<UShr:i\d+>> UShr [<<Arg>>,<<Const28>>] + /// CHECK-DAG: <<And:i\d+>> And [<<UShr>>,<<Const7>>] + /// CHECK-DAG: Return [<<And>>] + + /// CHECK-START: int Main.UShr28And7(int) instruction_simplifier (after) + /// CHECK-DAG: <<Arg:i\d+>> ParameterValue + /// CHECK-DAG: <<Const28:i\d+>> IntConstant 28 + /// CHECK-DAG: <<Const7:i\d+>> IntConstant 7 + /// CHECK-DAG: <<UShr:i\d+>> UShr [<<Arg>>,<<Const28>>] + /// CHECK-DAG: <<And:i\d+>> And [<<UShr>>,<<Const7>>] + /// CHECK-DAG: Return [<<And>>] + + public static int UShr28And7(int arg) { + return (arg >>> 28) & 7; + 
} + + /// CHECK-START: long Main.UShr60And7(long) instruction_simplifier (before) + /// CHECK-DAG: <<Arg:j\d+>> ParameterValue + /// CHECK-DAG: <<Const60:i\d+>> IntConstant 60 + /// CHECK-DAG: <<Const7:j\d+>> LongConstant 7 + /// CHECK-DAG: <<UShr:j\d+>> UShr [<<Arg>>,<<Const60>>] + /// CHECK-DAG: <<And:j\d+>> And [<<UShr>>,<<Const7>>] + /// CHECK-DAG: Return [<<And>>] + + /// CHECK-START: long Main.UShr60And7(long) instruction_simplifier (after) + /// CHECK-DAG: <<Arg:j\d+>> ParameterValue + /// CHECK-DAG: <<Const60:i\d+>> IntConstant 60 + /// CHECK-DAG: <<Const7:j\d+>> LongConstant 7 + /// CHECK-DAG: <<UShr:j\d+>> UShr [<<Arg>>,<<Const60>>] + /// CHECK-DAG: <<And:j\d+>> And [<<UShr>>,<<Const7>>] + /// CHECK-DAG: Return [<<And>>] + + public static long UShr60And7(long arg) { + return (arg >>> 60) & 7; + } + + /// CHECK-START: int Main.Shr24And255(int) instruction_simplifier (before) + /// CHECK-DAG: <<Arg:i\d+>> ParameterValue + /// CHECK-DAG: <<Const24:i\d+>> IntConstant 24 + /// CHECK-DAG: <<Const255:i\d+>> IntConstant 255 + /// CHECK-DAG: <<Shr:i\d+>> Shr [<<Arg>>,<<Const24>>] + /// CHECK-DAG: <<And:i\d+>> And [<<Shr>>,<<Const255>>] + /// CHECK-DAG: Return [<<And>>] + + /// CHECK-START: int Main.Shr24And255(int) instruction_simplifier (after) + /// CHECK-DAG: <<Arg:i\d+>> ParameterValue + /// CHECK-DAG: <<Const24:i\d+>> IntConstant 24 + /// CHECK-DAG: <<UShr:i\d+>> UShr [<<Arg>>,<<Const24>>] + /// CHECK-DAG: Return [<<UShr>>] + + /// CHECK-START: int Main.Shr24And255(int) instruction_simplifier (after) + /// CHECK-NOT: Shr + /// CHECK-NOT: And + + public static int Shr24And255(int arg) { + return (arg >> 24) & 255; + } + + /// CHECK-START: long Main.Shr56And255(long) instruction_simplifier (before) + /// CHECK-DAG: <<Arg:j\d+>> ParameterValue + /// CHECK-DAG: <<Const56:i\d+>> IntConstant 56 + /// CHECK-DAG: <<Const255:j\d+>> LongConstant 255 + /// CHECK-DAG: <<Shr:j\d+>> Shr [<<Arg>>,<<Const56>>] + /// CHECK-DAG: <<And:j\d+>> And [<<Shr>>,<<Const255>>] + /// CHECK-DAG: Return [<<And>>] + + /// CHECK-START: long Main.Shr56And255(long) instruction_simplifier (after) + /// CHECK-DAG: <<Arg:j\d+>> ParameterValue + /// CHECK-DAG: <<Const56:i\d+>> IntConstant 56 + /// CHECK-DAG: <<UShr:j\d+>> UShr [<<Arg>>,<<Const56>>] + /// CHECK-DAG: Return [<<UShr>>] + + /// CHECK-START: long Main.Shr56And255(long) instruction_simplifier (after) + /// CHECK-NOT: Shr + /// CHECK-NOT: And + + public static long Shr56And255(long arg) { + return (arg >> 56) & 255; + } + + /// CHECK-START: int Main.Shr24And127(int) instruction_simplifier (before) + /// CHECK-DAG: <<Arg:i\d+>> ParameterValue + /// CHECK-DAG: <<Const24:i\d+>> IntConstant 24 + /// CHECK-DAG: <<Const127:i\d+>> IntConstant 127 + /// CHECK-DAG: <<Shr:i\d+>> Shr [<<Arg>>,<<Const24>>] + /// CHECK-DAG: <<And:i\d+>> And [<<Shr>>,<<Const127>>] + /// CHECK-DAG: Return [<<And>>] + + /// CHECK-START: int Main.Shr24And127(int) instruction_simplifier (after) + /// CHECK-DAG: <<Arg:i\d+>> ParameterValue + /// CHECK-DAG: <<Const24:i\d+>> IntConstant 24 + /// CHECK-DAG: <<Const127:i\d+>> IntConstant 127 + /// CHECK-DAG: <<Shr:i\d+>> Shr [<<Arg>>,<<Const24>>] + /// CHECK-DAG: <<And:i\d+>> And [<<Shr>>,<<Const127>>] + /// CHECK-DAG: Return [<<And>>] + + public static int Shr24And127(int arg) { + return (arg >> 24) & 127; + } + + /// CHECK-START: long Main.Shr56And127(long) instruction_simplifier (before) + /// CHECK-DAG: <<Arg:j\d+>> ParameterValue + /// CHECK-DAG: <<Const56:i\d+>> IntConstant 56 + /// CHECK-DAG: <<Const127:j\d+>> LongConstant 127 + /// CHECK-DAG: 
<<Shr:j\d+>> Shr [<<Arg>>,<<Const56>>] + /// CHECK-DAG: <<And:j\d+>> And [<<Shr>>,<<Const127>>] + /// CHECK-DAG: Return [<<And>>] + + /// CHECK-START: long Main.Shr56And127(long) instruction_simplifier (after) + /// CHECK-DAG: <<Arg:j\d+>> ParameterValue + /// CHECK-DAG: <<Const56:i\d+>> IntConstant 56 + /// CHECK-DAG: <<Const127:j\d+>> LongConstant 127 + /// CHECK-DAG: <<Shr:j\d+>> Shr [<<Arg>>,<<Const56>>] + /// CHECK-DAG: <<And:j\d+>> And [<<Shr>>,<<Const127>>] + /// CHECK-DAG: Return [<<And>>] + + public static long Shr56And127(long arg) { + return (arg >> 56) & 127; + } + /// CHECK-START: long Main.Div1(long) instruction_simplifier (before) /// CHECK-DAG: <<Arg:j\d+>> ParameterValue /// CHECK-DAG: <<Const1:j\d+>> LongConstant 1 @@ -1109,5 +1275,13 @@ public class Main { assertFloatEquals(DivMP25(100.0f), -400.0f); assertDoubleEquals(DivMP25(150.0), -600.0); assertLongEquals(Shl1(100), 200); + assertIntEquals(UShr28And15(0xc1234567), 0xc); + assertLongEquals(UShr60And15(0xc123456787654321L), 0xcL); + assertIntEquals(UShr28And7(0xc1234567), 0x4); + assertLongEquals(UShr60And7(0xc123456787654321L), 0x4L); + assertIntEquals(Shr24And255(0xc1234567), 0xc1); + assertLongEquals(Shr56And255(0xc123456787654321L), 0xc1L); + assertIntEquals(Shr24And127(0xc1234567), 0x41); + assertLongEquals(Shr56And127(0xc123456787654321L), 0x41L); } } diff --git a/test/461-get-reference-vreg/expected.txt b/test/461-get-reference-vreg/expected.txt index e69de29bb2..6a5618ebc6 100644 --- a/test/461-get-reference-vreg/expected.txt +++ b/test/461-get-reference-vreg/expected.txt @@ -0,0 +1 @@ +JNI_OnLoad called diff --git a/test/466-get-live-vreg/expected.txt b/test/466-get-live-vreg/expected.txt index e69de29bb2..6a5618ebc6 100644 --- a/test/466-get-live-vreg/expected.txt +++ b/test/466-get-live-vreg/expected.txt @@ -0,0 +1 @@ +JNI_OnLoad called diff --git a/test/466-get-live-vreg/get_live_vreg_jni.cc b/test/466-get-live-vreg/get_live_vreg_jni.cc index e3e00918f8..7e9a583faf 100644 --- a/test/466-get-live-vreg/get_live_vreg_jni.cc +++ b/test/466-get-live-vreg/get_live_vreg_jni.cc @@ -42,7 +42,9 @@ class TestVisitor : public StackVisitor { } else if (m_name.compare("testIntervalHole") == 0) { found_method_ = true; uint32_t value = 0; - if (GetCurrentQuickFrame() != nullptr && m->IsOptimized(sizeof(void*))) { + if (GetCurrentQuickFrame() != nullptr && + m->IsOptimized(sizeof(void*)) && + !Runtime::Current()->IsDebuggable()) { CHECK_EQ(GetVReg(m, 0, kIntVReg, &value), false); } else { CHECK(GetVReg(m, 0, kIntVReg, &value)); diff --git a/test/482-checker-loop-back-edge-use/src/Main.java b/test/482-checker-loop-back-edge-use/src/Main.java index 2cfb04d652..6b4da9de27 100644 --- a/test/482-checker-loop-back-edge-use/src/Main.java +++ b/test/482-checker-loop-back-edge-use/src/Main.java @@ -18,16 +18,27 @@ public class Main { /// CHECK-START: void Main.loop1(boolean) liveness (after) - /// CHECK: ParameterValue liveness:2 ranges:{[2,22)} uses:[17,22] - /// CHECK: Goto liveness:20 + /// CHECK: <<Arg:z\d+>> ParameterValue liveness:<<ArgLiv:\d+>> ranges:{[<<ArgLiv>>,<<ArgLoopUse:\d+>>)} uses:[<<ArgUse:\d+>>,<<ArgLoopUse>>] + /// CHECK: If [<<Arg>>] liveness:<<IfLiv:\d+>> + /// CHECK: Goto liveness:<<GotoLiv:\d+>> + /// CHECK: Exit + /// CHECK-EVAL: <<IfLiv>> + 1 == <<ArgUse>> + /// CHECK-EVAL: <<GotoLiv>> + 2 == <<ArgLoopUse>> + public static void loop1(boolean incoming) { while (incoming) {} } /// CHECK-START: void Main.loop2(boolean) liveness (after) - /// CHECK: ParameterValue liveness:4 ranges:{[4,44)} uses:[35,40,44] - 
/// CHECK: Goto liveness:38 - /// CHECK: Goto liveness:42 + /// CHECK: <<Arg:z\d+>> ParameterValue liveness:<<ArgLiv:\d+>> ranges:{[<<ArgLiv>>,<<ArgLoopUse2:\d+>>)} uses:[<<ArgUse:\d+>>,<<ArgLoopUse1:\d+>>,<<ArgLoopUse2>>] + /// CHECK: If [<<Arg>>] liveness:<<IfLiv:\d+>> + /// CHECK: Goto liveness:<<GotoLiv1:\d+>> + /// CHECK: Goto liveness:<<GotoLiv2:\d+>> + /// CHECK-EVAL: <<IfLiv>> + 1 == <<ArgUse>> + /// CHECK-EVAL: <<GotoLiv1>> < <<GotoLiv2>> + /// CHECK-EVAL: <<GotoLiv1>> + 2 == <<ArgLoopUse1>> + /// CHECK-EVAL: <<GotoLiv2>> + 2 == <<ArgLoopUse2>> + public static void loop2(boolean incoming) { while (true) { System.out.println("foo"); @@ -36,11 +47,14 @@ public class Main { } /// CHECK-START: void Main.loop3(boolean) liveness (after) - /// CHECK: ParameterValue liveness:4 ranges:{[4,60)} uses:[56,60] - /// CHECK: Goto liveness:58 + /// CHECK: <<Arg:z\d+>> ParameterValue liveness:<<ArgLiv:\d+>> ranges:{[<<ArgLiv>>,<<ArgLoopUse:\d+>>)} uses:[<<ArgUse:\d+>>,<<ArgLoopUse>>] + /// CHECK: Goto liveness:<<GotoLiv1:\d+>> + /// CHECK: InvokeVirtual [{{l\d+}},<<Arg>>] method_name:java.io.PrintStream.println liveness:<<InvokeLiv:\d+>> + /// CHECK: Goto liveness:<<GotoLiv2:\d+>> + /// CHECK-EVAL: <<InvokeLiv>> == <<ArgUse>> + /// CHECK-EVAL: <<GotoLiv1>> < <<GotoLiv2>> + /// CHECK-EVAL: <<GotoLiv2>> + 2 == <<ArgLoopUse>> - // CHECK-START: void Main.loop3(boolean) liveness (after) - // CHECK-NOT: Goto liveness:50 public static void loop3(boolean incoming) { // 'incoming' only needs a use at the outer loop's back edge. while (System.currentTimeMillis() != 42) { @@ -49,11 +63,11 @@ public class Main { } } - // CHECK-START: void Main.loop4(boolean) liveness (after) - // CHECK: ParameterValue liveness:4 ranges:{[4,22)} uses:[22] + /// CHECK-START: void Main.loop4(boolean) liveness (after) + /// CHECK: <<Arg:z\d+>> ParameterValue liveness:<<ArgLiv:\d+>> ranges:{[<<ArgLiv>>,<<ArgUse:\d+>>)} uses:[<<ArgUse>>] + /// CHECK: InvokeVirtual [{{l\d+}},<<Arg>>] method_name:java.io.PrintStream.println liveness:<<InvokeLiv:\d+>> + /// CHECK-EVAL: <<InvokeLiv>> == <<ArgUse>> - // CHECK-START: void Main.loop4(boolean) liveness (after) - // CHECK-NOT: Goto liveness:18 public static void loop4(boolean incoming) { // 'incoming' has no loop use, so should not have back edge uses. System.out.println(incoming); @@ -63,59 +77,98 @@ public class Main { } /// CHECK-START: void Main.loop5(boolean) liveness (after) - /// CHECK: ParameterValue liveness:4 ranges:{[4,54)} uses:[37,46,50,54] - /// CHECK: Goto liveness:48 - /// CHECK: Goto liveness:52 + /// CHECK: <<Arg:z\d+>> ParameterValue liveness:<<ArgLiv:\d+>> ranges:{[<<ArgLiv>>,<<ArgLoopUse2:\d+>>)} uses:[<<ArgUse:\d+>>,<<ArgLoopUse1:\d+>>,<<ArgLoopUse2>>] + /// CHECK: InvokeVirtual [{{l\d+}},<<Arg>>] method_name:java.io.PrintStream.println liveness:<<InvokeLiv:\d+>> + /// CHECK: Goto liveness:<<GotoLiv1:\d+>> + /// CHECK: Goto liveness:<<GotoLiv2:\d+>> + /// CHECK: Exit + /// CHECK-EVAL: <<InvokeLiv>> == <<ArgUse>> + /// CHECK-EVAL: <<GotoLiv1>> < <<GotoLiv2>> + /// CHECK-EVAL: <<GotoLiv1>> + 2 == <<ArgLoopUse1>> + /// CHECK-EVAL: <<GotoLiv2>> + 2 == <<ArgLoopUse2>> + public static void loop5(boolean incoming) { // 'incoming' must have a use at both back edges. 
- while (Runtime.getRuntime() != null) { - while (incoming) { + for (long i = System.nanoTime(); i < 42; ++i) { + for (long j = System.currentTimeMillis(); j != 42; ++j) { System.out.println(incoming); } } } /// CHECK-START: void Main.loop6(boolean) liveness (after) - /// CHECK: ParameterValue liveness:4 ranges:{[4,50)} uses:[26,50] - /// CHECK: Goto liveness:48 + /// CHECK: <<Arg:z\d+>> ParameterValue liveness:<<ArgLiv:\d+>> ranges:{[<<ArgLiv>>,<<ArgLoopUse:\d+>>)} uses:[<<ArgUse:\d+>>,<<ArgLoopUse>>] + /// CHECK: InvokeVirtual [{{l\d+}},<<Arg>>] method_name:java.io.PrintStream.println liveness:<<InvokeLiv:\d+>> + /// CHECK: Add + /// CHECK: Goto liveness:<<GotoLiv1:\d+>> + /// CHECK: Add + /// CHECK: Goto liveness:<<GotoLiv2:\d+>> + /// CHECK: Exit + /// CHECK-EVAL: <<InvokeLiv>> == <<ArgUse>> + /// CHECK-EVAL: <<GotoLiv1>> < <<GotoLiv2>> + /// CHECK-EVAL: <<GotoLiv2>> + 2 == <<ArgLoopUse>> - /// CHECK-START: void Main.loop6(boolean) liveness (after) - /// CHECK-NOT: Goto liveness:24 public static void loop6(boolean incoming) { // 'incoming' must have a use only at the first loop's back edge. - while (true) { + for (long i = System.nanoTime(); i < 42; ++i) { System.out.println(incoming); - while (Runtime.getRuntime() != null) {} + for (long j = System.currentTimeMillis(); j != 42; ++j) {} } } /// CHECK-START: void Main.loop7(boolean) liveness (after) - /// CHECK: ParameterValue liveness:4 ranges:{[4,54)} uses:[36,45,50,54] - /// CHECK: Goto liveness:48 - /// CHECK: Goto liveness:52 + /// CHECK: <<Arg:z\d+>> ParameterValue liveness:<<ArgLiv:\d+>> ranges:{[<<ArgLiv>>,<<ArgLoopUse2:\d+>>)} uses:[<<ArgUse1:\d+>>,<<ArgUse2:\d+>>,<<ArgLoopUse1:\d+>>,<<ArgLoopUse2>>] + /// CHECK: InvokeVirtual [{{l\d+}},<<Arg>>] method_name:java.io.PrintStream.println liveness:<<InvokeLiv:\d+>> + /// CHECK: If [<<Arg>>] liveness:<<IfLiv:\d+>> + /// CHECK: Goto liveness:<<GotoLiv1:\d+>> + /// CHECK: Goto liveness:<<GotoLiv2:\d+>> + /// CHECK: Exit + /// CHECK-EVAL: <<InvokeLiv>> == <<ArgUse1>> + /// CHECK-EVAL: <<IfLiv>> + 1 == <<ArgUse2>> + /// CHECK-EVAL: <<GotoLiv1>> < <<GotoLiv2>> + /// CHECK-EVAL: <<GotoLiv1>> + 2 == <<ArgLoopUse1>> + /// CHECK-EVAL: <<GotoLiv2>> + 2 == <<ArgLoopUse2>> + public static void loop7(boolean incoming) { // 'incoming' must have a use at both back edges. while (Runtime.getRuntime() != null) { System.out.println(incoming); while (incoming) {} + System.nanoTime(); // beat back edge splitting } } /// CHECK-START: void Main.loop8() liveness (after) - /// CHECK: StaticFieldGet liveness:14 ranges:{[14,48)} uses:[39,44,48] - /// CHECK: Goto liveness:42 - /// CHECK: Goto liveness:46 + /// CHECK: <<Arg:z\d+>> StaticFieldGet liveness:<<ArgLiv:\d+>> ranges:{[<<ArgLiv>>,<<ArgLoopUse2:\d+>>)} uses:[<<ArgUse:\d+>>,<<ArgLoopUse1:\d+>>,<<ArgLoopUse2>>] + /// CHECK: If [<<Arg>>] liveness:<<IfLiv:\d+>> + /// CHECK: Goto liveness:<<GotoLiv1:\d+>> + /// CHECK: Goto liveness:<<GotoLiv2:\d+>> + /// CHECK: Exit + /// CHECK-EVAL: <<IfLiv>> + 1 == <<ArgUse>> + /// CHECK-EVAL: <<GotoLiv1>> < <<GotoLiv2>> + /// CHECK-EVAL: <<GotoLiv1>> + 2 == <<ArgLoopUse1>> + /// CHECK-EVAL: <<GotoLiv2>> + 2 == <<ArgLoopUse2>> + public static void loop8() { // 'incoming' must have a use at both back edges. 
boolean incoming = field; while (Runtime.getRuntime() != null) { + System.nanoTime(); // beat pre-header creation while (incoming) {} + System.nanoTime(); // beat back edge splitting } } /// CHECK-START: void Main.loop9() liveness (after) - /// CHECK: StaticFieldGet liveness:26 ranges:{[26,40)} uses:[35,40] - /// CHECK: Goto liveness:42 + /// CHECK: <<Arg:z\d+>> StaticFieldGet liveness:<<ArgLiv:\d+>> ranges:{[<<ArgLiv>>,<<ArgLoopUse:\d+>>)} uses:[<<ArgUse:\d+>>,<<ArgLoopUse>>] + /// CHECK: If [<<Arg>>] liveness:<<IfLiv:\d+>> + /// CHECK: Goto liveness:<<GotoLiv1:\d+>> + /// CHECK: Goto liveness:<<GotoLiv2:\d+>> + /// CHECK: Exit + /// CHECK-EVAL: <<IfLiv>> + 1 == <<ArgUse>> + /// CHECK-EVAL: <<GotoLiv1>> < <<GotoLiv2>> + /// CHECK-EVAL: <<GotoLiv1>> + 2 == <<ArgLoopUse>> + public static void loop9() { while (Runtime.getRuntime() != null) { // 'incoming' must only have a use in the inner loop. diff --git a/test/485-checker-dce-switch/expected.txt b/test/485-checker-dce-switch/expected.txt new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/test/485-checker-dce-switch/expected.txt diff --git a/test/485-checker-dce-switch/info.txt b/test/485-checker-dce-switch/info.txt new file mode 100644 index 0000000000..6653526827 --- /dev/null +++ b/test/485-checker-dce-switch/info.txt @@ -0,0 +1 @@ +Tests that DCE can remove a packed switch. diff --git a/test/485-checker-dce-switch/src/Main.java b/test/485-checker-dce-switch/src/Main.java new file mode 100644 index 0000000000..019d876ec8 --- /dev/null +++ b/test/485-checker-dce-switch/src/Main.java @@ -0,0 +1,192 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +public class Main { + + public static int $inline$method() { + return 5; + } + + /// CHECK-START: int Main.wholeSwitchDead(int) dead_code_elimination_final (before) + /// CHECK-DAG: PackedSwitch + + /// CHECK-START: int Main.wholeSwitchDead(int) dead_code_elimination_final (after) + /// CHECK-DAG: <<Const100:i\d+>> IntConstant 100 + /// CHECK-DAG: Return [<<Const100>>] + + /// CHECK-START: int Main.wholeSwitchDead(int) dead_code_elimination_final (after) + /// CHECK-NOT: PackedSwitch + + public static int wholeSwitchDead(int j) { + int i = $inline$method(); + int l = 100; + if (i > 100) { + switch(j) { + case 1: + i++; + break; + case 2: + i = 99; + break; + case 3: + i = 100; + break; + case 4: + i = -100; + break; + case 5: + i = 7; + break; + case 6: + i = -9; + break; + } + l += i; + } + + return l; + } + + /// CHECK-START: int Main.constantSwitch_InRange() dead_code_elimination_final (before) + /// CHECK-DAG: PackedSwitch + + /// CHECK-START: int Main.constantSwitch_InRange() dead_code_elimination_final (after) + /// CHECK-DAG: <<Const7:i\d+>> IntConstant 7 + /// CHECK-DAG: Return [<<Const7>>] + + /// CHECK-START: int Main.constantSwitch_InRange() dead_code_elimination_final (after) + /// CHECK-NOT: PackedSwitch + + public static int constantSwitch_InRange() { + int i = $inline$method(); + switch(i) { + case 1: + i++; + break; + case 2: + i = 99; + break; + case 3: + i = 100; + break; + case 4: + i = -100; + break; + case 5: + i = 7; + break; + case 6: + i = -9; + break; + } + + return i; + } + + /// CHECK-START: int Main.constantSwitch_AboveRange() dead_code_elimination_final (before) + /// CHECK-DAG: PackedSwitch + + /// CHECK-START: int Main.constantSwitch_AboveRange() dead_code_elimination_final (after) + /// CHECK-DAG: <<Const15:i\d+>> IntConstant 15 + /// CHECK-DAG: Return [<<Const15>>] + + /// CHECK-START: int Main.constantSwitch_AboveRange() dead_code_elimination_final (after) + /// CHECK-NOT: PackedSwitch + + public static int constantSwitch_AboveRange() { + int i = $inline$method() + 10; + switch(i) { + case 1: + i++; + break; + case 2: + i = 99; + break; + case 3: + i = 100; + break; + case 4: + i = -100; + break; + case 5: + i = 7; + break; + case 6: + i = -9; + break; + } + + return i; + } + + /// CHECK-START: int Main.constantSwitch_BelowRange() dead_code_elimination_final (before) + /// CHECK-DAG: PackedSwitch + + /// CHECK-START: int Main.constantSwitch_BelowRange() dead_code_elimination_final (after) + /// CHECK-DAG: <<ConstM5:i\d+>> IntConstant -5 + /// CHECK-DAG: Return [<<ConstM5>>] + + /// CHECK-START: int Main.constantSwitch_BelowRange() dead_code_elimination_final (after) + /// CHECK-NOT: PackedSwitch + + public static int constantSwitch_BelowRange() { + int i = $inline$method() - 10; + switch(i) { + case 1: + i++; + break; + case 2: + i = 99; + break; + case 3: + i = 100; + break; + case 4: + i = -100; + break; + case 5: + i = 7; + break; + case 6: + i = -9; + break; + } + + return i; + } + + public static void main(String[] args) throws Exception { + int ret_val = wholeSwitchDead(10); + if (ret_val != 100) { + throw new Error("Incorrect return value from wholeSwitchDead:" + ret_val); + } + + ret_val = constantSwitch_InRange(); + if (ret_val != 7) { + throw new Error("Incorrect return value from constantSwitch_InRange:" + ret_val); + } + + ret_val = constantSwitch_AboveRange(); + if (ret_val != 15) { + throw new Error("Incorrect return value from constantSwitch_AboveRange:" + ret_val); + } + + ret_val = constantSwitch_BelowRange(); + if (ret_val != -5) 
{ + throw new Error("Incorrect return value from constantSwitch_BelowRange:" + ret_val); + } + } +} diff --git a/test/497-inlining-and-class-loader/expected.txt b/test/497-inlining-and-class-loader/expected.txt index f5b9fe07de..905dbfd2cb 100644 --- a/test/497-inlining-and-class-loader/expected.txt +++ b/test/497-inlining-and-class-loader/expected.txt @@ -1,3 +1,4 @@ +JNI_OnLoad called java.lang.Exception at Main.$noinline$bar(Main.java:124) at Level2.$inline$bar(Level1.java:25) diff --git a/test/510-checker-try-catch/smali/Builder.smali b/test/510-checker-try-catch/smali/Builder.smali index 2274ba4d43..1fde5edc23 100644 --- a/test/510-checker-try-catch/smali/Builder.smali +++ b/test/510-checker-try-catch/smali/Builder.smali @@ -59,7 +59,7 @@ ## CHECK: StoreLocal [v0,<<Minus2>>] ## CHECK: name "<<BCatch3>>" -## CHECK: predecessors "<<BEnterTry1>>" "<<BExitTry1>>" "<<BEnterTry2>>" "<<BExitTry2>>" +## CHECK: predecessors "<<BEnterTry1>>" "<<BEnterTry2>>" "<<BExitTry1>>" "<<BExitTry2>>" ## CHECK: successors "<<BReturn>>" ## CHECK: flags "catch_block" ## CHECK: StoreLocal [v0,<<Minus3>>] @@ -70,18 +70,18 @@ ## CHECK: xhandlers "<<BCatch1>>" "<<BCatch3>>" ## CHECK: TryBoundary kind:entry -## CHECK: name "<<BExitTry1>>" -## CHECK: predecessors "<<BTry1>>" -## CHECK: successors "<<BAdd>>" -## CHECK: xhandlers "<<BCatch1>>" "<<BCatch3>>" -## CHECK: TryBoundary kind:exit - ## CHECK: name "<<BEnterTry2>>" ## CHECK: predecessors "<<BAdd>>" ## CHECK: successors "<<BTry2>>" ## CHECK: xhandlers "<<BCatch2>>" "<<BCatch3>>" ## CHECK: TryBoundary kind:entry +## CHECK: name "<<BExitTry1>>" +## CHECK: predecessors "<<BTry1>>" +## CHECK: successors "<<BAdd>>" +## CHECK: xhandlers "<<BCatch1>>" "<<BCatch3>>" +## CHECK: TryBoundary kind:exit + ## CHECK: name "<<BExitTry2>>" ## CHECK: predecessors "<<BTry2>>" ## CHECK: successors "<<BReturn>>" @@ -121,8 +121,7 @@ goto :return .end method -# Test that multiple try-entry blocks are generated if there are multiple entry -# points into the try block. +# Tests try-entry block when there are multiple entry points into the try block. 
## CHECK-START: int Builder.testMultipleEntries(int, int, int, int) builder (after) @@ -142,20 +141,20 @@ ## CHECK: name "<<BTry1:B\d+>>" ## CHECK: predecessors "<<BEnterTry1>>" -## CHECK: successors "<<BTry2:B\d+>>" +## CHECK: successors "<<BExitTry1:B\d+>>" ## CHECK: Div -## CHECK: name "<<BTry2>>" -## CHECK: predecessors "<<BEnterTry2>>" "<<BTry1>>" -## CHECK: successors "<<BExitTry:B\d+>>" +## CHECK: name "<<BTry2:B\d+>>" +## CHECK: predecessors "<<BEnterTry2>>" +## CHECK: successors "<<BExitTry2:B\d+>>" ## CHECK: Div ## CHECK: name "<<BReturn:B\d+>>" -## CHECK: predecessors "<<BExitTry>>" "<<BCatch:B\d+>>" +## CHECK: predecessors "<<BExitTry2>>" "<<BCatch:B\d+>>" ## CHECK: Return ## CHECK: name "<<BCatch>>" -## CHECK: predecessors "<<BEnterTry1>>" "<<BEnterTry2>>" "<<BExitTry>>" +## CHECK: predecessors "<<BEnterTry1>>" "<<BEnterTry2>>" "<<BExitTry1>>" "<<BExitTry2>>" ## CHECK: successors "<<BReturn>>" ## CHECK: flags "catch_block" ## CHECK: StoreLocal [v0,<<Minus1>>] @@ -167,12 +166,18 @@ ## CHECK: TryBoundary kind:entry ## CHECK: name "<<BEnterTry2>>" -## CHECK: predecessors "<<BIf>>" +## CHECK: predecessors "<<BIf>>" "<<BExitTry1>>" ## CHECK: successors "<<BTry2>>" ## CHECK: xhandlers "<<BCatch>>" ## CHECK: TryBoundary kind:entry -## CHECK: name "<<BExitTry>>" +## CHECK: name "<<BExitTry1>>" +## CHECK: predecessors "<<BTry1>>" +## CHECK: successors "<<BEnterTry2>>" +## CHECK: xhandlers "<<BCatch>>" +## CHECK: TryBoundary kind:exit + +## CHECK: name "<<BExitTry2>>" ## CHECK: predecessors "<<BTry2>>" ## CHECK: successors "<<BReturn>>" ## CHECK: xhandlers "<<BCatch>>" @@ -314,18 +319,18 @@ ## CHECK: xhandlers "<<BCatch1>>" ## CHECK: TryBoundary kind:entry -## CHECK: name "<<BExit1>>" -## CHECK: predecessors "<<BTry1>>" -## CHECK: successors "<<BEnter2>>" -## CHECK: xhandlers "<<BCatch1>>" -## CHECK: TryBoundary kind:exit - ## CHECK: name "<<BEnter2>>" ## CHECK: predecessors "<<BExit1>>" ## CHECK: successors "<<BTry2>>" ## CHECK: xhandlers "<<BCatch2>>" ## CHECK: TryBoundary kind:entry +## CHECK: name "<<BExit1>>" +## CHECK: predecessors "<<BTry1>>" +## CHECK: successors "<<BEnter2>>" +## CHECK: xhandlers "<<BCatch1>>" +## CHECK: TryBoundary kind:exit + ## CHECK: name "<<BExit2>>" ## CHECK: predecessors "<<BTry2>>" ## CHECK: successors "<<BReturn>>" @@ -402,18 +407,18 @@ ## CHECK: xhandlers "<<BCatch1>>" ## CHECK: TryBoundary kind:entry -## CHECK: name "<<BExit1>>" -## CHECK: predecessors "<<BTry1>>" -## CHECK: successors "<<BReturn>>" -## CHECK: xhandlers "<<BCatch1>>" -## CHECK: TryBoundary kind:exit - ## CHECK: name "<<BEnter2>>" ## CHECK: predecessors "<<BGoto>>" ## CHECK: successors "<<BTry2>>" ## CHECK: xhandlers "<<BCatch2>>" ## CHECK: TryBoundary kind:entry +## CHECK: name "<<BExit1>>" +## CHECK: predecessors "<<BTry1>>" +## CHECK: successors "<<BReturn>>" +## CHECK: xhandlers "<<BCatch1>>" +## CHECK: TryBoundary kind:exit + ## CHECK: name "<<BExit2>>" ## CHECK: predecessors "<<BTry2>>" ## CHECK: successors "<<BEnter1>>" @@ -483,7 +488,7 @@ ## CHECK: StoreLocal [v0,<<Minus1>>] ## CHECK: name "<<BCatchAll>>" -## CHECK: predecessors "<<BEnter1>>" "<<BExit1>>" "<<BEnter2>>" "<<BExit2>>" "<<BEnter3>>" "<<BExit3>>" +## CHECK: predecessors "<<BEnter1>>" "<<BEnter2>>" "<<BEnter3>>" "<<BExit1>>" "<<BExit2>>" "<<BExit3>>" ## CHECK: successors "<<BReturn>>" ## CHECK: flags "catch_block" ## CHECK: StoreLocal [v0,<<Minus2>>] @@ -494,30 +499,30 @@ ## CHECK: xhandlers "<<BCatchAll>>" ## CHECK: TryBoundary kind:entry -## CHECK: name "<<BExit1>>" -## CHECK: predecessors "<<BTry1>>" -## CHECK: 
successors "<<BEnter2>>" -## CHECK: xhandlers "<<BCatchAll>>" -## CHECK: TryBoundary kind:exit - ## CHECK: name "<<BEnter2>>" ## CHECK: predecessors "<<BExit1>>" ## CHECK: successors "<<BTry2>>" ## CHECK: xhandlers "<<BCatchArith>>" "<<BCatchAll>>" ## CHECK: TryBoundary kind:entry -## CHECK: name "<<BExit2>>" -## CHECK: predecessors "<<BTry2>>" -## CHECK: successors "<<BEnter3>>" -## CHECK: xhandlers "<<BCatchArith>>" "<<BCatchAll>>" -## CHECK: TryBoundary kind:exit - ## CHECK: name "<<BEnter3>>" ## CHECK: predecessors "<<BExit2>>" ## CHECK: successors "<<BTry3>>" ## CHECK: xhandlers "<<BCatchAll>>" ## CHECK: TryBoundary kind:entry +## CHECK: name "<<BExit1>>" +## CHECK: predecessors "<<BTry1>>" +## CHECK: successors "<<BEnter2>>" +## CHECK: xhandlers "<<BCatchAll>>" +## CHECK: TryBoundary kind:exit + +## CHECK: name "<<BExit2>>" +## CHECK: predecessors "<<BTry2>>" +## CHECK: successors "<<BEnter3>>" +## CHECK: xhandlers "<<BCatchArith>>" "<<BCatchAll>>" +## CHECK: TryBoundary kind:exit + ## CHECK: name "<<BExit3>>" ## CHECK: predecessors "<<BTry3>>" ## CHECK: successors "<<BReturn>>" @@ -577,7 +582,7 @@ ## CHECK: Div ## CHECK: name "<<BCatch>>" -## CHECK: predecessors "<<BEnterTry1>>" "<<BExitTry1>>" "<<BEnterTry2>>" "<<BExitTry2>>" +## CHECK: predecessors "<<BEnterTry1>>" "<<BEnterTry2>>" "<<BExitTry1>>" "<<BExitTry2>>" ## CHECK: successors "<<BReturn>>" ## CHECK: flags "catch_block" ## CHECK: StoreLocal [v0,<<Minus1>>] @@ -588,18 +593,18 @@ ## CHECK: xhandlers "<<BCatch>>" ## CHECK: TryBoundary kind:entry -## CHECK: name "<<BExitTry1>>" -## CHECK: predecessors "<<BTry1>>" -## CHECK: successors "<<BOutside>>" -## CHECK: xhandlers "<<BCatch>>" -## CHECK: TryBoundary kind:exit - ## CHECK: name "<<BEnterTry2>>" ## CHECK: predecessors "<<BOutside>>" ## CHECK: successors "<<BTry2>>" ## CHECK: xhandlers "<<BCatch>>" ## CHECK: TryBoundary kind:entry +## CHECK: name "<<BExitTry1>>" +## CHECK: predecessors "<<BTry1>>" +## CHECK: successors "<<BOutside>>" +## CHECK: xhandlers "<<BCatch>>" +## CHECK: TryBoundary kind:exit + ## CHECK: name "<<BExitTry2>>" ## CHECK: predecessors "<<BTry2>>" ## CHECK: successors "<<BReturn>>" @@ -647,21 +652,21 @@ ## CHECK: name "<<BTry1:B\d+>>" ## CHECK: predecessors "<<BEnterTry1>>" -## CHECK: successors "<<BTry2:B\d+>>" +## CHECK: successors "<<BExitTry1:B\d+>>" ## CHECK: Div -## CHECK: name "<<BTry2>>" -## CHECK: predecessors "<<BEnterTry2>>" "<<BTry1>>" -## CHECK: successors "<<BExitTry:B\d+>>" +## CHECK: name "<<BTry2:B\d+>>" +## CHECK: predecessors "<<BEnterTry2>>" +## CHECK: successors "<<BExitTry2:B\d+>>" ## CHECK: Div ## CHECK: name "<<BOutside>>" -## CHECK: predecessors "<<BPSwitch1>>" "<<BExitTry>>" +## CHECK: predecessors "<<BPSwitch1>>" "<<BExitTry2>>" ## CHECK: successors "<<BCatchReturn:B\d+>>" ## CHECK: Div ## CHECK: name "<<BCatchReturn>>" -## CHECK: predecessors "<<BOutside>>" "<<BEnterTry1>>" "<<BEnterTry2>>" "<<BExitTry>>" +## CHECK: predecessors "<<BOutside>>" "<<BEnterTry1>>" "<<BEnterTry2>>" "<<BExitTry1>>" "<<BExitTry2>>" ## CHECK: flags "catch_block" ## CHECK: Return @@ -677,7 +682,13 @@ ## CHECK: xhandlers "<<BCatchReturn>>" ## CHECK: TryBoundary kind:entry -## CHECK: name "<<BExitTry>>" +## CHECK: name "<<BExitTry1>>" +## CHECK: predecessors "<<BTry1>>" +## CHECK: successors "<<BEnterTry2>>" +## CHECK: xhandlers "<<BCatchReturn>>" +## CHECK: TryBoundary kind:exit + +## CHECK: name "<<BExitTry2>>" ## CHECK: predecessors "<<BTry2>>" ## CHECK: successors "<<BOutside>>" ## CHECK: xhandlers "<<BCatchReturn>>" @@ -741,7 +752,7 @@ ## CHECK: Div ## 
CHECK: name "<<BCatchReturn>>" -## CHECK: predecessors "<<BOutside>>" "<<BEnterTry1>>" "<<BExitTry1>>" "<<BEnterTry2>>" "<<BExitTry2>>" +## CHECK: predecessors "<<BOutside>>" "<<BEnterTry1>>" "<<BEnterTry2>>" "<<BExitTry1>>" "<<BExitTry2>>" ## CHECK: flags "catch_block" ## CHECK: Return @@ -751,18 +762,18 @@ ## CHECK: xhandlers "<<BCatchReturn>>" ## CHECK: TryBoundary kind:entry -## CHECK: name "<<BExitTry1>>" -## CHECK: predecessors "<<BPSwitch0>>" -## CHECK: successors "<<BPSwitch1>>" -## CHECK: xhandlers "<<BCatchReturn>>" -## CHECK: TryBoundary kind:exit - ## CHECK: name "<<BEnterTry2>>" ## CHECK: predecessors "<<BPSwitch1>>" ## CHECK: successors "<<BTry1>>" ## CHECK: xhandlers "<<BCatchReturn>>" ## CHECK: TryBoundary kind:entry +## CHECK: name "<<BExitTry1>>" +## CHECK: predecessors "<<BPSwitch0>>" +## CHECK: successors "<<BPSwitch1>>" +## CHECK: xhandlers "<<BCatchReturn>>" +## CHECK: TryBoundary kind:exit + ## CHECK: name "<<BExitTry2>>" ## CHECK: predecessors "<<BTry2>>" ## CHECK: successors "<<BOutside>>" @@ -907,7 +918,7 @@ ## CHECK: Div ## CHECK: name "<<BCatch:B\d+>>" -## CHECK: predecessors "<<BExitTry1>>" "<<BEnterTry1>>" "<<BExitTry1>>" "<<BEnterTry2:B\d+>>" "<<BExitTry2:B\d+>>" +## CHECK: predecessors "<<BExitTry1>>" "<<BEnterTry1>>" "<<BEnterTry2:B\d+>>" "<<BExitTry1>>" "<<BExitTry2:B\d+>>" ## CHECK: successors "<<BEnterTry2>>" ## CHECK: flags "catch_block" @@ -928,18 +939,18 @@ ## CHECK: xhandlers "<<BCatch>>" ## CHECK: TryBoundary kind:entry -## CHECK: name "<<BExitTry1>>" -## CHECK: predecessors "<<BTry1>>" -## CHECK: successors "<<BCatch>>" -## CHECK: xhandlers "<<BCatch>>" -## CHECK: TryBoundary kind:exit - ## CHECK: name "<<BEnterTry2>>" ## CHECK: predecessors "<<BCatch>>" ## CHECK: successors "<<BTry2>>" ## CHECK: xhandlers "<<BCatch>>" ## CHECK: TryBoundary kind:entry +## CHECK: name "<<BExitTry1>>" +## CHECK: predecessors "<<BTry1>>" +## CHECK: successors "<<BCatch>>" +## CHECK: xhandlers "<<BCatch>>" +## CHECK: TryBoundary kind:exit + ## CHECK: name "<<BExitTry2>>" ## CHECK: predecessors "<<BTry2>>" ## CHECK: successors "<<BReturn>>" @@ -1001,18 +1012,18 @@ ## CHECK: xhandlers "<<BCatch2>>" ## CHECK: TryBoundary kind:entry -## CHECK: name "<<BExitTry1>>" -## CHECK: predecessors "<<BTry1>>" -## CHECK: successors "<<BCatch2>>" -## CHECK: xhandlers "<<BCatch2>>" -## CHECK: TryBoundary kind:exit - ## CHECK: name "<<BEnterTry2>>" ## CHECK: predecessors "<<BCatch2>>" ## CHECK: successors "<<BTry2>>" ## CHECK: xhandlers "<<BCatch1>>" ## CHECK: TryBoundary kind:entry +## CHECK: name "<<BExitTry1>>" +## CHECK: predecessors "<<BTry1>>" +## CHECK: successors "<<BCatch2>>" +## CHECK: xhandlers "<<BCatch2>>" +## CHECK: TryBoundary kind:exit + ## CHECK: name "<<BExitTry2>>" ## CHECK: predecessors "<<BTry2>>" ## CHECK: successors "<<BReturn>>" @@ -1037,6 +1048,52 @@ return p0 .end method +# Test graph with try/catch inside a loop. 
+ +## CHECK-START: int Builder.testTryInLoop(int, int) builder (after) + +## CHECK: name "B0" +## CHECK: successors "<<BEnterTry:B\d+>>" + +## CHECK: name "<<BTry:B\d+>>" +## CHECK: predecessors "<<BEnterTry>>" +## CHECK: successors "<<BExitTry:B\d+>>" +## CHECK: Div + +## CHECK: name "<<BCatch:B\d+>>" +## CHECK: predecessors "<<BEnterTry>>" "<<BExitTry>>" +## CHECK: successors "<<BEnterTry>>" +## CHECK: flags "catch_block" + +## CHECK: name "<<BExit:B\d+>>" +## CHECK-NOT: predecessors "{{B\d+}}" +## CHECK: end_block + +## CHECK: name "<<BEnterTry>>" +## CHECK: predecessors "B0" +## CHECK: successors "<<BTry>>" +## CHECK: xhandlers "<<BCatch>>" +## CHECK: TryBoundary kind:entry + +## CHECK: name "<<BExitTry>>" +## CHECK: predecessors "<<BTry>>" +## CHECK: successors "<<BEnterTry>>" +## CHECK: xhandlers "<<BCatch>>" +## CHECK: TryBoundary kind:exit + +.method public static testTryInLoop(II)I + .registers 3 + + :try_start + div-int/2addr p0, p1 + goto :try_start + :try_end + .catchall {:try_start .. :try_end} :catch_all + + :catch_all + goto :try_start +.end method + # Test that a MOVE_RESULT instruction is placed into the same block as the # INVOKE it follows, even if there is a try boundary between them. diff --git a/test/526-checker-caller-callee-regs/src/Main.java b/test/526-checker-caller-callee-regs/src/Main.java index a1f33014ef..f402c2cd48 100644 --- a/test/526-checker-caller-callee-regs/src/Main.java +++ b/test/526-checker-caller-callee-regs/src/Main.java @@ -36,6 +36,8 @@ public class Main { // ------------------------------|------------------------|----------------- // ARM64 callee-saved registers | [x20-x29] | x2[0-9] // ARM callee-saved registers | [r5-r8,r10,r11] | r([5-8]|10|11) + // X86 callee-saved registers | [ebp,esi,edi] | e(bp|si|di) + // X86_64 callee-saved registers | [rbx,rbp,r12-15] | r(bx|bp|1[2-5]) /** * Check that a value live across a function call is allocated in a callee @@ -58,7 +60,21 @@ public class Main { /// CHECK: Sub [<<t1>>,<<t2>>] /// CHECK: Return - // TODO: Add tests for other architectures. + /// CHECK-START-X86: int Main.$opt$LiveInCall(int) register (after) + /// CHECK-DAG: <<Arg:i\d+>> ParameterValue + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 + /// CHECK: <<t1:i\d+>> Add [<<Arg>>,<<Const1>>] {{.*->e(bp|si|di)}} + /// CHECK: <<t2:i\d+>> InvokeStaticOrDirect + /// CHECK: Sub [<<t1>>,<<t2>>] + /// CHECK: Return + + /// CHECK-START-X86_64: int Main.$opt$LiveInCall(int) register (after) + /// CHECK-DAG: <<Arg:i\d+>> ParameterValue + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 + /// CHECK: <<t1:i\d+>> Add [<<Arg>>,<<Const1>>] {{.*->r(bx|bp|1[2-5])}} + /// CHECK: <<t2:i\d+>> InvokeStaticOrDirect + /// CHECK: Sub [<<t1>>,<<t2>>] + /// CHECK: Return public static int $opt$LiveInCall(int arg) { int t1 = arg + 1; diff --git a/test/530-checker-loops/src/Main.java b/test/530-checker-loops/src/Main.java index e518a61f88..58c92f1ea4 100644 --- a/test/530-checker-loops/src/Main.java +++ b/test/530-checker-loops/src/Main.java @@ -22,7 +22,7 @@ public class Main { static int sResult; // - // Various sequence variables where bound checks can be removed from loop. + // Various sequence variables used in bound checks. 
// /// CHECK-START: int Main.linear(int[]) BCE (before) @@ -62,6 +62,19 @@ public class Main { return result; } + /// CHECK-START: int Main.linearVeryObscure(int[]) BCE (before) + /// CHECK-DAG: BoundsCheck + /// CHECK-START: int Main.linearVeryObscure(int[]) BCE (after) + /// CHECK-NOT: BoundsCheck + private static int linearVeryObscure(int[] x) { + int result = 0; + for (int i = 0; i < x.length; i++) { + int k = (-i) + (i << 5) + i - (32 * i) + 5 + (int) i; + result += x[k - 5]; + } + return result; + } + /// CHECK-START: int Main.linearWhile(int[]) BCE (before) /// CHECK-DAG: BoundsCheck /// CHECK-START: int Main.linearWhile(int[]) BCE (after) @@ -75,6 +88,42 @@ public class Main { return result; } + /// CHECK-START: int Main.linearThreeWayPhi(int[]) BCE (before) + /// CHECK-DAG: BoundsCheck + /// CHECK-START: int Main.linearThreeWayPhi(int[]) BCE (after) + /// CHECK-NOT: BoundsCheck + private static int linearThreeWayPhi(int[] x) { + int result = 0; + for (int i = 0; i < x.length; ) { + if (x[i] == 5) { + i++; + continue; + } + result += x[i++]; + } + return result; + } + + /// CHECK-START: int Main.linearFourWayPhi(int[]) BCE (before) + /// CHECK-DAG: BoundsCheck + /// CHECK-START: int Main.linearFourWayPhi(int[]) BCE (after) + /// CHECK-NOT: BoundsCheck + private static int linearFourWayPhi(int[] x) { + int result = 0; + for (int i = 0; i < x.length; ) { + if (x[i] == 5) { + i++; + continue; + } else if (x[i] == 6) { + i++; + result += 7; + continue; + } + result += x[i++]; + } + return result; + } + /// CHECK-START: int Main.wrapAroundThenLinear(int[]) BCE (before) /// CHECK-DAG: BoundsCheck /// CHECK-START: int Main.wrapAroundThenLinear(int[]) BCE (after) @@ -90,6 +139,25 @@ public class Main { return result; } + /// CHECK-START: int Main.wrapAroundThenLinearThreeWayPhi(int[]) BCE (before) + /// CHECK-DAG: BoundsCheck + /// CHECK-START: int Main.wrapAroundThenLinearThreeWayPhi(int[]) BCE (after) + /// CHECK-NOT: BoundsCheck + private static int wrapAroundThenLinearThreeWayPhi(int[] x) { + // Loop with wrap around (length - 1, 0, 1, 2, ..). + int w = x.length - 1; + int result = 0; + for (int i = 0; i < x.length; ) { + if (x[w] == 1) { + w = i++; + continue; + } + result += x[w]; + w = i++; + } + return result; + } + /// CHECK-START: int[] Main.linearWithParameter(int) BCE (before) /// CHECK-DAG: BoundsCheck /// CHECK-START: int[] Main.linearWithParameter(int) BCE (after) @@ -102,6 +170,19 @@ public class Main { return x; } + /// CHECK-START: int[] Main.linearCopy(int[]) BCE (before) + /// CHECK-DAG: BoundsCheck + /// CHECK-START: int[] Main.linearCopy(int[]) BCE (after) + /// CHECK-NOT: BoundsCheck + private static int[] linearCopy(int x[]) { + int n = x.length; + int y[] = new int[n]; + for (int i = 0; i < n; i++) { + y[i] = x[i]; + } + return y; + } + /// CHECK-START: int Main.linearWithCompoundStride() BCE (before) /// CHECK-DAG: BoundsCheck /// CHECK-START: int Main.linearWithCompoundStride() BCE (after) @@ -126,7 +207,7 @@ public class Main { int result = 0; int k = 0; // Range analysis has no problem with a trip-count defined by a - // reasonably large positive stride. + // reasonably large positive stride far away from upper bound. for (int i = 1; i <= 10 * 10000000 + 1; i += 10000000) { result += x[k++]; } @@ -143,7 +224,7 @@ public class Main { int k = 0; // Range analysis conservatively bails due to potential of wrap-around // arithmetic while computing the trip-count for this very large stride. 
- for (int i = 1; i < 2147483647; i += 195225786) { + for (int i = 1; i < Integer.MAX_VALUE; i += 195225786) { result += x[k++]; } return result; @@ -158,7 +239,7 @@ public class Main { int result = 0; int k = 0; // Range analysis has no problem with a trip-count defined by a - // reasonably large negative stride. + // reasonably large negative stride far away from lower bound. for (int i = -1; i >= -10 * 10000000 - 1; i -= 10000000) { result += x[k++]; } @@ -175,12 +256,80 @@ public class Main { int k = 0; // Range analysis conservatively bails due to potential of wrap-around // arithmetic while computing the trip-count for this very large stride. - for (int i = -2; i > -2147483648; i -= 195225786) { + for (int i = -2; i > Integer.MIN_VALUE; i -= 195225786) { result += x[k++]; } return result; } + /// CHECK-START: int Main.linearForNEUp() BCE (before) + /// CHECK-DAG: BoundsCheck + /// CHECK-START: int Main.linearForNEUp() BCE (after) + /// CHECK-NOT: BoundsCheck + private static int linearForNEUp() { + int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + int result = 0; + for (int i = 0; i != 10; i++) { + result += x[i]; + } + return result; + } + + /// CHECK-START: int Main.linearForNEDown() BCE (before) + /// CHECK-DAG: BoundsCheck + /// CHECK-START: int Main.linearForNEDown() BCE (after) + /// CHECK-NOT: BoundsCheck + private static int linearForNEDown() { + int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + int result = 0; + for (int i = 9; i != -1; i--) { + result += x[i]; + } + return result; + } + + /// CHECK-START: int Main.linearDoWhileUp() BCE (before) + /// CHECK-DAG: BoundsCheck + /// CHECK-START: int Main.linearDoWhileUp() BCE (after) + /// CHECK-NOT: BoundsCheck + private static int linearDoWhileUp() { + int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + int result = 0; + int i = 0; + do { + result += x[i++]; + } while (i < 10); + return result; + } + + /// CHECK-START: int Main.linearDoWhileDown() BCE (before) + /// CHECK-DAG: BoundsCheck + /// CHECK-START: int Main.linearDoWhileDown() BCE (after) + /// CHECK-NOT: BoundsCheck + private static int linearDoWhileDown() { + int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + int result = 0; + int i = 9; + do { + result += x[i--]; + } while (0 <= i); + return result; + } + + /// CHECK-START: int Main.linearShort() BCE (before) + /// CHECK-DAG: BoundsCheck + /// CHECK-START: int Main.linearShort() BCE (after) + /// CHECK-DAG: BoundsCheck + private static int linearShort() { + int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + int result = 0; + // TODO: make this work + for (short i = 0; i < 10; i++) { + result += x[i]; + } + return result; + } + /// CHECK-START: int Main.periodicIdiom(int) BCE (before) /// CHECK-DAG: BoundsCheck /// CHECK-START: int Main.periodicIdiom(int) BCE (after) @@ -242,23 +391,156 @@ public class Main { return result; } - // - // Cases that actually go out of bounds. These test cases - // ensure the exceptions are thrown at the right places. 
- // + /// CHECK-START: int Main.justRightUp1() BCE (before) + /// CHECK-DAG: BoundsCheck + /// CHECK-START: int Main.justRightUp1() BCE (after) + /// CHECK-NOT: BoundsCheck + private static int justRightUp1() { + int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + int result = 0; + for (int i = Integer.MAX_VALUE - 10, k = 0; i < Integer.MAX_VALUE; i++) { + result += x[k++]; + } + return result; + } + /// CHECK-START: int Main.justRightUp2() BCE (before) + /// CHECK-DAG: BoundsCheck + /// CHECK-START: int Main.justRightUp2() BCE (after) + /// CHECK-NOT: BoundsCheck + private static int justRightUp2() { + int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + int result = 0; + for (int i = Integer.MAX_VALUE - 10; i < Integer.MAX_VALUE; i++) { + result += x[i - Integer.MAX_VALUE + 10]; + } + return result; + } + + /// CHECK-START: int Main.justRightUp3() BCE (before) + /// CHECK-DAG: BoundsCheck + /// CHECK-START: int Main.justRightUp3() BCE (after) + /// CHECK-NOT: BoundsCheck + private static int justRightUp3() { + int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + int result = 0; + for (int i = Integer.MAX_VALUE - 10, k = 0; i <= Integer.MAX_VALUE - 1; i++) { + result += x[k++]; + } + return result; + } + + /// CHECK-START: int Main.justOOBUp() BCE (before) + /// CHECK-DAG: BoundsCheck + /// CHECK-START: int Main.justOOBUp() BCE (after) + /// CHECK-DAG: BoundsCheck + private static int justOOBUp() { + int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + int result = 0; + // Infinite loop! + for (int i = Integer.MAX_VALUE - 9, k = 0; i <= Integer.MAX_VALUE; i++) { + result += x[k++]; + } + return result; + } + + /// CHECK-START: int Main.justRightDown1() BCE (before) + /// CHECK-DAG: BoundsCheck + /// CHECK-START: int Main.justRightDown1() BCE (after) + /// CHECK-NOT: BoundsCheck + private static int justRightDown1() { + int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + int result = 0; + for (int i = Integer.MIN_VALUE + 10, k = 0; i > Integer.MIN_VALUE; i--) { + result += x[k++]; + } + return result; + } + + /// CHECK-START: int Main.justRightDown2() BCE (before) + /// CHECK-DAG: BoundsCheck + /// CHECK-START: int Main.justRightDown2() BCE (after) + /// CHECK-NOT: BoundsCheck + private static int justRightDown2() { + int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + int result = 0; + for (int i = Integer.MIN_VALUE + 10; i > Integer.MIN_VALUE; i--) { + result += x[Integer.MAX_VALUE + i]; + } + return result; + } + + /// CHECK-START: int Main.justRightDown3() BCE (before) + /// CHECK-DAG: BoundsCheck + /// CHECK-START: int Main.justRightDown3() BCE (after) + /// CHECK-NOT: BoundsCheck + private static int justRightDown3() { + int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + int result = 0; + for (int i = Integer.MIN_VALUE + 10, k = 0; i >= Integer.MIN_VALUE + 1; i--) { + result += x[k++]; + } + return result; + } + + /// CHECK-START: int Main.justOOBDown() BCE (before) + /// CHECK-DAG: BoundsCheck + /// CHECK-START: int Main.justOOBDown() BCE (after) + /// CHECK-DAG: BoundsCheck + private static int justOOBDown() { + int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + int result = 0; + // Infinite loop! 
+ for (int i = Integer.MIN_VALUE + 9, k = 0; i >= Integer.MIN_VALUE; i--) { + result += x[k++]; + } + return result; + } + + /// CHECK-START: void Main.lowerOOB(int[]) BCE (before) + /// CHECK-DAG: BoundsCheck + /// CHECK-START: void Main.lowerOOB(int[]) BCE (after) + /// CHECK-DAG: BoundsCheck private static void lowerOOB(int[] x) { for (int i = -1; i < x.length; i++) { sResult += x[i]; } } + /// CHECK-START: void Main.upperOOB(int[]) BCE (before) + /// CHECK-DAG: BoundsCheck + /// CHECK-START: void Main.upperOOB(int[]) BCE (after) + /// CHECK-DAG: BoundsCheck private static void upperOOB(int[] x) { for (int i = 0; i <= x.length; i++) { sResult += x[i]; } } + /// CHECK-START: void Main.doWhileUpOOB() BCE (before) + /// CHECK-DAG: BoundsCheck + /// CHECK-START: void Main.doWhileUpOOB() BCE (after) + /// CHECK-DAG: BoundsCheck + private static void doWhileUpOOB() { + int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + int i = 0; + do { + sResult += x[i++]; + } while (i <= x.length); + } + + /// CHECK-START: void Main.doWhileDownOOB() BCE (before) + /// CHECK-DAG: BoundsCheck + /// CHECK-START: void Main.doWhileDownOOB() BCE (after) + /// CHECK-DAG: BoundsCheck + private static void doWhileDownOOB() { + int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + int i = x.length - 1; + do { + sResult += x[i--]; + } while (-1 <= i); + } + // // Verifier. // @@ -274,10 +556,18 @@ public class Main { expectEquals(55, linearDown(x)); expectEquals(0, linearObscure(empty)); expectEquals(55, linearObscure(x)); + expectEquals(0, linearVeryObscure(empty)); + expectEquals(55, linearVeryObscure(x)); expectEquals(0, linearWhile(empty)); expectEquals(55, linearWhile(x)); + expectEquals(0, linearThreeWayPhi(empty)); + expectEquals(50, linearThreeWayPhi(x)); + expectEquals(0, linearFourWayPhi(empty)); + expectEquals(51, linearFourWayPhi(x)); expectEquals(0, wrapAroundThenLinear(empty)); expectEquals(55, wrapAroundThenLinear(x)); + expectEquals(0, wrapAroundThenLinearThreeWayPhi(empty)); + expectEquals(54, wrapAroundThenLinearThreeWayPhi(x)); // Linear with parameter. sResult = 0; @@ -295,6 +585,16 @@ public class Main { } } + // Linear copy. + expectEquals(0, linearCopy(empty).length); + { + int[] r = linearCopy(x); + expectEquals(x.length, r.length); + for (int i = 0; i < x.length; i++) { + expectEquals(x[i], r[i]); + } + } + // Linear with non-unit strides. expectEquals(56, linearWithCompoundStride()); expectEquals(66, linearWithLargePositiveStride()); @@ -302,6 +602,13 @@ public class Main { expectEquals(66, linearWithLargeNegativeStride()); expectEquals(66, linearWithVeryLargeNegativeStride()); + // Special forms. + expectEquals(55, linearForNEUp()); + expectEquals(55, linearForNEDown()); + expectEquals(55, linearDoWhileUp()); + expectEquals(55, linearDoWhileDown()); + expectEquals(55, linearShort()); + // Periodic adds (1, 3), one at the time. expectEquals(0, periodicIdiom(-1)); for (int tc = 0; tc < 32; tc++) { @@ -326,6 +633,28 @@ public class Main { expectEquals(tc * 16, periodicSequence4(tc)); } + // Large bounds. 
+ expectEquals(55, justRightUp1()); + expectEquals(55, justRightUp2()); + expectEquals(55, justRightUp3()); + expectEquals(55, justRightDown1()); + expectEquals(55, justRightDown2()); + expectEquals(55, justRightDown3()); + sResult = 0; + try { + justOOBUp(); + } catch (ArrayIndexOutOfBoundsException e) { + sResult = 1; + } + expectEquals(1, sResult); + sResult = 0; + try { + justOOBDown(); + } catch (ArrayIndexOutOfBoundsException e) { + sResult = 1; + } + expectEquals(1, sResult); + // Lower bound goes OOB. sResult = 0; try { @@ -344,6 +673,23 @@ public class Main { } expectEquals(1055, sResult); + // Do while up goes OOB. + sResult = 0; + try { + doWhileUpOOB(); + } catch (ArrayIndexOutOfBoundsException e) { + sResult += 1000; + } + expectEquals(1055, sResult); + + // Do while down goes OOB. + sResult = 0; + try { + doWhileDownOOB(); + } catch (ArrayIndexOutOfBoundsException e) { + sResult += 1000; + } + expectEquals(1055, sResult); } private static void expectEquals(int expected, int result) { diff --git a/test/531-regression-debugphi/expected.txt b/test/531-regression-debugphi/expected.txt new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/test/531-regression-debugphi/expected.txt diff --git a/test/531-regression-debugphi/info.txt b/test/531-regression-debugphi/info.txt new file mode 100644 index 0000000000..08726421e6 --- /dev/null +++ b/test/531-regression-debugphi/info.txt @@ -0,0 +1,2 @@ +Test a regression where DeadPhiHandling would produce two equivalent phis of +the same type, prohibited by SSAChecker.
\ No newline at end of file diff --git a/test/531-regression-debugphi/smali/TestCase.smali b/test/531-regression-debugphi/smali/TestCase.smali new file mode 100644 index 0000000000..fe4fd71972 --- /dev/null +++ b/test/531-regression-debugphi/smali/TestCase.smali @@ -0,0 +1,121 @@ +# Copyright (C) 2015 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +.class public LTestCase; +.super Ljava/lang/Object; + +# Situation: +# - PhiA: PrimVoid + PrimNot equivalents +# - PhiB: PrimVoid (PrimVoid PhiA as input) +# DeadPhiHandling: +# - iterate over blocks in reverse post order +# - update PrimVoid PhiA to PrimNot +# - update inputs of PrimNot PhiA +# - set type of PhiB +# - left with two PrimNot equivalents of PhiA + +.method public static testCase_ReversePostOrder(IILjava/lang/Object;)V + .registers 5 + + # v0 - Phi A + # v1 - Phi B + # p0 - int arg1 + # p1 - int arg2 + # p2 - ref arg3 + + if-nez p0, :else1 + :then1 + if-nez p1, :else2 + :then2 + const/4 v1, 0x0 + goto :merge2 + + :else2 + move-object v1, p2 + goto :merge2 + + :merge2 + # PhiA [null, arg3] + move-object v0, v1 # create PrimNot PhiA equivalent + invoke-static {}, Ljava/lang/System;->nanoTime()J # env use of both PhiA equivalents + goto :merge1 + + :else1 + move-object v0, p2 + goto :merge1 + + :merge1 + # PhiB [PhiA, arg3] + invoke-static {}, Ljava/lang/System;->nanoTime()J # env use of PhiB + + return-void +.end method + +# Situation: +# - PhiA: PrimVoid + PrimNot (PrimInt inputs) +# - PhiB: PrimVoid + PrimNot (PrimInt inputs) +# - PhiC: PrimVoid only +# DeadPhiHandling: +# - iterate over blocks in reverse post order +# - add both PhiAs to worklist, set PrimVoid PhiA to PrimInt +# - update inputs of PrimNot PhiB ; add PrimNot PhiA to worklist +# - update PhiC to PrimNot +# - start processing worklist +# - PrimNot PhiA: update inputs, no equivalent created +# - PrimInt PhiA: update inputs, set to PrimNot, use instead of PrimNot PhiA +# - add PhiBs to worklist as users of PhiA +# - PrimInt PhiB: set type to PrimNot, equivalent live and in worklist + +.method public static testCase_FixPointIteration(IILjava/lang/Object;Ljava/lang/Object;)V + .registers 6 + + # v0 - Phi A, C + # v1 - Phi B + # p0 - int arg1 + # p1 - int arg2 + # p2 - ref arg3 + # p3 - ref arg4 + + const/4 v0, 0x0 + + :loop_header + # PhiA [null, PhiC] for v0 + + if-eqz p0, :else1 + :then1 + const/4 v1, 0x0 + goto :merge1 + :else1 + move-object v1, v0 # create PrimNot equivalent of PhiA + invoke-static {}, Ljava/lang/System;->nanoTime()J # env use of both PhiA equivalents + goto :merge1 + :merge1 + # PhiB [null, PhiA] for v1 + + move-object v0, v1 # creates PrimNot equivalent of PhiB + invoke-static {}, Ljava/lang/System;->nanoTime()J # env use of both PhiB equivalents + + if-eqz p1, :else2 + :then2 + move-object v0, p2 + goto :merge2 + :else2 + move-object v0, p3 + goto :merge2 + :merge2 + # PhiC [arg3, arg4] for v0, second input of PhiA + + if-eqz p1, :loop_header + return-void +.end method diff --git 
a/test/531-regression-debugphi/src/Main.java b/test/531-regression-debugphi/src/Main.java new file mode 100644 index 0000000000..858770f508 --- /dev/null +++ b/test/531-regression-debugphi/src/Main.java @@ -0,0 +1,22 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +public class Main { + // Workaround for b/18051191. + class InnerClass {} + + public static void main(String[] args) {} +} diff --git a/test/532-checker-nonnull-arrayset/expected.txt b/test/532-checker-nonnull-arrayset/expected.txt new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/test/532-checker-nonnull-arrayset/expected.txt diff --git a/test/532-checker-nonnull-arrayset/info.txt b/test/532-checker-nonnull-arrayset/info.txt new file mode 100644 index 0000000000..e1578c8f14 --- /dev/null +++ b/test/532-checker-nonnull-arrayset/info.txt @@ -0,0 +1 @@ +Test that we optimize ArraySet when the value is not null. diff --git a/test/532-checker-nonnull-arrayset/src/Main.java b/test/532-checker-nonnull-arrayset/src/Main.java new file mode 100644 index 0000000000..7d8fff46ba --- /dev/null +++ b/test/532-checker-nonnull-arrayset/src/Main.java @@ -0,0 +1,39 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +public class Main { + + // Check that we don't put a null check in the card marking code. + + /// CHECK-START: void Main.test() instruction_simplifier (before) + /// CHECK: ArraySet value_can_be_null:true + + /// CHECK-START: void Main.test() instruction_simplifier (after) + /// CHECK: ArraySet value_can_be_null:false + + /// CHECK-START-X86: void Main.test() disassembly (after) + /// CHECK: ArraySet value_can_be_null:false + /// CHECK-NOT: test + /// CHECK: ReturnVoid + public static void test() { + Object[] array = new Object[1]; + Object nonNull = array[0]; + nonNull.getClass(); // Ensure nonNull has an implicit null check. 
+ array[0] = nonNull; + } + + public static void main(String[] args) {} +} diff --git a/test/533-regression-debugphi/expected.txt b/test/533-regression-debugphi/expected.txt new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/test/533-regression-debugphi/expected.txt diff --git a/test/533-regression-debugphi/info.txt b/test/533-regression-debugphi/info.txt new file mode 100644 index 0000000000..a4d4857035 --- /dev/null +++ b/test/533-regression-debugphi/info.txt @@ -0,0 +1,2 @@ +Test a regression where DeadPhiHandling would infinitely loop over +complicated phi dependencies. diff --git a/test/533-regression-debugphi/smali/TestCase.smali b/test/533-regression-debugphi/smali/TestCase.smali new file mode 100644 index 0000000000..1908e72c57 --- /dev/null +++ b/test/533-regression-debugphi/smali/TestCase.smali @@ -0,0 +1,72 @@ +# Copyright (C) 2015 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +.class public LTestCase; +.super Ljava/lang/Object; + +# This is a reduced test case that used to trigger an infinite loop +# in the DeadPhiHandling phase of the optimizing compiler (only used +# with debuggable flag). +.method public static testCase(IILjava/lang/Object;)V + .registers 5 + const/4 v0, 0x0 + + :B4 + invoke-static {}, Ljava/lang/System;->nanoTime()J + goto :B7 + + :B7 + invoke-static {}, Ljava/lang/System;->nanoTime()J + if-nez p2, :Btmp + goto :B111 + + :Btmp + invoke-static {}, Ljava/lang/System;->nanoTime()J + if-nez p2, :B9 + goto :B110 + + :B13 + invoke-static {}, Ljava/lang/System;->nanoTime()J + add-int v0, p0, p1 + goto :B7 + + :B110 + invoke-static {}, Ljava/lang/System;->nanoTime()J + add-int v0, p0, p1 + goto :B111 + + :B111 + invoke-static {}, Ljava/lang/System;->nanoTime()J + goto :B4 + + :B9 + invoke-static {}, Ljava/lang/System;->nanoTime()J + if-nez p2, :B10 + + :B11 + invoke-static {}, Ljava/lang/System;->nanoTime()J + move v1, v0 + goto :B12 + + :B10 + invoke-static {}, Ljava/lang/System;->nanoTime()J + move-object v1, p2 + goto :B12 + + :B12 + invoke-static {}, Ljava/lang/System;->nanoTime()J + goto :B13 + + return-void +.end method diff --git a/test/533-regression-debugphi/src/Main.java b/test/533-regression-debugphi/src/Main.java new file mode 100644 index 0000000000..858770f508 --- /dev/null +++ b/test/533-regression-debugphi/src/Main.java @@ -0,0 +1,22 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +public class Main { + // Workaround for b/18051191. + class InnerClass {} + + public static void main(String[] args) {} +} diff --git a/test/534-checker-bce-deoptimization/expected.txt b/test/534-checker-bce-deoptimization/expected.txt new file mode 100644 index 0000000000..3823a29f3f --- /dev/null +++ b/test/534-checker-bce-deoptimization/expected.txt @@ -0,0 +1 @@ +finish diff --git a/test/534-checker-bce-deoptimization/info.txt b/test/534-checker-bce-deoptimization/info.txt new file mode 100644 index 0000000000..9f097d0ac7 --- /dev/null +++ b/test/534-checker-bce-deoptimization/info.txt @@ -0,0 +1,8 @@ +Checker test for the behavior of deoptimization generated by +bounds check elimination. + +The runtime used to fail on this test because it deoptimized the +whole stack while the compiler was not preserving dex registers at +call sites. + +We fixed the bug by performing single-frame deoptimization. diff --git a/test/534-checker-bce-deoptimization/src/Main.java b/test/534-checker-bce-deoptimization/src/Main.java new file mode 100644 index 0000000000..8cd20f677a --- /dev/null +++ b/test/534-checker-bce-deoptimization/src/Main.java @@ -0,0 +1,57 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +public class Main { + public static void main(String[] args) { + new Main().run(); + System.out.println("finish"); + } + + public void run() { + double a[][] = new double[200][201]; + double b[] = new double[200]; + int n = 100; + + foo1(a, n, b); + } + + void foo1(double a[][], int n, double b[]) { + double t; + int i,k; + + for (i = 0; i < n; i++) { + k = n - (i + 1); + b[k] /= a[k][k]; + t = -b[k]; + foo2(k + 1000, t, b); + } + } + + void foo2(int n, double c, double b[]) { + try { + foo3(n, c, b); + } catch (Exception e) { + } + } + + void foo3(int n, double c, double b[]) { + int i = 0; + for (i = 0; i < n; i++) { + b[i + 1] += c * b[i + 1]; + } + } +} + diff --git a/test/800-smali/expected.txt b/test/800-smali/expected.txt index 6568eac29f..17c1f00c41 100644 --- a/test/800-smali/expected.txt +++ b/test/800-smali/expected.txt @@ -1,4 +1,6 @@ PackedSwitch +PackedSwitch key INT_MAX +PackedSwitch key overflow b/17790197 FloatBadArgReg negLong diff --git a/test/800-smali/smali/PackedSwitch.smali b/test/800-smali/smali/PackedSwitch.smali index 6a3e5f00ba..95659fb16f 100644 --- a/test/800-smali/smali/PackedSwitch.smali +++ b/test/800-smali/smali/PackedSwitch.smali @@ -24,3 +24,29 @@ goto :return .end method + +.method public static packedSwitch_INT_MAX(I)I + .registers 2 + + const/4 v0, 0 + packed-switch v0, :switch_data + goto :default + + :switch_data + .packed-switch 0x7FFFFFFE + :case1 # key = INT_MAX - 1 + :case2 # key = INT_MAX + .end packed-switch + + :return + return v1 + + :default + goto :return + + :case1 + goto :return + :case2 + goto :return + +.end method diff --git a/test/800-smali/smali/b_24399945.smali b/test/800-smali/smali/b_24399945.smali new file mode 100644 index 0000000000..68f59d0387 --- /dev/null +++ b/test/800-smali/smali/b_24399945.smali @@ -0,0 +1,32 @@ +.class public Lb_24399945; + +.super Ljava/lang/Object; + +.method public static packedSwitch_overflow(I)I + .registers 2 + + const/4 v0, 0 + packed-switch v0, :switch_data + goto :default + + :switch_data + .packed-switch 0x7FFFFFFE + :case1 # key = INT_MAX - 1 + :case2 # key = INT_MAX + :case3 # key = INT_MIN (overflow!) 
+ .end packed-switch + + :return + return v1 + + :default + goto :return + + :case1 + goto :return + :case2 + goto :return + :case3 + goto :return + +.end method diff --git a/test/800-smali/src/Main.java b/test/800-smali/src/Main.java index ba4990a76e..f75747d5c5 100644 --- a/test/800-smali/src/Main.java +++ b/test/800-smali/src/Main.java @@ -51,6 +51,10 @@ public class Main { testCases = new LinkedList<TestCase>(); testCases.add(new TestCase("PackedSwitch", "PackedSwitch", "packedSwitch", new Object[]{123}, null, 123)); + testCases.add(new TestCase("PackedSwitch key INT_MAX", "PackedSwitch", + "packedSwitch_INT_MAX", new Object[]{123}, null, 123)); + testCases.add(new TestCase("PackedSwitch key overflow", "b_24399945", + "packedSwitch_overflow", new Object[]{123}, new VerifyError(), null)); testCases.add(new TestCase("b/17790197", "B17790197", "getInt", null, null, 100)); testCases.add(new TestCase("FloatBadArgReg", "FloatBadArgReg", "getInt", diff --git a/test/955-lambda-smali/expected.txt b/test/955-lambda-smali/expected.txt index 36370998f4..16381e4b46 100644 --- a/test/955-lambda-smali/expected.txt +++ b/test/955-lambda-smali/expected.txt @@ -16,3 +16,13 @@ Caught NPE (MoveResult) testF success (MoveResult) testD success (MoveResult) testL success +(CaptureVariables) (0-args, 1 captured variable 'Z'): value is true +(CaptureVariables) (0-args, 1 captured variable 'B'): value is R +(CaptureVariables) (0-args, 1 captured variable 'C'): value is ∂ +(CaptureVariables) (0-args, 1 captured variable 'S'): value is 1000 +(CaptureVariables) (0-args, 1 captured variable 'I'): value is 12345678 +(CaptureVariables) (0-args, 1 captured variable 'J'): value is 3287471278325742 +(CaptureVariables) (0-args, 1 captured variable 'F'): value is Infinity +(CaptureVariables) (0-args, 1 captured variable 'D'): value is -Infinity +(CaptureVariables) (0-args, 8 captured variable 'ZBCSIJFD'): value is true,R,∂,1000,12345678,3287471278325742,Infinity,-Infinity +(CaptureVariables) Caught NPE diff --git a/test/955-lambda-smali/smali/BoxUnbox.smali b/test/955-lambda-smali/smali/BoxUnbox.smali index 108b5fafbc..915de2d55d 100644 --- a/test/955-lambda-smali/smali/BoxUnbox.smali +++ b/test/955-lambda-smali/smali/BoxUnbox.smali @@ -1,4 +1,3 @@ -# # Copyright (C) 2015 The Android Open Source Project # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -36,8 +35,8 @@ .end method #TODO: should use a closure type instead of ArtMethod. -.method public static doHelloWorld(Ljava/lang/reflect/ArtMethod;)V - .registers 3 # 1 parameters, 2 locals +.method public static doHelloWorld(J)V + .registers 4 # 1 wide parameters, 2 locals const-string v0, "(BoxUnbox) Hello boxing world! 
(0-args, no closure)" @@ -51,9 +50,9 @@ .method private static testBox()V .registers 3 - create-lambda v0, LBoxUnbox;->doHelloWorld(Ljava/lang/reflect/ArtMethod;)V + create-lambda v0, LBoxUnbox;->doHelloWorld(J)V box-lambda v2, v0 # v2 = box(v0) - unbox-lambda v0, v2, Ljava/lang/reflect/ArtMethod; # v0 = unbox(v2) + unbox-lambda v0, v2, J # v0 = unbox(v2) invoke-lambda v0, {} return-void @@ -63,7 +62,7 @@ .method private static testBoxEquality()V .registers 6 # 0 parameters, 6 locals - create-lambda v0, LBoxUnbox;->doHelloWorld(Ljava/lang/reflect/ArtMethod;)V + create-lambda v0, LBoxUnbox;->doHelloWorld(J)V box-lambda v2, v0 # v2 = box(v0) box-lambda v3, v0 # v3 = box(v0) @@ -95,7 +94,7 @@ const v0, 0 # v0 = null const v1, 0 # v1 = null :start - unbox-lambda v2, v0, Ljava/lang/reflect/ArtMethod; + unbox-lambda v2, v0, J # attempting to unbox a null lambda will throw NPE :end return-void @@ -140,7 +139,7 @@ const-string v0, "This is not a boxed lambda" :start # TODO: use \FunctionalType; here instead - unbox-lambda v2, v0, Ljava/lang/reflect/ArtMethod; + unbox-lambda v2, v0, J # can't use a string, expects a lambda object here. throws ClassCastException. :end return-void diff --git a/test/955-lambda-smali/smali/CaptureVariables.smali b/test/955-lambda-smali/smali/CaptureVariables.smali new file mode 100644 index 0000000000..f18b7ff741 --- /dev/null +++ b/test/955-lambda-smali/smali/CaptureVariables.smali @@ -0,0 +1,311 @@ +# +# Copyright (C) 2015 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +.class public LCaptureVariables; +.super Ljava/lang/Object; + +.method public constructor <init>()V +.registers 1 + invoke-direct {p0}, Ljava/lang/Object;-><init>()V + return-void +.end method + +.method public static run()V +.registers 8 + # Test boolean capture + const v2, 1 # v2 = true + capture-variable v2, "Z" + create-lambda v0, LCaptureVariables;->printCapturedVariable_Z(J)V + # TODO: create-lambda should not write to both v0 and v1 + invoke-lambda v0, {} + + # Test byte capture + const v2, 82 # v2 = 82, 'R' + capture-variable v2, "B" + create-lambda v0, LCaptureVariables;->printCapturedVariable_B(J)V + # TODO: create-lambda should not write to both v0 and v1 + invoke-lambda v0, {} + + # Test char capture + const v2, 0x2202 # v2 = 0x2202, '∂' + capture-variable v2, "C" + create-lambda v0, LCaptureVariables;->printCapturedVariable_C(J)V + # TODO: create-lambda should not write to both v0 and v1 + invoke-lambda v0, {} + + # Test short capture + const v2, 1000 # v2 = 1000 + capture-variable v2, "S" + create-lambda v0, LCaptureVariables;->printCapturedVariable_S(J)V + # TODO: create-lambda should not write to both v0 and v1 + invoke-lambda v0, {} + + # Test int capture + const v2, 12345678 + capture-variable v2, "I" + create-lambda v0, LCaptureVariables;->printCapturedVariable_I(J)V + # TODO: create-lambda should not write to both v0 and v1 + invoke-lambda v0, {} + + # Test long capture + const-wide v2, 0x0badf00dc0ffeeL # v2 = 3287471278325742 + capture-variable v2, "J" + create-lambda v0, LCaptureVariables;->printCapturedVariable_J(J)V + # TODO: create-lambda should not write to both v0 and v1 + invoke-lambda v0, {} + + # Test float capture + const v2, infinityf + capture-variable v2, "F" + create-lambda v0, LCaptureVariables;->printCapturedVariable_F(J)V + # TODO: create-lambda should not write to both v0 and v1 + invoke-lambda v0, {} + + # Test double capture + const-wide v2, -infinity + capture-variable v2, "D" + create-lambda v0, LCaptureVariables;->printCapturedVariable_D(J)V + # TODO: create-lambda should not write to both v0 and v1 + invoke-lambda v0, {} + + #TODO: capture objects and lambdas once we have support for it + + # Test capturing multiple variables + invoke-static {}, LCaptureVariables;->testMultipleCaptures()V + + # Test failures + invoke-static {}, LCaptureVariables;->testFailures()V + + return-void +.end method + +#TODO: should use a closure type instead of a long +.method public static printCapturedVariable_Z(J)V + .registers 5 # 1 wide parameter, 3 locals + + const-string v0, "(CaptureVariables) (0-args, 1 captured variable 'Z'): value is " + + sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream; + invoke-virtual {v1, v0}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V + + liberate-variable v2, p0, "Z" + invoke-virtual {v1, v2}, Ljava/io/PrintStream;->println(Z)V + + return-void +.end method + +#TODO: should use a closure type instead of a long +.method public static printCapturedVariable_B(J)V + .registers 5 # 1 wide parameter, 3 locals + + const-string v0, "(CaptureVariables) (0-args, 1 captured variable 'B'): value is " + + sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream; + invoke-virtual {v1, v0}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V + + liberate-variable v2, p0, "B" + invoke-virtual {v1, v2}, Ljava/io/PrintStream;->println(C)V # no println(B), use char instead. 
+ + return-void +.end method + +#TODO: should use a closure type instead of a long +.method public static printCapturedVariable_C(J)V + .registers 5 # 1 wide parameter, 3 locals + + const-string v0, "(CaptureVariables) (0-args, 1 captured variable 'C'): value is " + + sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream; + invoke-virtual {v1, v0}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V + + liberate-variable v2, p0, "C" + invoke-virtual {v1, v2}, Ljava/io/PrintStream;->println(C)V + + return-void +.end method + +#TODO: should use a closure type instead of a long +.method public static printCapturedVariable_S(J)V + .registers 5 # 1 wide parameter, 3 locals + + const-string v0, "(CaptureVariables) (0-args, 1 captured variable 'S'): value is " + + sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream; + invoke-virtual {v1, v0}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V + + liberate-variable v2, p0, "S" + invoke-virtual {v1, v2}, Ljava/io/PrintStream;->println(I)V # no println(S), use int instead + + return-void +.end method + +#TODO: should use a closure type instead of a long +.method public static printCapturedVariable_I(J)V + .registers 5 # 1 wide parameter, 3 locals + + const-string v0, "(CaptureVariables) (0-args, 1 captured variable 'I'): value is " + + sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream; + invoke-virtual {v1, v0}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V + + liberate-variable v2, p0, "I" + invoke-virtual {v1, v2}, Ljava/io/PrintStream;->println(I)V + + return-void +.end method + +#TODO: should use a closure type instead of a long +.method public static printCapturedVariable_J(J)V + .registers 6 # 1 wide parameter, 4 locals + + const-string v0, "(CaptureVariables) (0-args, 1 captured variable 'J'): value is " + + sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream; + invoke-virtual {v1, v0}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V + + liberate-variable v2, p0, "J" + invoke-virtual {v1, v2, v3}, Ljava/io/PrintStream;->println(J)V + + return-void +.end method + +#TODO: should use a closure type instead of a long +.method public static printCapturedVariable_F(J)V + .registers 5 # 1 parameter, 4 locals + + const-string v0, "(CaptureVariables) (0-args, 1 captured variable 'F'): value is " + + sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream; + invoke-virtual {v1, v0}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V + + liberate-variable v2, p0, "F" + invoke-virtual {v1, v2}, Ljava/io/PrintStream;->println(F)V + + return-void +.end method + +#TODO: should use a closure type instead of a long +.method public static printCapturedVariable_D(J)V + .registers 6 # 1 wide parameter, 4 locals + + const-string v0, "(CaptureVariables) (0-args, 1 captured variable 'D'): value is " + + sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream; + invoke-virtual {v1, v0}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V + + liberate-variable v2, p0, "D" + invoke-virtual {v1, v2, v3}, Ljava/io/PrintStream;->println(D)V + + return-void +.end method + +# Test capturing more than one variable. 
+.method private static testMultipleCaptures()V + .registers 4 # 0 parameters, 4 locals + + const v2, 1 # v2 = true + capture-variable v2, "Z" + + const v2, 82 # v2 = 82, 'R' + capture-variable v2, "B" + + const v2, 0x2202 # v2 = 0x2202, '∂' + capture-variable v2, "C" + + const v2, 1000 # v2 = 1000 + capture-variable v2, "S" + + const v2, 12345678 + capture-variable v2, "I" + + const-wide v2, 0x0badf00dc0ffeeL # v2 = 3287471278325742 + capture-variable v2, "J" + + const v2, infinityf + capture-variable v2, "F" + + const-wide v2, -infinity + capture-variable v2, "D" + + create-lambda v0, LCaptureVariables;->printCapturedVariable_ZBCSIJFD(J)V + # TODO: create-lambda should not write to both v0 and v1 + invoke-lambda v0, {} + +.end method + +#TODO: should use a closure type instead of a long +.method public static printCapturedVariable_ZBCSIJFD(J)V + .registers 7 # 1 wide parameter, 5 locals + + const-string v0, "(CaptureVariables) (0-args, 8 captured variable 'ZBCSIJFD'): value is " + const-string v4, "," + + sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream; + invoke-virtual {v1, v0}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V + + liberate-variable v2, p0, "Z" + invoke-virtual {v1, v2}, Ljava/io/PrintStream;->print(Z)V + invoke-virtual {v1, v4}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V + + liberate-variable v2, p0, "B" + invoke-virtual {v1, v2}, Ljava/io/PrintStream;->print(C)V + invoke-virtual {v1, v4}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V + + liberate-variable v2, p0, "C" + invoke-virtual {v1, v2}, Ljava/io/PrintStream;->print(C)V + invoke-virtual {v1, v4}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V + + liberate-variable v2, p0, "S" + invoke-virtual {v1, v2}, Ljava/io/PrintStream;->print(I)V + invoke-virtual {v1, v4}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V + + liberate-variable v2, p0, "I" + invoke-virtual {v1, v2}, Ljava/io/PrintStream;->print(I)V + invoke-virtual {v1, v4}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V + + liberate-variable v2, p0, "J" + invoke-virtual {v1, v2, v3}, Ljava/io/PrintStream;->print(J)V + invoke-virtual {v1, v4}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V + + liberate-variable v2, p0, "F" + invoke-virtual {v1, v2}, Ljava/io/PrintStream;->print(F)V + invoke-virtual {v1, v4}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V + + liberate-variable v2, p0, "D" + invoke-virtual {v1, v2, v3}, Ljava/io/PrintStream;->println(D)V + + return-void +.end method + +# Test exceptions are thrown as expected when used opcodes incorrectly +.method private static testFailures()V + .registers 4 # 0 parameters, 4 locals + + const v0, 0 # v0 = null + const v1, 0 # v1 = null +:start + liberate-variable v0, v2, "Z" # invoking a null lambda shall raise an NPE +:end + return-void + +:handler + const-string v2, "(CaptureVariables) Caught NPE" + sget-object v3, Ljava/lang/System;->out:Ljava/io/PrintStream; + invoke-virtual {v3, v2}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V + + return-void + + .catch Ljava/lang/NullPointerException; {:start .. 
:end} :handler +.end method diff --git a/test/955-lambda-smali/smali/Main.smali b/test/955-lambda-smali/smali/Main.smali index 5d2aabb386..9892d6124e 100644 --- a/test/955-lambda-smali/smali/Main.smali +++ b/test/955-lambda-smali/smali/Main.smali @@ -24,6 +24,7 @@ invoke-static {}, LTrivialHelloWorld;->run()V invoke-static {}, LBoxUnbox;->run()V invoke-static {}, LMoveResult;->run()V + invoke-static {}, LCaptureVariables;->run()V # TODO: add tests when verification fails diff --git a/test/955-lambda-smali/smali/MoveResult.smali b/test/955-lambda-smali/smali/MoveResult.smali index 1725da3044..52f7ba363b 100644 --- a/test/955-lambda-smali/smali/MoveResult.smali +++ b/test/955-lambda-smali/smali/MoveResult.smali @@ -41,7 +41,7 @@ .method public static testZ()V .registers 6 - create-lambda v0, LMoveResult;->lambdaZ(Ljava/lang/reflect/ArtMethod;)Z + create-lambda v0, LMoveResult;->lambdaZ(J)Z invoke-lambda v0, {} move-result v2 const v3, 1 @@ -61,7 +61,7 @@ .end method # Lambda target for testZ. Always returns "true". -.method public static lambdaZ(Ljava/lang/reflect/ArtMethod;)Z +.method public static lambdaZ(J)Z .registers 3 const v0, 1 @@ -73,7 +73,7 @@ .method public static testB()V .registers 6 - create-lambda v0, LMoveResult;->lambdaB(Ljava/lang/reflect/ArtMethod;)B + create-lambda v0, LMoveResult;->lambdaB(J)B invoke-lambda v0, {} move-result v2 const v3, 15 @@ -93,7 +93,7 @@ .end method # Lambda target for testB. Always returns "15". -.method public static lambdaB(Ljava/lang/reflect/ArtMethod;)B +.method public static lambdaB(J)B .registers 3 # 1 parameters, 2 locals const v0, 15 @@ -105,7 +105,7 @@ .method public static testS()V .registers 6 - create-lambda v0, LMoveResult;->lambdaS(Ljava/lang/reflect/ArtMethod;)S + create-lambda v0, LMoveResult;->lambdaS(J)S invoke-lambda v0, {} move-result v2 const/16 v3, 31000 @@ -125,7 +125,7 @@ .end method # Lambda target for testS. Always returns "31000". -.method public static lambdaS(Ljava/lang/reflect/ArtMethod;)S +.method public static lambdaS(J)S .registers 3 const/16 v0, 31000 @@ -137,7 +137,7 @@ .method public static testI()V .registers 6 - create-lambda v0, LMoveResult;->lambdaI(Ljava/lang/reflect/ArtMethod;)I + create-lambda v0, LMoveResult;->lambdaI(J)I invoke-lambda v0, {} move-result v2 const v3, 128000 @@ -157,7 +157,7 @@ .end method # Lambda target for testI. Always returns "128000". -.method public static lambdaI(Ljava/lang/reflect/ArtMethod;)I +.method public static lambdaI(J)I .registers 3 const v0, 128000 @@ -167,9 +167,9 @@ # Test that chars are returned correctly via move-result. .method public static testC()V - .registers 6 + .registers 7 - create-lambda v0, LMoveResult;->lambdaC(Ljava/lang/reflect/ArtMethod;)C + create-lambda v0, LMoveResult;->lambdaC(J)C invoke-lambda v0, {} move-result v2 const v3, 65535 @@ -189,7 +189,7 @@ .end method # Lambda target for testC. Always returns "65535". -.method public static lambdaC(Ljava/lang/reflect/ArtMethod;)C +.method public static lambdaC(J)C .registers 3 const v0, 65535 @@ -199,12 +199,12 @@ # Test that longs are returned correctly via move-result. .method public static testJ()V - .registers 8 + .registers 9 - create-lambda v0, LMoveResult;->lambdaJ(Ljava/lang/reflect/ArtMethod;)J + create-lambda v0, LMoveResult;->lambdaJ(J)J invoke-lambda v0, {} move-result v2 - const-wide v4, 0xdeadf00dc0ffee + const-wide v4, 0xdeadf00dc0ffeeL if-ne v4, v2, :is_not_equal const-string v6, "(MoveResult) testJ success" @@ -220,11 +220,11 @@ .end method -# Lambda target for testC. 
Always returns "0xdeadf00dc0ffee". -.method public static lambdaJ(Ljava/lang/reflect/ArtMethod;)J - .registers 4 +# Lambda target for testC. Always returns "0xdeadf00dc0ffeeL". +.method public static lambdaJ(J)J + .registers 5 - const-wide v0, 0xdeadf00dc0ffee + const-wide v0, 0xdeadf00dc0ffeeL return-wide v0 .end method @@ -233,7 +233,7 @@ .method public static testF()V .registers 6 - create-lambda v0, LMoveResult;->lambdaF(Ljava/lang/reflect/ArtMethod;)F + create-lambda v0, LMoveResult;->lambdaF(J)F invoke-lambda v0, {} move-result v2 const v3, infinityf @@ -253,8 +253,8 @@ .end method # Lambda target for testF. Always returns "infinityf". -.method public static lambdaF(Ljava/lang/reflect/ArtMethod;)F - .registers 3 +.method public static lambdaF(J)F + .registers 4 const v0, infinityf return v0 @@ -265,10 +265,10 @@ .method public static testD()V .registers 8 - create-lambda v0, LMoveResult;->lambdaD(Ljava/lang/reflect/ArtMethod;)D + create-lambda v0, LMoveResult;->lambdaD(J)D invoke-lambda v0, {} move-result-wide v2 - const-wide v4, infinity + const-wide v4, -infinity if-ne v4, v2, :is_not_equal const-string v6, "(MoveResult) testD success" @@ -285,10 +285,10 @@ .end method # Lambda target for testD. Always returns "infinity". -.method public static lambdaD(Ljava/lang/reflect/ArtMethod;)D - .registers 4 +.method public static lambdaD(J)D + .registers 5 - const-wide v0, infinity # 123.456789 + const-wide v0, -infinity return-wide v0 .end method @@ -298,7 +298,7 @@ .method public static testL()V .registers 8 - create-lambda v0, LMoveResult;->lambdaL(Ljava/lang/reflect/ArtMethod;)Ljava/lang/String; + create-lambda v0, LMoveResult;->lambdaL(J)Ljava/lang/String; invoke-lambda v0, {} move-result-object v2 const-string v4, "Interned string" @@ -319,8 +319,8 @@ .end method # Lambda target for testL. Always returns "Interned string" (string). -.method public static lambdaL(Ljava/lang/reflect/ArtMethod;)Ljava/lang/String; - .registers 4 +.method public static lambdaL(J)Ljava/lang/String; + .registers 5 const-string v0, "Interned string" return-object v0 diff --git a/test/955-lambda-smali/smali/TrivialHelloWorld.smali b/test/955-lambda-smali/smali/TrivialHelloWorld.smali index 38ee95ac7e..3444b13a65 100644 --- a/test/955-lambda-smali/smali/TrivialHelloWorld.smali +++ b/test/955-lambda-smali/smali/TrivialHelloWorld.smali @@ -25,12 +25,12 @@ .method public static run()V .registers 8 # Trivial 0-arg hello world - create-lambda v0, LTrivialHelloWorld;->doHelloWorld(Ljava/lang/reflect/ArtMethod;)V + create-lambda v0, LTrivialHelloWorld;->doHelloWorld(J)V # TODO: create-lambda should not write to both v0 and v1 invoke-lambda v0, {} # Slightly more interesting 4-arg hello world - create-lambda v2, doHelloWorldArgs(Ljava/lang/reflect/ArtMethod;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)V + create-lambda v2, doHelloWorldArgs(JLjava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)V # TODO: create-lambda should not write to both v2 and v3 const-string v4, "A" const-string v5, "B" @@ -43,9 +43,9 @@ return-void .end method -#TODO: should use a closure type instead of ArtMethod. -.method public static doHelloWorld(Ljava/lang/reflect/ArtMethod;)V - .registers 3 # 1 parameters, 2 locals +#TODO: should use a closure type instead of jlong. +.method public static doHelloWorld(J)V + .registers 5 # 1 wide parameters, 3 locals const-string v0, "Hello world! 
(0-args, no closure)" @@ -55,17 +55,17 @@ return-void .end method -#TODO: should use a closure type instead of ArtMethod. -.method public static doHelloWorldArgs(Ljava/lang/reflect/ArtMethod;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)V - .registers 7 # 5 parameters, 2 locals +#TODO: should use a closure type instead of jlong. +.method public static doHelloWorldArgs(JLjava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)V + .registers 9 # 1 wide parameter, 4 narrow parameters, 3 locals const-string v0, " Hello world! (4-args, no closure)" sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream; - invoke-virtual {v1, p1}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V invoke-virtual {v1, p2}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V invoke-virtual {v1, p3}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V invoke-virtual {v1, p4}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V + invoke-virtual {v1, p5}, Ljava/io/PrintStream;->print(Ljava/lang/String;)V invoke-virtual {v1, v0}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V diff --git a/test/Android.libarttest.mk b/test/Android.libarttest.mk index 7f05a043d8..e43ea90ba6 100644 --- a/test/Android.libarttest.mk +++ b/test/Android.libarttest.mk @@ -33,6 +33,7 @@ LIBARTTEST_COMMON_SRC_FILES := \ 1337-gc-coverage/gc_coverage.cc \ 137-cfi/cfi.cc \ 139-register-natives/regnative.cc \ + 141-class-unload/jni_unload.cc \ 454-get-vreg/get_vreg_jni.cc \ 455-set-vreg/set_vreg_jni.cc \ 457-regs/regs_jni.cc \ diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk index 439e42331c..e9153575f6 100644 --- a/test/Android.run-test.mk +++ b/test/Android.run-test.mk @@ -370,6 +370,7 @@ TEST_ART_BROKEN_FALLBACK_RUN_TESTS := # when already tracing, and writes an error message that we do not want to check for. TEST_ART_BROKEN_TRACING_RUN_TESTS := \ 137-cfi \ + 141-class-unload \ 802-deoptimization ifneq (,$(filter trace stream,$(TRACE_TYPES))) @@ -417,19 +418,70 @@ endif TEST_ART_BROKEN_DEFAULT_RUN_TESTS := -# Known broken tests for the arm64 optimizing compiler backend. -TEST_ART_BROKEN_OPTIMIZING_ARM64_RUN_TESTS := - -ifneq (,$(filter optimizing,$(COMPILER_TYPES))) - ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,target,$(RUN_TYPES),$(PREBUILD_TYPES), \ - optimizing,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \ - $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES),$(TEST_ART_BROKEN_OPTIMIZING_ARM64_RUN_TESTS),64) +# Known broken tests for the mips32 optimizing compiler backend. 
+TEST_ART_BROKEN_OPTIMIZING_MIPS_RUN_TESTS := \ + 441-checker-inliner \ + 442-checker-constant-folding \ + 444-checker-nce \ + 445-checker-licm \ + 446-checker-inliner2 \ + 447-checker-inliner3 \ + 449-checker-bce \ + 450-checker-types \ + 455-checker-gvn \ + 458-checker-instruction-simplification \ + 462-checker-inlining-across-dex-files \ + 463-checker-boolean-simplifier \ + 464-checker-inline-sharpen-calls \ + 465-checker-clinit-gvn \ + 468-checker-bool-simplifier-regression \ + 473-checker-inliner-constants \ + 474-checker-boolean-input \ + 476-checker-ctor-memory-barrier \ + 477-checker-bound-type \ + 478-checker-clinit-check-pruning \ + 478-checker-inliner-nested-loop \ + 480-checker-dead-blocks \ + 482-checker-loop-back-edge-use \ + 484-checker-register-hints \ + 485-checker-dce-loop-update \ + 485-checker-dce-switch \ + 486-checker-must-do-null-check \ + 487-checker-inline-calls \ + 488-checker-inline-recursive-calls \ + 490-checker-inline \ + 492-checker-inline-invoke-interface \ + 493-checker-inline-invoke-interface \ + 494-checker-instanceof-tests \ + 495-checker-checkcast-tests \ + 496-checker-inlining-and-class-loader \ + 508-checker-disassembly \ + 510-checker-try-catch \ + 517-checker-builder-fallthrough \ + 521-checker-array-set-null \ + 522-checker-regression-monitor-exit \ + 523-checker-can-throw-regression \ + 525-checker-arrays-and-fields \ + 526-checker-caller-callee-regs \ + 529-checker-unresolved \ + 530-checker-loops \ + 530-checker-regression-reftype-final \ + 532-checker-nonnull-arrayset \ + +ifeq (mips,$(TARGET_ARCH)) + ifneq (,$(filter optimizing,$(COMPILER_TYPES))) + ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,target,$(RUN_TYPES),$(PREBUILD_TYPES), \ + optimizing,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \ + $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \ + $(TEST_ART_BROKEN_OPTIMIZING_MIPS_RUN_TESTS),$(ALL_ADDRESS_SIZES)) + endif endif -TEST_ART_BROKEN_OPTIMIZING_ARM64_RUN_TESTS := +TEST_ART_BROKEN_OPTIMIZING_MIPS_RUN_TESTS := # Known broken tests for the optimizing compiler. -TEST_ART_BROKEN_OPTIMIZING_RUN_TESTS := +TEST_ART_BROKEN_OPTIMIZING_RUN_TESTS := \ + 455-set-vreg \ ifneq (,$(filter optimizing,$(COMPILER_TYPES))) ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \ @@ -450,7 +502,6 @@ TEST_ART_BROKEN_OPTIMIZING_RUN_TESTS := # Tests that should fail when the optimizing compiler compiles them non-debuggable. TEST_ART_BROKEN_OPTIMIZING_NONDEBUGGABLE_RUN_TESTS := \ 454-get-vreg \ - 455-set-vreg \ 457-regs \ ifneq (,$(filter optimizing,$(COMPILER_TYPES))) @@ -752,12 +803,7 @@ define define-test-art-run-test endif endif endif - ifeq ($(4),jit) - # Use interpreter image for JIT. - image_suffix := interpreter - else - image_suffix := $(4) - endif + image_suffix := $(4) ifeq ($(9),no-image) test_groups += ART_RUN_TEST_$$(uc_host_or_target)_NO_IMAGE_RULES run_test_options += --no-image diff --git a/test/run-test b/test/run-test index 73c92d4322..a5b6e92869 100755 --- a/test/run-test +++ b/test/run-test @@ -252,7 +252,7 @@ while true; do shift elif [ "x$1" = "x--jit" ]; then run_args="${run_args} --jit" - image_suffix="-interpreter" + image_suffix="-jit" shift elif [ "x$1" = "x--optimizing" ]; then run_args="${run_args} -Xcompiler-option --compiler-backend=Optimizing" @@ -392,7 +392,7 @@ fi # Most interesting target architecture variables are Makefile variables, not environment variables. 
# Try to map the suffix64 flag and what we find in ${ANDROID_PRODUCT_OUT}/data/art-test to an architecture name. -function guess_arch_name() { +function guess_target_arch_name() { grep32bit=`ls ${ANDROID_PRODUCT_OUT}/data/art-test | grep -E '^(arm|x86|mips)$'` grep64bit=`ls ${ANDROID_PRODUCT_OUT}/data/art-test | grep -E '^(arm64|x86_64|mips64)$'` if [ "x${suffix64}" = "x64" ]; then @@ -402,6 +402,14 @@ function guess_arch_name() { fi } +function guess_host_arch_name() { + if [ "x${suffix64}" = "x64" ]; then + host_arch_name="x86_64" + else + host_arch_name="x86" + fi +} + if [ "$target_mode" = "no" ]; then if [ "$runtime" = "jvm" ]; then if [ "$prebuild_mode" = "yes" ]; then @@ -437,10 +445,11 @@ elif [ "$runtime" = "art" ]; then if [ -z "$ANDROID_HOST_OUT" ]; then export ANDROID_HOST_OUT=$ANDROID_BUILD_TOP/out/host/linux-x86 fi + guess_host_arch_name run_args="${run_args} --boot ${ANDROID_HOST_OUT}/framework/core${image_suffix}${pic_image_suffix}.art" run_args="${run_args} --runtime-option -Djava.library.path=${ANDROID_HOST_OUT}/lib${suffix64}" else - guess_arch_name + guess_target_arch_name run_args="${run_args} --runtime-option -Djava.library.path=/data/art-test/${target_arch_name}" run_args="${run_args} --boot /data/art-test/core${image_suffix}${pic_image_suffix}.art" fi @@ -635,7 +644,7 @@ if [[ "$TEST_NAME" =~ ^[0-9]+-checker- ]]; then run_checker="yes" if [ "$target_mode" = "no" ]; then cfg_output_dir="$tmp_dir" - checker_arch_option= + checker_arch_option="--arch=${host_arch_name^^}" else cfg_output_dir="$DEX_LOCATION" checker_arch_option="--arch=${target_arch_name^^}" diff --git a/tools/buildbot-build.sh b/tools/buildbot-build.sh index c69b819d25..972e827667 100755 --- a/tools/buildbot-build.sh +++ b/tools/buildbot-build.sh @@ -19,7 +19,7 @@ if [ ! -d art ]; then exit 1 fi -common_targets="vogar vogar.jar core-tests apache-harmony-jdwp-tests-hostdex out/host/linux-x86/bin/adb jsr166-tests" +common_targets="vogar vogar.jar core-tests apache-harmony-jdwp-tests-hostdex jsr166-tests" android_root="/data/local/tmp/system" linker="linker" mode="target" @@ -68,18 +68,14 @@ if [[ $mode == "host" ]]; then echo "Executing $make_command" $make_command elif [[ $mode == "target" ]]; then + # Disable NINJA for building on target, it does not support setting environment variables + # within the make command. + env="$env USE_NINJA=false" + # Build extra tools that will be used by tests, so that + # they are compiled with our own linker. # We need to provide our own linker in case the linker on the device # is out of date. - env="TARGET_GLOBAL_LDFLAGS=-Wl,-dynamic-linker=$android_root/bin/$linker" - # gcc gives a linker error, so compile with clang. - # TODO: investigate and fix? - if [[ $TARGET_PRODUCT == "mips32r2_fp" ]]; then - env="$env USE_CLANG_PLATFORM_BUILD=true" - fi - # Use '-e' to force the override of TARGET_GLOBAL_LDFLAGS. - # Also, we build extra tools that will be used by tests, so that - # they are compiled with our own linker. 
- make_command="make -e $j_arg $showcommands build-art-target-tests $common_targets libjavacrypto libjavacoretests linker toybox toolbox sh" + make_command="make TARGET_LINKER=$android_root/bin/$linker $j_arg $showcommands build-art-target-tests $common_targets libjavacrypto libjavacoretests linker toybox toolbox sh out/host/linux-x86/bin/adb" echo "Executing env $env $make_command" env $env $make_command fi diff --git a/tools/run-libcore-tests.sh b/tools/run-libcore-tests.sh index 26d9ad7c37..80f7a3737f 100755 --- a/tools/run-libcore-tests.sh +++ b/tools/run-libcore-tests.sh @@ -86,12 +86,6 @@ while true; do # Remove the --debug from the arguments. vogar_args=${vogar_args/$1} vogar_args="$vogar_args --vm-arg -XXlib:libartd.so" - if [ "$emulator" = "no" ]; then - # Increase the timeout, as vogar cannot set individual test - # timeout when being asked to run packages, and some tests go above - # the default timeout. - vogar_args="$vogar_args --timeout 240" - fi shift elif [[ "$1" == "" ]]; then break @@ -100,10 +94,10 @@ while true; do fi done -if [ "$emulator" = "yes" ]; then - # Be very patient with the emulator. - vogar_args="$vogar_args --timeout 480" -fi +# Increase the timeout, as vogar cannot set individual test +# timeout when being asked to run packages, and some tests go above +# the default timeout. +vogar_args="$vogar_args --timeout 480" # Run the tests using vogar. echo "Running tests for the following test packages:" |