69 files changed, 2911 insertions, 754 deletions
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 424aa7a7eb..009933d2b7 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -318,6 +318,8 @@ COMPILER_GTEST_COMMON_SRC_FILES_arm64 := \
   compiler/utils/arm64/managed_register_arm64_test.cc \
 
 COMPILER_GTEST_COMMON_SRC_FILES_mips := \
+  compiler/linker/mips/relative_patcher_mips_test.cc \
+  compiler/linker/mips/relative_patcher_mips32r6_test.cc \
 
 COMPILER_GTEST_COMMON_SRC_FILES_mips64 := \
diff --git a/compiler/Android.mk b/compiler/Android.mk
index e9c22d2b0f..4ec7d721f3 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -113,8 +113,11 @@ LIBART_COMPILER_SRC_FILES_arm64 := \
 LIBART_COMPILER_SRC_FILES_mips := \
 	jni/quick/mips/calling_convention_mips.cc \
+	linker/mips/relative_patcher_mips.cc \
 	optimizing/code_generator_mips.cc \
+	optimizing/dex_cache_array_fixups_mips.cc \
 	optimizing/intrinsics_mips.cc \
+	optimizing/pc_relative_fixups_mips.cc \
 	utils/mips/assembler_mips.cc \
 	utils/mips/managed_register_mips.cc \
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index e52dda35bb..474530a033 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -394,7 +394,7 @@ CompilerDriver::CompilerDriver(
       dump_passes_(dump_passes),
       timings_logger_(timer),
       compiler_context_(nullptr),
-      support_boot_image_fixup_(instruction_set != kMips && instruction_set != kMips64),
+      support_boot_image_fixup_(instruction_set != kMips64),
       dex_files_for_oat_file_(nullptr),
       compiled_method_storage_(swap_fd),
       profile_compilation_info_(profile_compilation_info),
diff --git a/compiler/linker/mips/relative_patcher_mips.cc b/compiler/linker/mips/relative_patcher_mips.cc
new file mode 100644
index 0000000000..7c0423b635
--- /dev/null
+++ b/compiler/linker/mips/relative_patcher_mips.cc
@@ -0,0 +1,109 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "linker/mips/relative_patcher_mips.h"
+
+#include "compiled_method.h"
+
+namespace art {
+namespace linker {
+
+uint32_t MipsRelativePatcher::ReserveSpace(
+    uint32_t offset,
+    const CompiledMethod* compiled_method ATTRIBUTE_UNUSED,
+    MethodReference method_ref ATTRIBUTE_UNUSED) {
+  return offset;  // No space reserved; no limit on relative call distance.
+}
+
+uint32_t MipsRelativePatcher::ReserveSpaceEnd(uint32_t offset) {
+  return offset;  // No space reserved; no limit on relative call distance.
+}
+
+uint32_t MipsRelativePatcher::WriteThunks(OutputStream* out ATTRIBUTE_UNUSED, uint32_t offset) {
+  return offset;  // No thunks added; no limit on relative call distance.
+}
+
+void MipsRelativePatcher::PatchCall(std::vector<uint8_t>* code ATTRIBUTE_UNUSED,
+                                    uint32_t literal_offset ATTRIBUTE_UNUSED,
+                                    uint32_t patch_offset ATTRIBUTE_UNUSED,
+                                    uint32_t target_offset ATTRIBUTE_UNUSED) {
+  UNIMPLEMENTED(FATAL) << "PatchCall unimplemented on MIPS";
+}
+
+void MipsRelativePatcher::PatchPcRelativeReference(std::vector<uint8_t>* code,
+                                                   const LinkerPatch& patch,
+                                                   uint32_t patch_offset,
+                                                   uint32_t target_offset) {
+  uint32_t anchor_literal_offset = patch.PcInsnOffset();
+  uint32_t literal_offset = patch.LiteralOffset();
+
+  // Basic sanity checks.
+  if (is_r6) {
+    DCHECK_GE(code->size(), 8u);
+    DCHECK_LE(literal_offset, code->size() - 8u);
+    DCHECK_EQ(literal_offset, anchor_literal_offset);
+    // AUIPC reg, offset_high
+    DCHECK_EQ((*code)[literal_offset + 0], 0x34);
+    DCHECK_EQ((*code)[literal_offset + 1], 0x12);
+    DCHECK_EQ(((*code)[literal_offset + 2] & 0x1F), 0x1E);
+    DCHECK_EQ(((*code)[literal_offset + 3] & 0xFC), 0xEC);
+    // ADDIU reg, reg, offset_low
+    DCHECK_EQ((*code)[literal_offset + 4], 0x78);
+    DCHECK_EQ((*code)[literal_offset + 5], 0x56);
+    DCHECK_EQ(((*code)[literal_offset + 7] & 0xFC), 0x24);
+  } else {
+    DCHECK_GE(code->size(), 16u);
+    DCHECK_LE(literal_offset, code->size() - 12u);
+    DCHECK_GE(literal_offset, 4u);
+    DCHECK_EQ(literal_offset + 4u, anchor_literal_offset);
+    // NAL
+    DCHECK_EQ((*code)[literal_offset - 4], 0x00);
+    DCHECK_EQ((*code)[literal_offset - 3], 0x00);
+    DCHECK_EQ((*code)[literal_offset - 2], 0x10);
+    DCHECK_EQ((*code)[literal_offset - 1], 0x04);
+    // LUI reg, offset_high
+    DCHECK_EQ((*code)[literal_offset + 0], 0x34);
+    DCHECK_EQ((*code)[literal_offset + 1], 0x12);
+    DCHECK_EQ(((*code)[literal_offset + 2] & 0xE0), 0x00);
+    DCHECK_EQ((*code)[literal_offset + 3], 0x3C);
+    // ORI reg, reg, offset_low
+    DCHECK_EQ((*code)[literal_offset + 4], 0x78);
+    DCHECK_EQ((*code)[literal_offset + 5], 0x56);
+    DCHECK_EQ(((*code)[literal_offset + 7] & 0xFC), 0x34);
+    // ADDU reg, reg, RA
+    DCHECK_EQ((*code)[literal_offset + 8], 0x21);
+    DCHECK_EQ(((*code)[literal_offset + 9] & 0x07), 0x00);
+    DCHECK_EQ(((*code)[literal_offset + 10] & 0x1F), 0x1F);
+    DCHECK_EQ(((*code)[literal_offset + 11] & 0xFC), 0x00);
+  }
+
+  // Apply patch.
+  uint32_t anchor_offset = patch_offset - literal_offset + anchor_literal_offset;
+  uint32_t diff = target_offset - anchor_offset + kDexCacheArrayLwOffset;
+  if (is_r6) {
+    diff += (diff & 0x8000) << 1;  // Account for sign extension in ADDIU.
+  }
+
+  // LUI reg, offset_high / AUIPC reg, offset_high
+  (*code)[literal_offset + 0] = static_cast<uint8_t>(diff >> 16);
+  (*code)[literal_offset + 1] = static_cast<uint8_t>(diff >> 24);
+  // ORI reg, reg, offset_low / ADDIU reg, reg, offset_low
+  (*code)[literal_offset + 4] = static_cast<uint8_t>(diff >> 0);
+  (*code)[literal_offset + 5] = static_cast<uint8_t>(diff >> 8);
+}
+
+}  // namespace linker
+}  // namespace art
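Editor's aside: the core of this patcher is splitting a 32-bit PC-relative displacement into two 16-bit immediates. A minimal standalone sketch of that arithmetic (not ART code; `SplitDisplacement` is a hypothetical helper) — on R6 the second instruction is ADDIU, which sign-extends its immediate, so the high half is pre-incremented whenever bit 15 of the displacement is set:

```cpp
#include <cstdint>

// Split a 32-bit displacement into the two 16-bit immediates patched into
// LUI/ORI (R2) or AUIPC/ADDIU (R6), mirroring the code above.
void SplitDisplacement(uint32_t diff, bool is_r6, uint16_t* hi, uint16_t* lo) {
  if (is_r6) {
    diff += (diff & 0x8000) << 1;  // Compensate for sign extension in ADDIU.
  }
  *hi = static_cast<uint16_t>(diff >> 16);
  *lo = static_cast<uint16_t>(diff);
}
```

For example, diff = 0x12348888 yields hi = 0x1235, lo = 0x8888 on R6: auipc adds 0x12350000 and addiu then adds sign-extended 0x8888 = -0x7778, landing exactly on 0x12348888. ORI does not sign-extend, so R2 needs no such correction.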
diff --git a/compiler/linker/mips/relative_patcher_mips.h b/compiler/linker/mips/relative_patcher_mips.h
new file mode 100644
index 0000000000..4ff2f2f24f
--- /dev/null
+++ b/compiler/linker/mips/relative_patcher_mips.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_LINKER_MIPS_RELATIVE_PATCHER_MIPS_H_
+#define ART_COMPILER_LINKER_MIPS_RELATIVE_PATCHER_MIPS_H_
+
+#include "linker/relative_patcher.h"
+#include "arch/mips/instruction_set_features_mips.h"
+
+namespace art {
+namespace linker {
+
+class MipsRelativePatcher FINAL : public RelativePatcher {
+ public:
+  explicit MipsRelativePatcher(const MipsInstructionSetFeatures* features)
+      : is_r6(features->IsR6()) {}
+
+  uint32_t ReserveSpace(uint32_t offset,
+                        const CompiledMethod* compiled_method,
+                        MethodReference method_ref) OVERRIDE;
+  uint32_t ReserveSpaceEnd(uint32_t offset) OVERRIDE;
+  uint32_t WriteThunks(OutputStream* out, uint32_t offset) OVERRIDE;
+  void PatchCall(std::vector<uint8_t>* code,
+                 uint32_t literal_offset,
+                 uint32_t patch_offset,
+                 uint32_t target_offset) OVERRIDE;
+  void PatchPcRelativeReference(std::vector<uint8_t>* code,
+                                const LinkerPatch& patch,
+                                uint32_t patch_offset,
+                                uint32_t target_offset) OVERRIDE;
+
+ private:
+  // We'll maximize the range of a single load instruction for dex cache array accesses
+  // by aligning offset -32768 with the offset of the first used element.
+  static constexpr uint32_t kDexCacheArrayLwOffset = 0x8000;
+  bool is_r6;
+
+  DISALLOW_COPY_AND_ASSIGN(MipsRelativePatcher);
+};
+
+}  // namespace linker
+}  // namespace art
+
+#endif  // ART_COMPILER_LINKER_MIPS_RELATIVE_PATCHER_MIPS_H_
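Why the 0x8000 bias works: MIPS `lw` takes a signed 16-bit offset, reaching [-32768, 32767] around the base register. By pointing the base 0x8000 past the first used element, the full signed range is spent on the array instead of only its positive half. A small sketch of the offset arithmetic (assumed helper, not ART code):

```cpp
#include <cstdint>

constexpr uint32_t kDexCacheArrayLwOffset = 0x8000;

// The lw immediate for an element, relative to the biased base register
// (element_offset is measured from the first used element).
constexpr int32_t LwImmediate(uint32_t element_offset) {
  return static_cast<int32_t>(element_offset - kDexCacheArrayLwOffset);
}
// LwImmediate(0) == -32768 and LwImmediate(0xFFFF) == 32767: one lw covers a
// 64 KiB window, twice what an unbiased base would reach.
```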
diff --git a/compiler/linker/mips/relative_patcher_mips32r6_test.cc b/compiler/linker/mips/relative_patcher_mips32r6_test.cc
new file mode 100644
index 0000000000..0f1dcbcbf1
--- /dev/null
+++ b/compiler/linker/mips/relative_patcher_mips32r6_test.cc
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "linker/relative_patcher_test.h"
+#include "linker/mips/relative_patcher_mips.h"
+
+namespace art {
+namespace linker {
+
+// We'll maximize the range of a single load instruction for dex cache array accesses
+// by aligning offset -32768 with the offset of the first used element.
+static constexpr uint32_t kDexCacheArrayLwOffset = 0x8000;
+
+class Mips32r6RelativePatcherTest : public RelativePatcherTest {
+ public:
+  Mips32r6RelativePatcherTest() : RelativePatcherTest(kMips, "mips32r6") {}
+
+ protected:
+  uint32_t GetMethodOffset(uint32_t method_idx) {
+    auto result = method_offset_map_.FindMethodOffset(MethodRef(method_idx));
+    CHECK(result.first);
+    return result.second;
+  }
+};
+
+TEST_F(Mips32r6RelativePatcherTest, DexCacheReference) {
+  dex_cache_arrays_begin_ = 0x12345678;
+  constexpr size_t kElementOffset = 0x1234;
+  static const uint8_t raw_code[] = {
+      0x34, 0x12, 0x5E, 0xEE,  // auipc s2, high(diff); placeholder = 0x1234
+      0x78, 0x56, 0x52, 0x26,  // addiu s2, s2, low(diff); placeholder = 0x5678
+  };
+  constexpr uint32_t literal_offset = 0;  // At auipc (where patching starts).
+  constexpr uint32_t anchor_offset = literal_offset;  // At auipc (where PC+0 points).
+  ArrayRef<const uint8_t> code(raw_code);
+  LinkerPatch patches[] = {
+      LinkerPatch::DexCacheArrayPatch(literal_offset, nullptr, anchor_offset, kElementOffset),
+  };
+  AddCompiledMethod(MethodRef(1u), code, ArrayRef<const LinkerPatch>(patches));
+  Link();
+
+  auto result = method_offset_map_.FindMethodOffset(MethodRef(1u));
+  ASSERT_TRUE(result.first);
+  uint32_t diff = dex_cache_arrays_begin_ + kElementOffset - (result.second + anchor_offset) +
+      kDexCacheArrayLwOffset;
+  diff += (diff & 0x8000) << 1;  // Account for sign extension in addiu.
+  static const uint8_t expected_code[] = {
+      static_cast<uint8_t>(diff >> 16), static_cast<uint8_t>(diff >> 24), 0x5E, 0xEE,
+      static_cast<uint8_t>(diff), static_cast<uint8_t>(diff >> 8), 0x52, 0x26,
+  };
+  EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
+}
+
+}  // namespace linker
+}  // namespace art
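A note on the byte order in `expected_code`: MIPS32 here is little-endian, so within each 4-byte instruction word the 16-bit immediate occupies bytes 0 and 1, which is why `diff >> 16` precedes `diff >> 24` in the auipc word. A sketch of that mapping (hypothetical helper, not test code):

```cpp
#include <cstdint>

// Write a 16-bit immediate into a little-endian MIPS32 instruction word.
void EncodeImmediate(uint8_t insn[4], uint16_t imm) {
  insn[0] = static_cast<uint8_t>(imm);       // low byte (e.g. diff >> 16 for auipc)
  insn[1] = static_cast<uint8_t>(imm >> 8);  // high byte (e.g. diff >> 24)
  // insn[2] and insn[3] carry the register fields and opcode, untouched here.
}
```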
diff --git a/compiler/linker/mips/relative_patcher_mips_test.cc b/compiler/linker/mips/relative_patcher_mips_test.cc
new file mode 100644
index 0000000000..8391b5352a
--- /dev/null
+++ b/compiler/linker/mips/relative_patcher_mips_test.cc
@@ -0,0 +1,71 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "linker/relative_patcher_test.h"
+#include "linker/mips/relative_patcher_mips.h"
+
+namespace art {
+namespace linker {
+
+// We'll maximize the range of a single load instruction for dex cache array accesses
+// by aligning offset -32768 with the offset of the first used element.
+static constexpr uint32_t kDexCacheArrayLwOffset = 0x8000;
+
+class MipsRelativePatcherTest : public RelativePatcherTest {
+ public:
+  MipsRelativePatcherTest() : RelativePatcherTest(kMips, "mips32r2") {}
+
+ protected:
+  uint32_t GetMethodOffset(uint32_t method_idx) {
+    auto result = method_offset_map_.FindMethodOffset(MethodRef(method_idx));
+    CHECK(result.first);
+    return result.second;
+  }
+};
+
+TEST_F(MipsRelativePatcherTest, DexCacheReference) {
+  dex_cache_arrays_begin_ = 0x12345678;
+  constexpr size_t kElementOffset = 0x1234;
+  static const uint8_t raw_code[] = {
+      0x00, 0x00, 0x10, 0x04,  // nal
+      0x34, 0x12, 0x12, 0x3C,  // lui s2, high(diff); placeholder = 0x1234
+      0x78, 0x56, 0x52, 0x36,  // ori s2, s2, low(diff); placeholder = 0x5678
+      0x21, 0x90, 0x5F, 0x02,  // addu s2, s2, ra
+  };
+  constexpr uint32_t literal_offset = 4;  // At lui (where patching starts).
+  constexpr uint32_t anchor_offset = 8;   // At ori (where PC+0 points).
+  ArrayRef<const uint8_t> code(raw_code);
+  LinkerPatch patches[] = {
+      LinkerPatch::DexCacheArrayPatch(literal_offset, nullptr, anchor_offset, kElementOffset),
+  };
+  AddCompiledMethod(MethodRef(1u), code, ArrayRef<const LinkerPatch>(patches));
+  Link();
+
+  auto result = method_offset_map_.FindMethodOffset(MethodRef(1u));
+  ASSERT_TRUE(result.first);
+  uint32_t diff = dex_cache_arrays_begin_ + kElementOffset - (result.second + anchor_offset) +
+      kDexCacheArrayLwOffset;
+  static const uint8_t expected_code[] = {
+      0x00, 0x00, 0x10, 0x04,
+      static_cast<uint8_t>(diff >> 16), static_cast<uint8_t>(diff >> 24), 0x12, 0x3C,
+      static_cast<uint8_t>(diff), static_cast<uint8_t>(diff >> 8), 0x52, 0x36,
+      0x21, 0x90, 0x5F, 0x02,
+  };
+  EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
+}
+
+}  // namespace linker
+}  // namespace art
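Why `anchor_offset` is 8 in the R2 test: pre-R6 MIPS has no `auipc`, so the code obtains PC via `nal`, which deposits the address of the instruction after its delay slot (nal + 8, i.e. the `ori`) into RA. A minimal model of the sequence (a sketch, not ART code):

```cpp
#include <cstdint>

// The pre-R6 base materialization used in the test above:
//   nal                  // ra = address of the ori below (nal + 8)
//   lui  s2, hi(diff)
//   ori  s2, s2, lo(diff)
//   addu s2, s2, ra      // s2 = anchor + diff
uint32_t MaterializedBase(uint32_t nal_address, uint32_t diff) {
  uint32_t anchor = nal_address + 8;  // where PC+0 points
  return anchor + diff;
}
```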
diff --git a/compiler/linker/relative_patcher.cc b/compiler/linker/relative_patcher.cc
index 3a229831d0..77655947fd 100644
--- a/compiler/linker/relative_patcher.cc
+++ b/compiler/linker/relative_patcher.cc
@@ -22,6 +22,9 @@
 #ifdef ART_ENABLE_CODEGEN_arm64
 #include "linker/arm64/relative_patcher_arm64.h"
 #endif
+#ifdef ART_ENABLE_CODEGEN_mips
+#include "linker/mips/relative_patcher_mips.h"
+#endif
 #ifdef ART_ENABLE_CODEGEN_x86
 #include "linker/x86/relative_patcher_x86.h"
 #endif
@@ -95,6 +98,11 @@ std::unique_ptr<RelativePatcher> RelativePatcher::Create(
       return std::unique_ptr<RelativePatcher>(
           new Arm64RelativePatcher(provider, features->AsArm64InstructionSetFeatures()));
 #endif
+#ifdef ART_ENABLE_CODEGEN_mips
+    case kMips:
+      return std::unique_ptr<RelativePatcher>(
+          new MipsRelativePatcher(features->AsMipsInstructionSetFeatures()));
+#endif
     default:
       return std::unique_ptr<RelativePatcher>(new RelativePatcherNone);
   }
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index e9fcfe2bed..1fc247faf1 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -847,7 +847,7 @@ class BCEVisitor : public HGraphVisitor {
     }
     // Try index range obtained by induction variable analysis.
     // Disables dynamic bce if OOB is certain.
-    if (InductionRangeFitsIn(&array_range, bounds_check, index, &try_dynamic_bce)) {
+    if (InductionRangeFitsIn(&array_range, bounds_check, &try_dynamic_bce)) {
       ReplaceInstruction(bounds_check, index);
       return;
     }
@@ -912,9 +912,9 @@ class BCEVisitor : public HGraphVisitor {
 
   static bool HasSameInputAtBackEdges(HPhi* phi) {
     DCHECK(phi->IsLoopHeaderPhi());
-    auto&& inputs = phi->GetInputs();
+    HConstInputsRef inputs = phi->GetInputs();
     // Start with input 1. Input 0 is from the incoming block.
-    HInstruction* input1 = inputs[1];
+    const HInstruction* input1 = inputs[1];
     DCHECK(phi->GetBlock()->GetLoopInformation()->IsBackEdge(
         *phi->GetBlock()->GetPredecessors()[1]));
     for (size_t i = 2; i < inputs.size(); ++i) {
@@ -1299,33 +1299,30 @@ class BCEVisitor : public HGraphVisitor {
    * parameter try_dynamic_bce is set to false if OOB is certain.
    */
   bool InductionRangeFitsIn(ValueRange* array_range,
-                            HInstruction* context,
-                            HInstruction* index,
+                            HBoundsCheck* context,
                             bool* try_dynamic_bce) {
     InductionVarRange::Value v1;
     InductionVarRange::Value v2;
     bool needs_finite_test = false;
-    if (induction_range_.GetInductionRange(context, index, &v1, &v2, &needs_finite_test)) {
-      do {
-        if (v1.is_known && (v1.a_constant == 0 || v1.a_constant == 1) &&
-            v2.is_known && (v2.a_constant == 0 || v2.a_constant == 1)) {
-          DCHECK(v1.a_constant == 1 || v1.instruction == nullptr);
-          DCHECK(v2.a_constant == 1 || v2.instruction == nullptr);
-          ValueRange index_range(GetGraph()->GetArena(),
-                                 ValueBound(v1.instruction, v1.b_constant),
-                                 ValueBound(v2.instruction, v2.b_constant));
-          // If analysis reveals a certain OOB, disable dynamic BCE.
-          if (index_range.GetLower().LessThan(array_range->GetLower()) ||
-              index_range.GetUpper().GreaterThan(array_range->GetUpper())) {
-            *try_dynamic_bce = false;
-            return false;
-          }
-          // Use analysis for static bce only if loop is finite.
-          if (!needs_finite_test && index_range.FitsIn(array_range)) {
-            return true;
-          }
+    HInstruction* index = context->InputAt(0);
+    HInstruction* hint = ValueBound::HuntForDeclaration(context->InputAt(1));
+    if (induction_range_.GetInductionRange(context, index, hint, &v1, &v2, &needs_finite_test)) {
+      if (v1.is_known && (v1.a_constant == 0 || v1.a_constant == 1) &&
+          v2.is_known && (v2.a_constant == 0 || v2.a_constant == 1)) {
+        DCHECK(v1.a_constant == 1 || v1.instruction == nullptr);
+        DCHECK(v2.a_constant == 1 || v2.instruction == nullptr);
+        ValueRange index_range(GetGraph()->GetArena(),
+                               ValueBound(v1.instruction, v1.b_constant),
+                               ValueBound(v2.instruction, v2.b_constant));
+        // If analysis reveals a certain OOB, disable dynamic BCE. Otherwise,
+        // use analysis for static bce only if loop is finite.
+        if (index_range.GetLower().LessThan(array_range->GetLower()) ||
+            index_range.GetUpper().GreaterThan(array_range->GetUpper())) {
+          *try_dynamic_bce = false;
+        } else if (!needs_finite_test && index_range.FitsIn(array_range)) {
+          return true;
        }
-      } while (induction_range_.RefineOuter(&v1, &v2));
+      }
     }
     return false;
   }
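For orientation, a hedged worked example of the static path this function now proves (plain C++ standing in for the dex code the compiler sees): induction analysis gives the index the range [0, length - 1], which `FitsIn` the array range, so the per-access bounds check is removed and no dynamic test is needed.

```cpp
// Index i is provably within [0, length - 1] on every iteration, so the
// implicit per-access bounds check can be eliminated statically.
void ZeroFill(int* a, int length) {
  for (int i = 0; i < length; i++) {
    a[i] = 0;
  }
}
```

The new `hint` argument passes the array length (via `HuntForDeclaration`) so range analysis can stop chasing once it reaches that instruction, keeping bounds comparable against the array range.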
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 12aa15207c..4520f9b3e3 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -111,7 +111,7 @@ static bool CheckTypeConsistency(HInstruction* instruction) {
         << " " << locations->Out();
   }
 
-  auto&& inputs = instruction->GetInputs();
+  HConstInputsRef inputs = instruction->GetInputs();
   for (size_t i = 0; i < inputs.size(); ++i) {
     DCHECK(CheckType(inputs[i]->GetType(), locations->InAt(i)))
       << inputs[i]->GetType() << " " << locations->InAt(i);
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 4d44c18dcf..37f1c35c50 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -39,6 +39,10 @@ namespace mips {
 static constexpr int kCurrentMethodStackOffset = 0;
 static constexpr Register kMethodRegisterArgument = A0;
 
+// We'll maximize the range of a single load instruction for dex cache array accesses
+// by aligning offset -32768 with the offset of the first used element.
+static constexpr uint32_t kDexCacheArrayLwOffset = 0x8000;
+
 Location MipsReturnLocation(Primitive::Type return_type) {
   switch (return_type) {
     case Primitive::kPrimBoolean:
@@ -477,7 +481,12 @@ CodeGeneratorMIPS::CodeGeneratorMIPS(HGraph* graph,
       instruction_visitor_(graph, this),
       move_resolver_(graph->GetArena(), this),
       assembler_(graph->GetArena(), &isa_features),
-      isa_features_(isa_features) {
+      isa_features_(isa_features),
+      method_patches_(MethodReferenceComparator(),
+                      graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      call_patches_(MethodReferenceComparator(),
+                    graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
   // Save RA (containing the return address) to mimic Quick.
   AddAllocatedRegister(Location::RegisterLocation(RA));
 }
@@ -948,6 +957,71 @@ void CodeGeneratorMIPS::AddLocationAsTemp(Location location, LocationSummary* lo
   }
 }
 
+void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
+  DCHECK(linker_patches->empty());
+  size_t size =
+      method_patches_.size() +
+      call_patches_.size() +
+      pc_relative_dex_cache_patches_.size();
+  linker_patches->reserve(size);
+  for (const auto& entry : method_patches_) {
+    const MethodReference& target_method = entry.first;
+    Literal* literal = entry.second;
+    DCHECK(literal->GetLabel()->IsBound());
+    uint32_t literal_offset = __ GetLabelLocation(literal->GetLabel());
+    linker_patches->push_back(LinkerPatch::MethodPatch(literal_offset,
+                                                       target_method.dex_file,
+                                                       target_method.dex_method_index));
+  }
+  for (const auto& entry : call_patches_) {
+    const MethodReference& target_method = entry.first;
+    Literal* literal = entry.second;
+    DCHECK(literal->GetLabel()->IsBound());
+    uint32_t literal_offset = __ GetLabelLocation(literal->GetLabel());
+    linker_patches->push_back(LinkerPatch::CodePatch(literal_offset,
+                                                     target_method.dex_file,
+                                                     target_method.dex_method_index));
+  }
+  for (const PcRelativePatchInfo& info : pc_relative_dex_cache_patches_) {
+    const DexFile& dex_file = info.target_dex_file;
+    size_t base_element_offset = info.offset_or_index;
+    DCHECK(info.high_label.IsBound());
+    uint32_t high_offset = __ GetLabelLocation(&info.high_label);
+    DCHECK(info.pc_rel_label.IsBound());
+    uint32_t pc_rel_offset = __ GetLabelLocation(&info.pc_rel_label);
+    linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(high_offset,
+                                                              &dex_file,
+                                                              pc_rel_offset,
+                                                              base_element_offset));
+  }
+}
+
+CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeDexCacheArrayPatch(
+    const DexFile& dex_file, uint32_t element_offset) {
+  return NewPcRelativePatch(dex_file, element_offset, &pc_relative_dex_cache_patches_);
+}
+
+CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativePatch(
+    const DexFile& dex_file, uint32_t offset_or_index, ArenaDeque<PcRelativePatchInfo>* patches) {
+  patches->emplace_back(dex_file, offset_or_index);
+  return &patches->back();
+}
+
+Literal* CodeGeneratorMIPS::DeduplicateMethodLiteral(MethodReference target_method,
+                                                     MethodToLiteralMap* map) {
+  return map->GetOrCreate(
+      target_method,
+      [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); });
+}
+
+Literal* CodeGeneratorMIPS::DeduplicateMethodAddressLiteral(MethodReference target_method) {
+  return DeduplicateMethodLiteral(target_method, &method_patches_);
+}
+
+Literal* CodeGeneratorMIPS::DeduplicateMethodCodeLiteral(MethodReference target_method) {
+  return DeduplicateMethodLiteral(target_method, &call_patches_);
+}
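The literal maps above keep exactly one 32-bit placeholder literal per `MethodReference`, later resolved by the linker. A minimal sketch of the same get-or-create idiom with a standard container (the real code uses `ArenaSafeMap::GetOrCreate` with a lambda factory, as shown in the diff):

```cpp
#include <map>

// Return the value mapped to key, creating it with the factory on first use.
template <typename K, typename V, typename Factory>
V GetOrCreate(std::map<K, V>& map, const K& key, Factory create) {
  auto it = map.lower_bound(key);
  if (it == map.end() || map.key_comp()(key, it->first)) {
    it = map.emplace_hint(it, key, create());
  }
  return it->second;
}
```

This keeps each target method's address/code literal unique, so repeated invokes of the same method share one patched pool entry.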
+
 void CodeGeneratorMIPS::MarkGCCard(Register object, Register value) {
   MipsLabel done;
   Register card = AT;
@@ -3741,12 +3815,38 @@ void LocationsBuilderMIPS::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invo
   // art::PrepareForRegisterAllocation.
   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
+  HInvokeStaticOrDirect::MethodLoadKind method_load_kind = invoke->GetMethodLoadKind();
+  HInvokeStaticOrDirect::CodePtrLocation code_ptr_location = invoke->GetCodePtrLocation();
+  bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
+
+  // kDirectAddressWithFixup and kCallDirectWithFixup need no extra input on R6 because
+  // R6 has PC-relative addressing.
+  bool has_extra_input = !isR6 &&
+      ((method_load_kind == HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup) ||
+       (code_ptr_location == HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup));
+
+  if (invoke->HasPcRelativeDexCache()) {
+    // kDexCachePcRelative is mutually exclusive with
+    // kDirectAddressWithFixup/kCallDirectWithFixup.
+    CHECK(!has_extra_input);
+    has_extra_input = true;
+  }
+
   IntrinsicLocationsBuilderMIPS intrinsic(codegen_);
   if (intrinsic.TryDispatch(invoke)) {
+    if (invoke->GetLocations()->CanCall() && has_extra_input) {
+      invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any());
+    }
     return;
   }
 
   HandleInvoke(invoke);
+
+  // Add the extra input register if either the dex cache array base register
+  // or the PC-relative base register for accessing literals is needed.
+  if (has_extra_input) {
+    invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister());
+  }
 }
 
 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorMIPS* codegen) {
@@ -3771,42 +3871,103 @@ HLoadClass::LoadKind CodeGeneratorMIPS::GetSupportedLoadClassKind(
   return HLoadClass::LoadKind::kDexCacheViaMethod;
 }
 
+Register CodeGeneratorMIPS::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke,
+                                                                  Register temp) {
+  CHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
+  Location location = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
+  if (!invoke->GetLocations()->Intrinsified()) {
+    return location.AsRegister<Register>();
+  }
+  // For intrinsics we allow any location, so it may be on the stack.
+  if (!location.IsRegister()) {
+    __ LoadFromOffset(kLoadWord, temp, SP, location.GetStackIndex());
+    return temp;
+  }
+  // For register locations, check if the register was saved. If so, get it from the stack.
+  // Note: There is a chance that the register was saved but not overwritten, so we could
+  // save one load. However, since this is just an intrinsic slow path we prefer this
+  // simple and more robust approach rather that trying to determine if that's the case.
+  SlowPathCode* slow_path = GetCurrentSlowPath();
+  DCHECK(slow_path != nullptr);  // For intrinsified invokes the call is emitted on the slow path.
+  if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) {
+    int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>());
+    __ LoadFromOffset(kLoadWord, temp, SP, stack_offset);
+    return temp;
+  }
+  return location.AsRegister<Register>();
+}
+
 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorMIPS::GetSupportedInvokeStaticOrDirectDispatch(
       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
       MethodReference target_method ATTRIBUTE_UNUSED) {
-  switch (desired_dispatch_info.method_load_kind) {
+  HInvokeStaticOrDirect::DispatchInfo dispatch_info = desired_dispatch_info;
+  // We disable PC-relative load when there is an irreducible loop, as the optimization
+  // is incompatible with it.
+  bool has_irreducible_loops = GetGraph()->HasIrreducibleLoops();
+  bool fallback_load = true;
+  bool fallback_call = true;
+  switch (dispatch_info.method_load_kind) {
     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup:
     case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative:
-      // TODO: Implement these types. For the moment, we fall back to kDexCacheViaMethod.
-      return HInvokeStaticOrDirect::DispatchInfo {
-        HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod,
-        HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod,
-        0u,
-        0u
-      };
+      fallback_load = has_irreducible_loops;
+      break;
     default:
+      fallback_load = false;
       break;
   }
-  switch (desired_dispatch_info.code_ptr_location) {
+  switch (dispatch_info.code_ptr_location) {
     case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
+      fallback_call = has_irreducible_loops;
+      break;
     case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative:
-      // TODO: Implement these types. For the moment, we fall back to kCallArtMethod.
-      return HInvokeStaticOrDirect::DispatchInfo {
-        desired_dispatch_info.method_load_kind,
-        HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod,
-        desired_dispatch_info.method_load_data,
-        0u
-      };
+      // TODO: Implement this type.
+      break;
     default:
-      return desired_dispatch_info;
+      fallback_call = false;
+      break;
   }
+  if (fallback_load) {
+    dispatch_info.method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod;
+    dispatch_info.method_load_data = 0;
+  }
+  if (fallback_call) {
+    dispatch_info.code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod;
+    dispatch_info.direct_code_ptr = 0;
+  }
+  return dispatch_info;
 }
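Condensed, the fallback rule above can be read as two predicates (a sketch with assumed boolean names, not ART code): the PC-relative base instruction cannot be live-in at an irreducible loop header, so fixup-based kinds fall back there, while `kCallPCRelative` always falls back until implemented.

```cpp
// Hedged summary of GetSupportedInvokeStaticOrDirectDispatch's decisions.
bool FallbackLoad(bool fixup_or_pc_relative_kind, bool has_irreducible_loops) {
  return fixup_or_pc_relative_kind && has_irreducible_loops;
}
bool FallbackCall(bool direct_with_fixup, bool call_pc_relative, bool has_irreducible_loops) {
  return call_pc_relative || (direct_with_fixup && has_irreducible_loops);
}
```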
 
 void CodeGeneratorMIPS::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) {
   // All registers are assumed to be correctly set up per the calling convention.
-  Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
-  switch (invoke->GetMethodLoadKind()) {
+  HInvokeStaticOrDirect::MethodLoadKind method_load_kind = invoke->GetMethodLoadKind();
+  HInvokeStaticOrDirect::CodePtrLocation code_ptr_location = invoke->GetCodePtrLocation();
+  bool isR6 = isa_features_.IsR6();
+  // kDirectAddressWithFixup and kCallDirectWithFixup have no extra input on R6 because
+  // R6 has PC-relative addressing.
+  bool has_extra_input = invoke->HasPcRelativeDexCache() ||
+      (!isR6 &&
+       ((method_load_kind == HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup) ||
+        (code_ptr_location == HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup)));
+  Register base_reg = has_extra_input
+      ? GetInvokeStaticOrDirectExtraParameter(invoke, temp.AsRegister<Register>())
+      : ZERO;
+
+  // For better instruction scheduling we load the direct code pointer before the method pointer.
+  switch (code_ptr_location) {
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect:
+      // T9 = invoke->GetDirectCodePtr();
+      __ LoadConst32(T9, invoke->GetDirectCodePtr());
+      break;
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
+      // T9 = code address from literal pool with link-time patch.
+      __ LoadLiteral(T9, base_reg, DeduplicateMethodCodeLiteral(invoke->GetTargetMethod()));
+      break;
+    default:
+      break;
+  }
+
+  switch (method_load_kind) {
     case HInvokeStaticOrDirect::MethodLoadKind::kStringInit:
       // temp = thread->string_init_entrypoint
       __ LoadFromOffset(kLoadWord,
@@ -3821,11 +3982,18 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke
       __ LoadConst32(temp.AsRegister<Register>(), invoke->GetMethodAddress());
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup:
-    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative:
-      // TODO: Implement these types.
-      // Currently filtered out by GetSupportedInvokeStaticOrDirectDispatch().
-      LOG(FATAL) << "Unsupported";
-      UNREACHABLE();
+      __ LoadLiteral(temp.AsRegister<Register>(),
+                     base_reg,
+                     DeduplicateMethodAddressLiteral(invoke->GetTargetMethod()));
+      break;
+    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: {
+      HMipsDexCacheArraysBase* base =
+          invoke->InputAt(invoke->GetSpecialInputIndex())->AsMipsDexCacheArraysBase();
+      int32_t offset =
+          invoke->GetDexCacheArrayOffset() - base->GetElementOffset() - kDexCacheArrayLwOffset;
+      __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), base_reg, offset);
+      break;
+    }
     case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
       Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       Register reg = temp.AsRegister<Register>();
@@ -3856,20 +4024,19 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke
     }
   }
 
-  switch (invoke->GetCodePtrLocation()) {
+  switch (code_ptr_location) {
     case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
-      __ Jalr(&frame_entry_label_, T9);
+      __ Bal(&frame_entry_label_);
       break;
     case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect:
-      // LR = invoke->GetDirectCodePtr();
-      __ LoadConst32(T9, invoke->GetDirectCodePtr());
-      // LR()
+    case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
+      // T9 prepared above for better instruction scheduling.
+      // T9()
      __ Jalr(T9);
      __ Nop();
      break;
-    case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
     case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative:
-      // TODO: Implement these types.
+      // TODO: Implement this type.
       // Currently filtered out by GetSupportedInvokeStaticOrDirectDispatch().
       LOG(FATAL) << "Unsupported";
       UNREACHABLE();
@@ -5140,6 +5307,57 @@ void InstructionCodeGeneratorMIPS::VisitPackedSwitch(HPackedSwitch* switch_instr
   }
 }
 
+void LocationsBuilderMIPS::VisitMipsComputeBaseMethodAddress(
+    HMipsComputeBaseMethodAddress* insn) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(insn, LocationSummary::kNoCall);
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorMIPS::VisitMipsComputeBaseMethodAddress(
+    HMipsComputeBaseMethodAddress* insn) {
+  LocationSummary* locations = insn->GetLocations();
+  Register reg = locations->Out().AsRegister<Register>();
+
+  CHECK(!codegen_->GetInstructionSetFeatures().IsR6());
+
+  // Generate a dummy PC-relative call to obtain PC.
+  __ Nal();
+  // Grab the return address off RA.
+  __ Move(reg, RA);
+
+  // Remember this offset (the obtained PC value) for later use with constant area.
+  __ BindPcRelBaseLabel();
+}
+
+void LocationsBuilderMIPS::VisitMipsDexCacheArraysBase(HMipsDexCacheArraysBase* base) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(base);
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorMIPS::VisitMipsDexCacheArraysBase(HMipsDexCacheArraysBase* base) {
+  Register reg = base->GetLocations()->Out().AsRegister<Register>();
+  CodeGeneratorMIPS::PcRelativePatchInfo* info =
+      codegen_->NewPcRelativeDexCacheArrayPatch(base->GetDexFile(), base->GetElementOffset());
+
+  if (codegen_->GetInstructionSetFeatures().IsR6()) {
+    __ Bind(&info->high_label);
+    __ Bind(&info->pc_rel_label);
+    // Add a 32-bit offset to PC.
+    __ Auipc(reg, /* placeholder */ 0x1234);
+    __ Addiu(reg, reg, /* placeholder */ 0x5678);
+  } else {
+    // Generate a dummy PC-relative call to obtain PC.
+    __ Nal();
+    __ Bind(&info->high_label);
+    __ Lui(reg, /* placeholder */ 0x1234);
+    __ Bind(&info->pc_rel_label);
+    __ Ori(reg, reg, /* placeholder */ 0x5678);
+    // Add a 32-bit offset to PC.
+    __ Addu(reg, reg, RA);
+  }
+}
+
 void LocationsBuilderMIPS::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
   // The trampoline uses the same calling convention as dex calling conventions,
   // except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
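The `kDexCachePcRelative` case above and `MipsRelativePatcher` are two halves of one computation: the patcher biases the base register by `kDexCacheArrayLwOffset` and the code generator subtracts it from the `lw` immediate. A standalone sketch (hypothetical helper, not ART code) showing that the two cancel:

```cpp
#include <cstdint>

// base     = arrays_begin + element_offset + 0x8000   (patcher side)
// lw imm   = method_offset - element_offset - 0x8000  (codegen side)
// base+imm = arrays_begin + method_offset             (the entry we want)
uint32_t LoadedAddress(uint32_t arrays_begin, uint32_t element_offset, uint32_t method_offset) {
  constexpr uint32_t kDexCacheArrayLwOffset = 0x8000;
  uint32_t base = arrays_begin + element_offset + kDexCacheArrayLwOffset;
  int32_t lw_offset =
      static_cast<int32_t>(method_offset - element_offset - kDexCacheArrayLwOffset);
  return base + lw_offset;
}
```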
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index 6487f28ad5..08f74c04d1 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -285,6 +285,9 @@ class CodeGeneratorMIPS : public CodeGenerator {
   MipsAssembler* GetAssembler() OVERRIDE { return &assembler_; }
   const MipsAssembler& GetAssembler() const OVERRIDE { return assembler_; }
 
+  // Emit linker patches.
+  void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
+
   void MarkGCCard(Register object, Register value);
 
   // Register allocation.
@@ -372,7 +375,39 @@ class CodeGeneratorMIPS : public CodeGenerator {
   void GenerateImplicitNullCheck(HNullCheck* instruction);
   void GenerateExplicitNullCheck(HNullCheck* instruction);
 
+  // The PcRelativePatchInfo is used for PC-relative addressing of dex cache arrays
+  // and boot image strings. The only difference is the interpretation of the offset_or_index.
+  struct PcRelativePatchInfo {
+    PcRelativePatchInfo(const DexFile& dex_file, uint32_t off_or_idx)
+        : target_dex_file(dex_file), offset_or_index(off_or_idx) { }
+    PcRelativePatchInfo(PcRelativePatchInfo&& other) = default;
+
+    const DexFile& target_dex_file;
+    // Either the dex cache array element offset or the string index.
+    uint32_t offset_or_index;
+    // Label for the instruction loading the most significant half of the offset that's added to PC
+    // to form the base address (the least significant half is loaded with the instruction that
+    // follows).
+    MipsLabel high_label;
+    // Label for the instruction corresponding to PC+0.
+    MipsLabel pc_rel_label;
+  };
+
+  PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
+                                                       uint32_t element_offset);
+
  private:
+  Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp);
+
+  using MethodToLiteralMap = ArenaSafeMap<MethodReference, Literal*, MethodReferenceComparator>;
+
+  Literal* DeduplicateMethodLiteral(MethodReference target_method, MethodToLiteralMap* map);
+  Literal* DeduplicateMethodAddressLiteral(MethodReference target_method);
+  Literal* DeduplicateMethodCodeLiteral(MethodReference target_method);
+  PcRelativePatchInfo* NewPcRelativePatch(const DexFile& dex_file,
+                                          uint32_t offset_or_index,
+                                          ArenaDeque<PcRelativePatchInfo>* patches);
+
   // Labels for each block that will be compiled.
   MipsLabel* block_labels_;
   MipsLabel frame_entry_label_;
@@ -382,6 +417,12 @@ class CodeGeneratorMIPS : public CodeGenerator {
   MipsAssembler assembler_;
   const MipsInstructionSetFeatures& isa_features_;
 
+  // Method patch info, map MethodReference to a literal for method address and method code.
+  MethodToLiteralMap method_patches_;
+  MethodToLiteralMap call_patches_;
+  // PC-relative patch info for each HMipsDexCacheArraysBase.
+  ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_;
+
   DISALLOW_COPY_AND_ASSIGN(CodeGeneratorMIPS);
 };
diff --git a/compiler/optimizing/dex_cache_array_fixups_mips.cc b/compiler/optimizing/dex_cache_array_fixups_mips.cc
new file mode 100644
index 0000000000..0f42d9ce0f
--- /dev/null
+++ b/compiler/optimizing/dex_cache_array_fixups_mips.cc
@@ -0,0 +1,94 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dex_cache_array_fixups_mips.h"
+
+#include "base/arena_containers.h"
+#include "utils/dex_cache_arrays_layout-inl.h"
+
+namespace art {
+namespace mips {
+
+/**
+ * Finds instructions that need the dex cache arrays base as an input.
+ */
+class DexCacheArrayFixupsVisitor : public HGraphVisitor {
+ public:
+  explicit DexCacheArrayFixupsVisitor(HGraph* graph)
+      : HGraphVisitor(graph),
+        dex_cache_array_bases_(std::less<const DexFile*>(),
+                               // Attribute memory use to code generator.
+                               graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {}
+
+  void MoveBasesIfNeeded() {
+    for (const auto& entry : dex_cache_array_bases_) {
+      // Bring the base closer to the first use (previously, it was in the
+      // entry block) and relieve some pressure on the register allocator
+      // while avoiding recalculation of the base in a loop.
+      HMipsDexCacheArraysBase* base = entry.second;
+      base->MoveBeforeFirstUserAndOutOfLoops();
+    }
+  }
+
+ private:
+  void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE {
+    // If this is an invoke with PC-relative access to the dex cache methods array,
+    // we need to add the dex cache arrays base as the special input.
+    if (invoke->HasPcRelativeDexCache()) {
+      // Initialize base for target method dex file if needed.
+      MethodReference target_method = invoke->GetTargetMethod();
+      HMipsDexCacheArraysBase* base = GetOrCreateDexCacheArrayBase(*target_method.dex_file);
+      // Update the element offset in base.
+      DexCacheArraysLayout layout(kMipsPointerSize, target_method.dex_file);
+      base->UpdateElementOffset(layout.MethodOffset(target_method.dex_method_index));
+      // Add the special argument base to the method.
+      DCHECK(!invoke->HasCurrentMethodInput());
+      invoke->AddSpecialInput(base);
+    }
+  }
+
+  HMipsDexCacheArraysBase* GetOrCreateDexCacheArrayBase(const DexFile& dex_file) {
+    return dex_cache_array_bases_.GetOrCreate(
+        &dex_file,
+        [this, &dex_file]() {
+          HMipsDexCacheArraysBase* base =
+              new (GetGraph()->GetArena()) HMipsDexCacheArraysBase(dex_file);
+          HBasicBlock* entry_block = GetGraph()->GetEntryBlock();
+          // Insert the base at the start of the entry block, move it to a better
+          // position later in MoveBasesIfNeeded().
+          entry_block->InsertInstructionBefore(base, entry_block->GetFirstInstruction());
+          return base;
+        });
+  }
+
+  using DexCacheArraysBaseMap =
+      ArenaSafeMap<const DexFile*, HMipsDexCacheArraysBase*, std::less<const DexFile*>>;
+  DexCacheArraysBaseMap dex_cache_array_bases_;
+};
+
+void DexCacheArrayFixups::Run() {
+  if (graph_->HasIrreducibleLoops()) {
+    // Do not run this optimization, as irreducible loops do not work with an instruction
+    // that can be live-in at the irreducible loop header.
+    return;
+  }
+  DexCacheArrayFixupsVisitor visitor(graph_);
+  visitor.VisitInsertionOrder();
+  visitor.MoveBasesIfNeeded();
+}
+
+}  // namespace mips
+}  // namespace art
diff --git a/compiler/optimizing/dex_cache_array_fixups_mips.h b/compiler/optimizing/dex_cache_array_fixups_mips.h
new file mode 100644
index 0000000000..c8def2842e
--- /dev/null
+++ b/compiler/optimizing/dex_cache_array_fixups_mips.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_MIPS_H_
+#define ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_MIPS_H_
+
+#include "nodes.h"
+#include "optimization.h"
+
+namespace art {
+namespace mips {
+
+class DexCacheArrayFixups : public HOptimization {
+ public:
+  DexCacheArrayFixups(HGraph* graph, OptimizingCompilerStats* stats)
+      : HOptimization(graph, "dex_cache_array_fixups_mips", stats) {}
+
+  void Run() OVERRIDE;
+};
+
+}  // namespace mips
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_MIPS_H_
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index 2bd2403dd6..c8cba205fd 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -362,7 +362,7 @@ void GraphChecker::VisitInstruction(HInstruction* instruction) {
                             instruction->GetId()));
     }
     size_t use_index = use.GetIndex();
-    auto&& user_inputs = user->GetInputs();
+    HConstInputsRef user_inputs = user->GetInputs();
     if ((use_index >= user_inputs.size()) ||
         (user_inputs[use_index] != instruction)) {
       AddError(StringPrintf("User %s:%d of instruction %s:%d has a wrong "
                             "UseListNode index.",
@@ -490,7 +490,7 @@ void GraphChecker::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
   VisitInstruction(invoke);
 
   if (invoke->IsStaticWithExplicitClinitCheck()) {
-    HInstruction* last_input = invoke->GetInputs().back();
+    const HInstruction* last_input = invoke->GetInputs().back();
     if (last_input == nullptr) {
       AddError(StringPrintf("Static invoke %s:%d marked as having an explicit clinit check "
                             "has a null pointer as last input.",
@@ -664,17 +664,19 @@ void GraphChecker::HandleLoop(HBasicBlock* loop_header) {
   }
 }
 
-static bool IsSameSizeConstant(HInstruction* insn1, HInstruction* insn2) {
+static bool IsSameSizeConstant(const HInstruction* insn1, const HInstruction* insn2) {
   return insn1->IsConstant()
       && insn2->IsConstant()
      && Primitive::Is64BitType(insn1->GetType()) == Primitive::Is64BitType(insn2->GetType());
 }
 
-static bool IsConstantEquivalent(HInstruction* insn1, HInstruction* insn2, BitVector* visited) {
+static bool IsConstantEquivalent(const HInstruction* insn1,
+                                 const HInstruction* insn2,
+                                 BitVector* visited) {
   if (insn1->IsPhi() && insn1->AsPhi()->IsVRegEquivalentOf(insn2)) {
-    auto&& insn1_inputs = insn1->GetInputs();
-    auto&& insn2_inputs = insn2->GetInputs();
+    HConstInputsRef insn1_inputs = insn1->GetInputs();
+    HConstInputsRef insn2_inputs = insn2->GetInputs();
     if (insn1_inputs.size() != insn2_inputs.size()) {
       return false;
     }
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 4af8d1985b..9d67373321 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -511,7 +511,7 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {
 
   void PrintInstruction(HInstruction* instruction) {
     output_ << instruction->DebugName();
-    auto&& inputs = instruction->GetInputs();
+    HConstInputsRef inputs = instruction->GetInputs();
     if (!inputs.empty()) {
       StringList input_list;
       for (const HInstruction* input : inputs) {
diff --git a/compiler/optimizing/induction_var_analysis.cc b/compiler/optimizing/induction_var_analysis.cc
index 52426d73c6..129c2a94b5 100644
--- a/compiler/optimizing/induction_var_analysis.cc
+++ b/compiler/optimizing/induction_var_analysis.cc
@@ -341,7 +341,7 @@ HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::TransferPhi(HLoopIn
                                                                          HInstruction* phi,
                                                                          size_t input_index) {
   // Match all phi inputs from input_index onwards exactly.
-  auto&& inputs = phi->GetInputs();
+  HInputsRef inputs = phi->GetInputs();
   DCHECK_LT(input_index, inputs.size());
   InductionInfo* a = LookupInfo(loop, inputs[input_index]);
   for (size_t i = input_index + 1; i < inputs.size(); i++) {
@@ -464,7 +464,7 @@ HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::SolvePhi(HInstruction* phi,
                                                                       size_t input_index) {
   // Match all phi inputs from input_index onwards exactly.
-  auto&& inputs = phi->GetInputs();
+  HInputsRef inputs = phi->GetInputs();
   DCHECK_LT(input_index, inputs.size());
   auto ita = cycle_.find(inputs[input_index]);
   if (ita != cycle_.end()) {
diff --git a/compiler/optimizing/induction_var_analysis.h b/compiler/optimizing/induction_var_analysis.h
index f1965f07b2..7c74816c26 100644
--- a/compiler/optimizing/induction_var_analysis.h
+++ b/compiler/optimizing/induction_var_analysis.h
@@ -132,7 +132,7 @@ class HInductionVarAnalysis : public HOptimization {
                                        InductionInfo* a,
                                        InductionInfo* b,
                                        Primitive::Type type) {
-    DCHECK(a != nullptr);
+    DCHECK(a != nullptr && b != nullptr);
     return new (graph_->GetArena()) InductionInfo(kInvariant, op, a, b, nullptr, type);
   }
diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc
index bc920d96b5..5e587e0810 100644
--- a/compiler/optimizing/induction_var_range.cc
+++ b/compiler/optimizing/induction_var_range.cc
@@ -73,9 +73,12 @@ static InductionVarRange::Value SimplifyMax(InductionVarRange::Value v) {
   return v;
 }
 
-/**
- * Corrects a value for type to account for arithmetic wrap-around in lower precision.
- */
+/** Helper method to test for a constant value. */
+static bool IsConstantValue(InductionVarRange::Value v) {
+  return v.is_known && v.a_constant == 0;
+}
+
+/** Corrects a value for type to account for arithmetic wrap-around in lower precision. */
 static InductionVarRange::Value CorrectForType(InductionVarRange::Value v, Primitive::Type type) {
   switch (type) {
     case Primitive::kPrimShort:
@@ -85,26 +88,15 @@ static InductionVarRange::Value CorrectForType(InductionVarRange::Value v, Primi
       // TODO: maybe some room for improvement, like allowing widening conversions
       const int32_t min = Primitive::MinValueOfIntegralType(type);
       const int32_t max = Primitive::MaxValueOfIntegralType(type);
-      return (v.is_known && v.a_constant == 0 && min <= v.b_constant && v.b_constant <= max)
+      return (IsConstantValue(v) && min <= v.b_constant && v.b_constant <= max)
           ? v
           : InductionVarRange::Value();
     }
     default:
-      // At int or higher.
       return v;
   }
 }
 
-/** Helper method to test for a constant value. */
-static bool IsConstantValue(InductionVarRange::Value v) {
-  return v.is_known && v.a_constant == 0;
-}
-
-/** Helper method to test for same constant value. */
-static bool IsSameConstantValue(InductionVarRange::Value v1, InductionVarRange::Value v2) {
-  return IsConstantValue(v1) && IsConstantValue(v2) && v1.b_constant == v2.b_constant;
-}
-
 /** Helper method to insert an instruction. */
 static HInstruction* Insert(HBasicBlock* block, HInstruction* instruction) {
   DCHECK(block != nullptr);
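For concreteness, a standalone sketch of the rule `CorrectForType` applies for a narrow type such as `kPrimShort` (assumed `Value` mirror of the real struct, not ART code): a constant survives only if it fits the narrower type; anything else collapses to "unknown", since lower-precision arithmetic may have wrapped.

```cpp
#include <cstdint>

struct Value { bool is_known; int32_t a_constant; int32_t b_constant; };

Value CorrectForShort(Value v) {
  bool is_constant = v.is_known && v.a_constant == 0;  // IsConstantValue
  if (is_constant && -32768 <= v.b_constant && v.b_constant <= 32767) {
    return v;  // fits in 16 bits: no wrap-around possible
  }
  return Value{false, 0, 0};  // may have wrapped: drop the information
}
```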
@@ -119,22 +111,22 @@ static HInstruction* Insert(HBasicBlock* block, HInstruction* instruction) {
 //
 
 InductionVarRange::InductionVarRange(HInductionVarAnalysis* induction_analysis)
-    : induction_analysis_(induction_analysis) {
+    : induction_analysis_(induction_analysis),
+      chase_hint_(nullptr) {
   DCHECK(induction_analysis != nullptr);
 }
 
 bool InductionVarRange::GetInductionRange(HInstruction* context,
                                           HInstruction* instruction,
+                                          HInstruction* chase_hint,
                                           /*out*/Value* min_val,
                                           /*out*/Value* max_val,
                                           /*out*/bool* needs_finite_test) {
-  HLoopInformation* loop = context->GetBlock()->GetLoopInformation();  // closest enveloping loop
-  if (loop == nullptr) {
-    return false;  // no loop
-  }
-  HInductionVarAnalysis::InductionInfo* info = induction_analysis_->LookupInfo(loop, instruction);
-  if (info == nullptr) {
-    return false;  // no induction information
+  HLoopInformation* loop = nullptr;
+  HInductionVarAnalysis::InductionInfo* info = nullptr;
+  HInductionVarAnalysis::InductionInfo* trip = nullptr;
+  if (!HasInductionInfo(context, instruction, &loop, &info, &trip)) {
+    return false;
   }
   // Type int or lower (this is not too restrictive since intended clients, like
   // bounds check elimination, will have truncated higher precision induction
@@ -148,45 +140,15 @@ bool InductionVarRange::GetInductionRange(HInstruction* context,
     default:
       return false;
   }
-  // Set up loop information.
-  HBasicBlock* header = loop->GetHeader();
-  bool in_body = context->GetBlock() != header;
-  HInductionVarAnalysis::InductionInfo* trip =
-      induction_analysis_->LookupInfo(loop, header->GetLastInstruction());
   // Find range.
+  chase_hint_ = chase_hint;
+  bool in_body = context->GetBlock() != loop->GetHeader();
   *min_val = GetVal(info, trip, in_body, /* is_min */ true);
   *max_val = SimplifyMax(GetVal(info, trip, in_body, /* is_min */ false));
   *needs_finite_test = NeedsTripCount(info) && IsUnsafeTripCount(trip);
   return true;
 }
 
-bool InductionVarRange::RefineOuter(/*in-out*/ Value* min_val,
-                                    /*in-out*/ Value* max_val) const {
-  if (min_val->instruction != nullptr || max_val->instruction != nullptr) {
-    Value v1_min = RefineOuter(*min_val, /* is_min */ true);
-    Value v2_max = RefineOuter(*max_val, /* is_min */ false);
-    // The refined range is safe if both sides refine the same instruction. Otherwise, since two
-    // different ranges are combined, the new refined range is safe to pass back to the client if
-    // the extremes of the computed ranges ensure no arithmetic wrap-around anomalies occur.
-    if (min_val->instruction != max_val->instruction) {
-      Value v1_max = RefineOuter(*min_val, /* is_min */ false);
-      Value v2_min = RefineOuter(*max_val, /* is_min */ true);
-      if (!IsConstantValue(v1_max) ||
-          !IsConstantValue(v2_min) ||
-          v1_max.b_constant > v2_min.b_constant) {
-        return false;
-      }
-    }
-    // Did something change?
-    if (v1_min.instruction != min_val->instruction || v2_max.instruction != max_val->instruction) {
-      *min_val = v1_min;
-      *max_val = v2_max;
-      return true;
-    }
-  }
-  return false;
-}
-
 bool InductionVarRange::CanGenerateCode(HInstruction* context,
                                         HInstruction* instruction,
                                         /*out*/bool* needs_finite_test,
@@ -226,7 +188,7 @@ void InductionVarRange::GenerateTakenTest(HInstruction* context,
 
 bool InductionVarRange::IsConstant(HInductionVarAnalysis::InductionInfo* info,
                                    ConstantRequest request,
-                                   /*out*/ int64_t *value) const {
+                                   /*out*/ int64_t* value) const {
   if (info != nullptr) {
     // A direct 32-bit or 64-bit constant fetch. This immediately satisfies
     // any of the three requests (kExact, kAtMost, and KAtLeast).
@@ -236,27 +198,16 @@ bool InductionVarRange::IsConstant(HInductionVarAnalysis::InductionInfo* info,
       return true;
     }
   }
-  // Try range analysis while traversing outward on loops.
-  bool in_body = true;  // no known trip count
-  Value v_min = GetVal(info, nullptr, in_body, /* is_min */ true);
-  Value v_max = GetVal(info, nullptr, in_body, /* is_min */ false);
-  do {
-    // Make sure *both* extremes are known to avoid arithmetic wrap-around anomalies.
-    if (IsConstantValue(v_min) && IsConstantValue(v_max) && v_min.b_constant <= v_max.b_constant) {
-      if ((request == kExact && v_min.b_constant == v_max.b_constant) || request == kAtMost) {
-        *value = v_max.b_constant;
-        return true;
-      } else if (request == kAtLeast) {
-        *value = v_min.b_constant;
-        return true;
-      }
-    }
-  } while (RefineOuter(&v_min, &v_max));
-  // Exploit array length + c >= c, with c <= 0 to avoid arithmetic wrap-around anomalies
-  // (e.g. array length == maxint and c == 1 would yield minint).
-  if (request == kAtLeast) {
-    if (v_min.a_constant == 1 && v_min.b_constant <= 0 && v_min.instruction->IsArrayLength()) {
-      *value = v_min.b_constant;
+  // Try range analysis on the invariant, but only on proper range to avoid wrap-around anomalies.
+  Value min_val = GetVal(info, nullptr, /* in_body */ true, /* is_min */ true);
+  Value max_val = GetVal(info, nullptr, /* in_body */ true, /* is_min */ false);
+  if (IsConstantValue(min_val) &&
+      IsConstantValue(max_val) && min_val.b_constant <= max_val.b_constant) {
+    if ((request == kExact && min_val.b_constant == max_val.b_constant) || request == kAtMost) {
+      *value = max_val.b_constant;
+      return true;
+    } else if (request == kAtLeast) {
+      *value = min_val.b_constant;
       return true;
     }
   }
   return false;
 }
 
+bool InductionVarRange::HasInductionInfo(
+    HInstruction* context,
+    HInstruction* instruction,
+    /*out*/ HLoopInformation** loop,
+    /*out*/ HInductionVarAnalysis::InductionInfo** info,
+    /*out*/ HInductionVarAnalysis::InductionInfo** trip) const {
+  HLoopInformation* l = context->GetBlock()->GetLoopInformation();  // closest enveloping loop
+  if (l != nullptr) {
+    HInductionVarAnalysis::InductionInfo* i = induction_analysis_->LookupInfo(l, instruction);
+    if (i != nullptr) {
+      *loop = l;
+      *info = i;
+      *trip = induction_analysis_->LookupInfo(l, l->GetHeader()->GetLastInstruction());
+      return true;
+    }
+  }
+  return false;
+}
+
+bool InductionVarRange::IsWellBehavedTripCount(HInductionVarAnalysis::InductionInfo* trip) const {
+  if (trip != nullptr) {
+    // Both bounds that define a trip-count are well-behaved if they either are not defined
+    // in any loop, or are contained in a proper interval. This allows finding the min/max
+    // of an expression by chasing outward.
+    InductionVarRange range(induction_analysis_);
+    HInductionVarAnalysis::InductionInfo* lower = trip->op_b->op_a;
+    HInductionVarAnalysis::InductionInfo* upper = trip->op_b->op_b;
+    int64_t not_used = 0;
+    return (!HasFetchInLoop(lower) || range.IsConstant(lower, kAtLeast, &not_used)) &&
+           (!HasFetchInLoop(upper) || range.IsConstant(upper, kAtLeast, &not_used));
+  }
+  return true;
+}
+
+bool InductionVarRange::HasFetchInLoop(HInductionVarAnalysis::InductionInfo* info) const {
+  if (info != nullptr) {
+    if (info->induction_class == HInductionVarAnalysis::kInvariant &&
+        info->operation == HInductionVarAnalysis::kFetch) {
+      return info->fetch->GetBlock()->GetLoopInformation() != nullptr;
+    }
+    return HasFetchInLoop(info->op_a) || HasFetchInLoop(info->op_b);
+  }
+  return false;
+}
+
 bool InductionVarRange::NeedsTripCount(HInductionVarAnalysis::InductionInfo* info) const {
   if (info != nullptr) {
     if (info->induction_class == HInductionVarAnalysis::kLinear) {
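A sketch of what the three `ConstantRequest` kinds mean once a proper constant range [lo, hi] has been computed (standalone, not ART code; `lo <= hi` is the guard against wrap-around anomalies):

```cpp
#include <cstdint>

enum class Request { kExact, kAtMost, kAtLeast };

bool Resolve(Request request, int64_t lo, int64_t hi, int64_t* value) {
  if (lo > hi) return false;  // not a proper range
  if (request == Request::kExact && lo == hi) { *value = lo; return true; }
  if (request == Request::kAtMost)            { *value = hi; return true; }  // upper bound
  if (request == Request::kAtLeast)           { *value = lo; return true; }  // lower bound
  return false;
}
```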
if (trip != nullptr) { HInductionVarAnalysis::InductionInfo* trip_expr = trip->op_a; - if (trip_expr->operation == HInductionVarAnalysis::kSub) { + if (trip_expr->type == info->type && trip_expr->operation == HInductionVarAnalysis::kSub) { int64_t stride_value = 0; if (IsConstant(info->op_a, kExact, &stride_value)) { if (!is_min && stride_value == 1) { @@ -349,12 +345,25 @@ InductionVarRange::Value InductionVarRange::GetFetch(HInstruction* instruction, HInductionVarAnalysis::InductionInfo* trip, bool in_body, bool is_min) const { - // Detect constants and chase the fetch a bit deeper into the HIR tree, so that it becomes - // more likely range analysis will compare the same instructions as terminal nodes. + // Stop chasing the instruction at constant or hint. int64_t value; - if (IsIntAndGet(instruction, &value) && CanLongValueFitIntoInt(value)) { + if (IsIntAndGet(instruction, &value) && CanLongValueFitIntoInt(value)) { return Value(static_cast<int32_t>(value)); - } else if (instruction->IsAdd()) { + } else if (instruction == chase_hint_) { + return Value(instruction, 1, 0); + } + // Special cases when encountering a single instruction that denotes trip count in the + // loop-body: min is 1 and, when chasing constants, max of safe trip-count is max int + if (in_body && trip != nullptr && instruction == trip->op_a->fetch) { + if (is_min) { + return Value(1); + } else if (chase_hint_ == nullptr && !IsUnsafeTripCount(trip)) { + return Value(std::numeric_limits<int32_t>::max()); + } + } + // Chase the instruction a bit deeper into the HIR tree, so that it becomes more likely + // range analysis will compare the same instructions as terminal nodes. + if (instruction->IsAdd()) { if (IsIntAndGet(instruction->InputAt(0), &value) && CanLongValueFitIntoInt(value)) { return AddValue(Value(static_cast<int32_t>(value)), GetFetch(instruction->InputAt(1), trip, in_body, is_min)); @@ -362,19 +371,35 @@ InductionVarRange::Value InductionVarRange::GetFetch(HInstruction* instruction, return AddValue(GetFetch(instruction->InputAt(0), trip, in_body, is_min), Value(static_cast<int32_t>(value))); } - } else if (instruction->IsArrayLength() && instruction->InputAt(0)->IsNewArray()) { - return GetFetch(instruction->InputAt(0)->InputAt(0), trip, in_body, is_min); + } else if (instruction->IsArrayLength()) { + // Return extreme values when chasing constants. Otherwise, chase deeper. + if (chase_hint_ == nullptr) { + return is_min ? Value(0) : Value(std::numeric_limits<int32_t>::max()); + } else if (instruction->InputAt(0)->IsNewArray()) { + return GetFetch(instruction->InputAt(0)->InputAt(0), trip, in_body, is_min); + } } else if (instruction->IsTypeConversion()) { // Since analysis is 32-bit (or narrower) we allow a widening along the path. if (instruction->AsTypeConversion()->GetInputType() == Primitive::kPrimInt && instruction->AsTypeConversion()->GetResultType() == Primitive::kPrimLong) { return GetFetch(instruction->InputAt(0), trip, in_body, is_min); } - } else if (is_min) { - // Special case for finding minimum: minimum of trip-count in loop-body is 1. - if (trip != nullptr && in_body && instruction == trip->op_a->fetch) { - return Value(1); - } + } + // Chase an invariant fetch that is defined by an outer loop if the trip-count used + // so far is well-behaved in both bounds and the next trip-count is safe. 
+ // Example: + // for (int i = 0; i <= 100; i++) // safe + // for (int j = 0; j <= i; j++) // well-behaved + // j is in range [0, i ] (if i is chase hint) + // or in range [0, 100] (otherwise) + HLoopInformation* next_loop = nullptr; + HInductionVarAnalysis::InductionInfo* next_info = nullptr; + HInductionVarAnalysis::InductionInfo* next_trip = nullptr; + bool next_in_body = true; // inner loop is always in body of outer loop + if (HasInductionInfo(instruction, instruction, &next_loop, &next_info, &next_trip) && + IsWellBehavedTripCount(trip) && + !IsUnsafeTripCount(next_trip)) { + return GetVal(next_info, next_trip, next_in_body, is_min); } return Value(instruction, 1, 0); } @@ -421,9 +446,8 @@ InductionVarRange::Value InductionVarRange::GetVal(HInductionVarAnalysis::Induct break; } break; - case HInductionVarAnalysis::kLinear: { + case HInductionVarAnalysis::kLinear: return CorrectForType(GetLinear(info, trip, in_body, is_min), info->type); - } case HInductionVarAnalysis::kWrapAround: case HInductionVarAnalysis::kPeriodic: return MergeVal(GetVal(info->op_a, trip, in_body, is_min), @@ -438,20 +462,18 @@ InductionVarRange::Value InductionVarRange::GetMul(HInductionVarAnalysis::Induct HInductionVarAnalysis::InductionInfo* trip, bool in_body, bool is_min) const { + // Constant times range. + int64_t value = 0; + if (IsConstant(info1, kExact, &value)) { + return MulRangeAndConstant(value, info2, trip, in_body, is_min); + } else if (IsConstant(info2, kExact, &value)) { + return MulRangeAndConstant(value, info1, trip, in_body, is_min); + } + // Interval ranges. Value v1_min = GetVal(info1, trip, in_body, /* is_min */ true); Value v1_max = GetVal(info1, trip, in_body, /* is_min */ false); Value v2_min = GetVal(info2, trip, in_body, /* is_min */ true); Value v2_max = GetVal(info2, trip, in_body, /* is_min */ false); - // Try to refine first operand. - if (!IsConstantValue(v1_min) && !IsConstantValue(v1_max)) { - RefineOuter(&v1_min, &v1_max); - } - // Constant times range. - if (IsSameConstantValue(v1_min, v1_max)) { - return MulRangeAndConstant(v2_min, v2_max, v1_min, is_min); - } else if (IsSameConstantValue(v2_min, v2_max)) { - return MulRangeAndConstant(v1_min, v1_max, v2_min, is_min); - } // Positive range vs. positive or negative range. if (IsConstantValue(v1_min) && v1_min.b_constant >= 0) { if (IsConstantValue(v2_min) && v2_min.b_constant >= 0) { @@ -476,14 +498,16 @@ InductionVarRange::Value InductionVarRange::GetDiv(HInductionVarAnalysis::Induct HInductionVarAnalysis::InductionInfo* trip, bool in_body, bool is_min) const { + // Range divided by constant. + int64_t value = 0; + if (IsConstant(info2, kExact, &value)) { + return DivRangeAndConstant(value, info1, trip, in_body, is_min); + } + // Interval ranges. Value v1_min = GetVal(info1, trip, in_body, /* is_min */ true); Value v1_max = GetVal(info1, trip, in_body, /* is_min */ false); Value v2_min = GetVal(info2, trip, in_body, /* is_min */ true); Value v2_max = GetVal(info2, trip, in_body, /* is_min */ false); - // Range divided by constant. - if (IsSameConstantValue(v2_min, v2_max)) { - return DivRangeAndConstant(v1_min, v1_max, v2_min, is_min); - } // Positive range vs. positive or negative range. 
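[Editor's aside, not part of the patch: the "is_min == value >= 0" selection in the MulRangeAndConstant/DivRangeAndConstant bodies below reflects ordinary interval arithmetic; a standalone check with made-up bounds.]

#include <algorithm>
#include <cassert>

int main() {
  // Range [lo, hi] times a constant c: for c >= 0 the minimum comes from lo,
  // for c < 0 the extremes swap and the minimum comes from hi.
  int lo = 2, hi = 5, c = -3;
  assert(std::min(lo * c, hi * c) == hi * c);  // -15, from the max side
  assert(std::max(lo * c, hi * c) == lo * c);  // -6, from the min side
  return 0;
}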
if (IsConstantValue(v1_min) && v1_min.b_constant >= 0) { if (IsConstantValue(v2_min) && v2_min.b_constant >= 0) { @@ -503,18 +527,30 @@ InductionVarRange::Value InductionVarRange::GetDiv(HInductionVarAnalysis::Induct return Value(); } -InductionVarRange::Value InductionVarRange::MulRangeAndConstant(Value v_min, - Value v_max, - Value c, - bool is_min) const { - return is_min == (c.b_constant >= 0) ? MulValue(v_min, c) : MulValue(v_max, c); +InductionVarRange::Value InductionVarRange::MulRangeAndConstant( + int64_t value, + HInductionVarAnalysis::InductionInfo* info, + HInductionVarAnalysis::InductionInfo* trip, + bool in_body, + bool is_min) const { + if (CanLongValueFitIntoInt(value)) { + Value c(static_cast<int32_t>(value)); + return MulValue(GetVal(info, trip, in_body, is_min == value >= 0), c); + } + return Value(); } -InductionVarRange::Value InductionVarRange::DivRangeAndConstant(Value v_min, - Value v_max, - Value c, - bool is_min) const { - return is_min == (c.b_constant >= 0) ? DivValue(v_min, c) : DivValue(v_max, c); +InductionVarRange::Value InductionVarRange::DivRangeAndConstant( + int64_t value, + HInductionVarAnalysis::InductionInfo* info, + HInductionVarAnalysis::InductionInfo* trip, + bool in_body, + bool is_min) const { + if (CanLongValueFitIntoInt(value)) { + Value c(static_cast<int32_t>(value)); + return DivValue(GetVal(info, trip, in_body, is_min == value >= 0), c); + } + return Value(); } InductionVarRange::Value InductionVarRange::AddValue(Value v1, Value v2) const { @@ -580,28 +616,6 @@ InductionVarRange::Value InductionVarRange::MergeVal(Value v1, Value v2, bool is return Value(); } -InductionVarRange::Value InductionVarRange::RefineOuter(Value v, bool is_min) const { - if (v.instruction == nullptr) { - return v; // nothing to refine - } - HLoopInformation* loop = - v.instruction->GetBlock()->GetLoopInformation(); // closest enveloping loop - if (loop == nullptr) { - return v; // no loop - } - HInductionVarAnalysis::InductionInfo* info = induction_analysis_->LookupInfo(loop, v.instruction); - if (info == nullptr) { - return v; // no induction information - } - // Set up loop information. - HBasicBlock* header = loop->GetHeader(); - bool in_body = true; // inner always in more outer - HInductionVarAnalysis::InductionInfo* trip = - induction_analysis_->LookupInfo(loop, header->GetLastInstruction()); - // Try to refine "a x instruction + b" with outer loop range information on instruction. - return AddValue(MulValue(Value(v.a_constant), GetVal(info, trip, in_body, is_min)), Value(v.b_constant)); -} - bool InductionVarRange::GenerateCode(HInstruction* context, HInstruction* instruction, HGraph* graph, @@ -611,27 +625,18 @@ bool InductionVarRange::GenerateCode(HInstruction* context, /*out*/HInstruction** taken_test, /*out*/bool* needs_finite_test, /*out*/bool* needs_taken_test) const { - HLoopInformation* loop = context->GetBlock()->GetLoopInformation(); // closest enveloping loop - if (loop == nullptr) { - return false; // no loop - } - HInductionVarAnalysis::InductionInfo* info = induction_analysis_->LookupInfo(loop, instruction); - if (info == nullptr) { - return false; // no induction information - } - // Set up loop information. 
- HBasicBlock* header = loop->GetHeader(); - bool in_body = context->GetBlock() != header; - HInductionVarAnalysis::InductionInfo* trip = - induction_analysis_->LookupInfo(loop, header->GetLastInstruction()); - if (trip == nullptr) { - return false; // codegen relies on trip count + HLoopInformation* loop = nullptr; + HInductionVarAnalysis::InductionInfo* info = nullptr; + HInductionVarAnalysis::InductionInfo* trip = nullptr; + if (!HasInductionInfo(context, instruction, &loop, &info, &trip) || trip == nullptr) { + return false; // codegen needs all information, including tripcount } // Determine what tests are needed. A finite test is needed if the evaluation code uses the // trip-count and the loop maybe unsafe (because in such cases, the index could "overshoot" // the computed range). A taken test is needed for any unknown trip-count, even if evaluation // code does not use the trip-count explicitly (since there could be an implicit relation // between e.g. an invariant subscript and a not-taken condition). + bool in_body = context->GetBlock() != loop->GetHeader(); *needs_finite_test = NeedsTripCount(info) && IsUnsafeTripCount(trip); *needs_taken_test = IsBodyTripCount(trip); // Code generation for taken test: generate the code when requested or otherwise analyze diff --git a/compiler/optimizing/induction_var_range.h b/compiler/optimizing/induction_var_range.h index 0af41560ff..00aaa167f8 100644 --- a/compiler/optimizing/induction_var_range.h +++ b/compiler/optimizing/induction_var_range.h @@ -57,21 +57,19 @@ class InductionVarRange { explicit InductionVarRange(HInductionVarAnalysis* induction); /** - * Given a context denoted by the first instruction, returns a possibly conservative - * lower and upper bound on the instruction's value in the output parameters min_val - * and max_val, respectively. The need_finite_test flag denotes if an additional finite-test - * is needed to protect the range evaluation inside its loop. Returns false on failure. + * Given a context denoted by the first instruction, returns a possibly conservative lower + * and upper bound on the instruction's value in the output parameters min_val and max_val, + * respectively. The need_finite_test flag denotes if an additional finite-test is needed + * to protect the range evaluation inside its loop. The parameter chase_hint defines an + * instruction at which chasing may stop. Returns false on failure. */ bool GetInductionRange(HInstruction* context, HInstruction* instruction, + HInstruction* chase_hint, /*out*/ Value* min_val, /*out*/ Value* max_val, /*out*/ bool* needs_finite_test); - /** Refines the values with induction of next outer loop. Returns true on change. */ - bool RefineOuter(/*in-out*/ Value* min_val, - /*in-out*/ Value* max_val) const; - /** * Returns true if range analysis is able to generate code for the lower and upper * bound expressions on the instruction in the given context. The need_finite_test @@ -132,11 +130,20 @@ class InductionVarRange { */ bool IsConstant(HInductionVarAnalysis::InductionInfo* info, ConstantRequest request, - /*out*/ int64_t *value) const; + /*out*/ int64_t* value) const; + + /** Returns whether induction information can be obtained. 
*/ + bool HasInductionInfo(HInstruction* context, + HInstruction* instruction, + /*out*/ HLoopInformation** loop, + /*out*/ HInductionVarAnalysis::InductionInfo** info, + /*out*/ HInductionVarAnalysis::InductionInfo** trip) const; + bool HasFetchInLoop(HInductionVarAnalysis::InductionInfo* info) const; bool NeedsTripCount(HInductionVarAnalysis::InductionInfo* info) const; bool IsBodyTripCount(HInductionVarAnalysis::InductionInfo* trip) const; bool IsUnsafeTripCount(HInductionVarAnalysis::InductionInfo* trip) const; + bool IsWellBehavedTripCount(HInductionVarAnalysis::InductionInfo* trip) const; Value GetLinear(HInductionVarAnalysis::InductionInfo* info, HInductionVarAnalysis::InductionInfo* trip, @@ -161,8 +168,16 @@ class InductionVarRange { bool in_body, bool is_min) const; - Value MulRangeAndConstant(Value v1, Value v2, Value c, bool is_min) const; - Value DivRangeAndConstant(Value v1, Value v2, Value c, bool is_min) const; + Value MulRangeAndConstant(int64_t value, + HInductionVarAnalysis::InductionInfo* info, + HInductionVarAnalysis::InductionInfo* trip, + bool in_body, + bool is_min) const; + Value DivRangeAndConstant(int64_t value, + HInductionVarAnalysis::InductionInfo* info, + HInductionVarAnalysis::InductionInfo* trip, + bool in_body, + bool is_min) const; Value AddValue(Value v1, Value v2) const; Value SubValue(Value v1, Value v2) const; @@ -171,12 +186,6 @@ class InductionVarRange { Value MergeVal(Value v1, Value v2, bool is_min) const; /** - * Returns refined value using induction of next outer loop or the input value if no - * further refinement is possible. - */ - Value RefineOuter(Value val, bool is_min) const; - - /** * Generates code for lower/upper/taken-test in the HIR. Returns true on success. * With values nullptr, the method can be used to determine if code generation * would be successful without generating actual code yet. @@ -200,7 +209,10 @@ class InductionVarRange { bool is_min) const; /** Results of prior induction variable analysis. */ - HInductionVarAnalysis *induction_analysis_; + HInductionVarAnalysis* induction_analysis_; + + /** Instruction at which chasing may stop. */ + HInstruction* chase_hint_; friend class HInductionVarAnalysis; friend class InductionVarRangeTest; diff --git a/compiler/optimizing/induction_var_range_test.cc b/compiler/optimizing/induction_var_range_test.cc index dc04dc2c49..4ea170f659 100644 --- a/compiler/optimizing/induction_var_range_test.cc +++ b/compiler/optimizing/induction_var_range_test.cc @@ -66,6 +66,8 @@ class InductionVarRangeTest : public CommonCompilerTest { entry_block_->AddInstruction(x_); y_ = new (&allocator_) HParameterValue(graph_->GetDexFile(), 0, 0, Primitive::kPrimInt); entry_block_->AddInstruction(y_); + // Set arbitrary range analysis hint while testing private methods. + SetHint(x_); } /** Constructs loop with given upper bound. */ @@ -111,6 +113,11 @@ class InductionVarRangeTest : public CommonCompilerTest { iva_->Run(); } + /** Sets hint. */ + void SetHint(HInstruction* hint) { + range_.chase_hint_ = hint; + } + /** Constructs an invariant. 
*/ HInductionVarAnalysis::InductionInfo* CreateInvariant(char opc, HInductionVarAnalysis::InductionInfo* a, @@ -122,6 +129,7 @@ class InductionVarRangeTest : public CommonCompilerTest { case 'n': op = HInductionVarAnalysis::kNeg; break; case '*': op = HInductionVarAnalysis::kMul; break; case '/': op = HInductionVarAnalysis::kDiv; break; + case '<': op = HInductionVarAnalysis::kLT; break; default: op = HInductionVarAnalysis::kNop; break; } return iva_->CreateInvariantOp(op, a, b); @@ -137,22 +145,21 @@ class InductionVarRangeTest : public CommonCompilerTest { return CreateFetch(graph_->GetIntConstant(c)); } - /** Constructs a trip-count. */ + /** Constructs a constant trip-count. */ HInductionVarAnalysis::InductionInfo* CreateTripCount(int32_t tc, bool in_loop, bool safe) { - Primitive::Type type = Primitive::kPrimInt; + HInductionVarAnalysis::InductionOp op = HInductionVarAnalysis::kTripCountInBodyUnsafe; if (in_loop && safe) { - return iva_->CreateTripCount( - HInductionVarAnalysis::kTripCountInLoop, CreateConst(tc), nullptr, type); + op = HInductionVarAnalysis::kTripCountInLoop; } else if (in_loop) { - return iva_->CreateTripCount( - HInductionVarAnalysis::kTripCountInLoopUnsafe, CreateConst(tc), nullptr, type); + op = HInductionVarAnalysis::kTripCountInLoopUnsafe; } else if (safe) { - return iva_->CreateTripCount( - HInductionVarAnalysis::kTripCountInBody, CreateConst(tc), nullptr, type); - } else { - return iva_->CreateTripCount( - HInductionVarAnalysis::kTripCountInBodyUnsafe, CreateConst(tc), nullptr, type); + op = HInductionVarAnalysis::kTripCountInBody; } + // Return TC with taken-test 0 < TC. + return iva_->CreateTripCount(op, + CreateConst(tc), + CreateInvariant('<', CreateConst(0), CreateConst(tc)), + Primitive::kPrimInt); } /** Constructs a linear a * i + b induction. */ @@ -197,13 +204,13 @@ class InductionVarRangeTest : public CommonCompilerTest { } Value GetMin(HInductionVarAnalysis::InductionInfo* info, - HInductionVarAnalysis::InductionInfo* induc) { - return range_.GetVal(info, induc, /* in_body */ true, /* is_min */ true); + HInductionVarAnalysis::InductionInfo* trip) { + return range_.GetVal(info, trip, /* in_body */ true, /* is_min */ true); } Value GetMax(HInductionVarAnalysis::InductionInfo* info, - HInductionVarAnalysis::InductionInfo* induc) { - return range_.GetVal(info, induc, /* in_body */ true, /* is_min */ false); + HInductionVarAnalysis::InductionInfo* trip) { + return range_.GetVal(info, trip, /* in_body */ true, /* is_min */ false); } Value GetMul(HInductionVarAnalysis::InductionInfo* info1, @@ -558,6 +565,31 @@ TEST_F(InductionVarRangeTest, MaxValue) { ExpectEqual(Value(), MaxValue(Value(55), Value(y_, 1, -50))); } +TEST_F(InductionVarRangeTest, ArrayLengthAndHints) { + HInstruction* new_array = new (&allocator_) + HNewArray(x_, + graph_->GetCurrentMethod(), + 0, Primitive::kPrimInt, + graph_->GetDexFile(), + kQuickAllocArray); + entry_block_->AddInstruction(new_array); + HInstruction* array_length = new (&allocator_) HArrayLength(new_array, 0); + entry_block_->AddInstruction(array_length); + // With null hint: yields extreme constants. + const int32_t max_value = std::numeric_limits<int32_t>::max(); + SetHint(nullptr); + ExpectEqual(Value(0), GetMin(CreateFetch(array_length), nullptr)); + ExpectEqual(Value(max_value), GetMax(CreateFetch(array_length), nullptr)); + // With explicit hint: yields the length instruction. 
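[Editor's aside, not part of the patch: a value-level stand-in for the symbolic results this test expects; the length 100 is hypothetical.]

#include <cassert>
#include <climits>

// With the array length itself as chase hint, chasing stops at the length (the
// test expects Value(array_length, 1, 0)); with a null hint, only the extreme
// constants [0, INT_MAX] are known for an arbitrary length.
struct Range { int min; int max; };

Range FetchRange(int length_value, bool stop_at_length) {
  return stop_at_length ? Range{length_value, length_value} : Range{0, INT_MAX};
}

int main() {
  assert(FetchRange(100, true).min == 100 && FetchRange(100, true).max == 100);
  assert(FetchRange(100, false).min == 0 && FetchRange(100, false).max == INT_MAX);
  return 0;
}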
+ SetHint(array_length); + ExpectEqual(Value(array_length, 1, 0), GetMin(CreateFetch(array_length), nullptr)); + ExpectEqual(Value(array_length, 1, 0), GetMax(CreateFetch(array_length), nullptr)); + // With any non-null hint: chases beyond the length instruction. + SetHint(x_); + ExpectEqual(Value(x_, 1, 0), GetMin(CreateFetch(array_length), nullptr)); + ExpectEqual(Value(x_, 1, 0), GetMax(CreateFetch(array_length), nullptr)); +} + // // Tests on public methods. // @@ -570,23 +602,20 @@ TEST_F(InductionVarRangeTest, ConstantTripCountUp) { bool needs_finite_test = true; // In context of header: known. - range_.GetInductionRange(condition_, condition_->InputAt(0), &v1, &v2, &needs_finite_test); + range_.GetInductionRange(condition_, condition_->InputAt(0), x_, &v1, &v2, &needs_finite_test); EXPECT_FALSE(needs_finite_test); ExpectEqual(Value(0), v1); ExpectEqual(Value(1000), v2); - EXPECT_FALSE(range_.RefineOuter(&v1, &v2)); // In context of loop-body: known. - range_.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test); + range_.GetInductionRange(increment_, condition_->InputAt(0), x_, &v1, &v2, &needs_finite_test); EXPECT_FALSE(needs_finite_test); ExpectEqual(Value(0), v1); ExpectEqual(Value(999), v2); - EXPECT_FALSE(range_.RefineOuter(&v1, &v2)); - range_.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test); + range_.GetInductionRange(increment_, increment_, x_, &v1, &v2, &needs_finite_test); EXPECT_FALSE(needs_finite_test); ExpectEqual(Value(1), v1); ExpectEqual(Value(1000), v2); - EXPECT_FALSE(range_.RefineOuter(&v1, &v2)); } TEST_F(InductionVarRangeTest, ConstantTripCountDown) { @@ -597,23 +626,20 @@ TEST_F(InductionVarRangeTest, ConstantTripCountDown) { bool needs_finite_test = true; // In context of header: known. - range_.GetInductionRange(condition_, condition_->InputAt(0), &v1, &v2, &needs_finite_test); + range_.GetInductionRange(condition_, condition_->InputAt(0), x_, &v1, &v2, &needs_finite_test); EXPECT_FALSE(needs_finite_test); ExpectEqual(Value(0), v1); ExpectEqual(Value(1000), v2); - EXPECT_FALSE(range_.RefineOuter(&v1, &v2)); // In context of loop-body: known. - range_.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test); + range_.GetInductionRange(increment_, condition_->InputAt(0), x_, &v1, &v2, &needs_finite_test); EXPECT_FALSE(needs_finite_test); ExpectEqual(Value(1), v1); ExpectEqual(Value(1000), v2); - EXPECT_FALSE(range_.RefineOuter(&v1, &v2)); - range_.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test); + range_.GetInductionRange(increment_, increment_, x_, &v1, &v2, &needs_finite_test); EXPECT_FALSE(needs_finite_test); ExpectEqual(Value(0), v1); ExpectEqual(Value(999), v2); - EXPECT_FALSE(range_.RefineOuter(&v1, &v2)); } TEST_F(InductionVarRangeTest, SymbolicTripCountUp) { @@ -625,23 +651,20 @@ TEST_F(InductionVarRangeTest, SymbolicTripCountUp) { bool needs_taken_test = true; // In context of header: upper unknown. - range_.GetInductionRange(condition_, condition_->InputAt(0), &v1, &v2, &needs_finite_test); + range_.GetInductionRange(condition_, condition_->InputAt(0), x_, &v1, &v2, &needs_finite_test); EXPECT_FALSE(needs_finite_test); ExpectEqual(Value(0), v1); ExpectEqual(Value(), v2); - EXPECT_FALSE(range_.RefineOuter(&v1, &v2)); // In context of loop-body: known. 
- range_.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test); + range_.GetInductionRange(increment_, condition_->InputAt(0), x_, &v1, &v2, &needs_finite_test); EXPECT_FALSE(needs_finite_test); ExpectEqual(Value(0), v1); ExpectEqual(Value(x_, 1, -1), v2); - EXPECT_FALSE(range_.RefineOuter(&v1, &v2)); - range_.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test); + range_.GetInductionRange(increment_, increment_, x_, &v1, &v2, &needs_finite_test); EXPECT_FALSE(needs_finite_test); ExpectEqual(Value(1), v1); ExpectEqual(Value(x_, 1, 0), v2); - EXPECT_FALSE(range_.RefineOuter(&v1, &v2)); HInstruction* lower = nullptr; HInstruction* upper = nullptr; @@ -695,23 +718,20 @@ TEST_F(InductionVarRangeTest, SymbolicTripCountDown) { bool needs_taken_test = true; // In context of header: lower unknown. - range_.GetInductionRange(condition_, condition_->InputAt(0), &v1, &v2, &needs_finite_test); + range_.GetInductionRange(condition_, condition_->InputAt(0), x_, &v1, &v2, &needs_finite_test); EXPECT_FALSE(needs_finite_test); ExpectEqual(Value(), v1); ExpectEqual(Value(1000), v2); - EXPECT_FALSE(range_.RefineOuter(&v1, &v2)); // In context of loop-body: known. - range_.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test); + range_.GetInductionRange(increment_, condition_->InputAt(0), x_, &v1, &v2, &needs_finite_test); EXPECT_FALSE(needs_finite_test); ExpectEqual(Value(x_, 1, 1), v1); ExpectEqual(Value(1000), v2); - EXPECT_FALSE(range_.RefineOuter(&v1, &v2)); - range_.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test); + range_.GetInductionRange(increment_, increment_, x_, &v1, &v2, &needs_finite_test); EXPECT_FALSE(needs_finite_test); ExpectEqual(Value(x_, 1, 0), v1); ExpectEqual(Value(999), v2); - EXPECT_FALSE(range_.RefineOuter(&v1, &v2)); HInstruction* lower = nullptr; HInstruction* upper = nullptr; diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index 3041c4d2c7..e0410dcdb2 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -54,6 +54,9 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor { // De Morgan's laws: // ~a & ~b = ~(a | b) and ~a | ~b = ~(a & b) bool TryDeMorganNegationFactoring(HBinaryOperation* op); + bool TryHandleAssociativeAndCommutativeOperation(HBinaryOperation* instruction); + bool TrySubtractionChainSimplification(HBinaryOperation* instruction); + void VisitShift(HBinaryOperation* shift); void VisitEqual(HEqual* equal) OVERRIDE; @@ -963,7 +966,18 @@ void InstructionSimplifierVisitor::VisitAdd(HAdd* instruction) { return; } - TryReplaceWithRotate(instruction); + if (TryReplaceWithRotate(instruction)) { + return; + } + + // TryHandleAssociativeAndCommutativeOperation() does not remove its input, + // so no need to return. + TryHandleAssociativeAndCommutativeOperation(instruction); + + if ((instruction->GetLeft()->IsSub() || instruction->GetRight()->IsSub()) && + TrySubtractionChainSimplification(instruction)) { + return; + } } void InstructionSimplifierVisitor::VisitAnd(HAnd* instruction) { @@ -1025,7 +1039,13 @@ void InstructionSimplifierVisitor::VisitAnd(HAnd* instruction) { return; } - TryDeMorganNegationFactoring(instruction); + if (TryDeMorganNegationFactoring(instruction)) { + return; + } + + // TryHandleAssociativeAndCommutativeOperation() does not remove its input, + // so no need to return. 
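[Editor's aside, not part of the patch: the source-level effect of the reassociation named above, checked standalone with made-up constants.]

#include <cassert>

int main() {
  // OP y, x, const1; OP z, y, const2  ==>  OP z, x, const3
  // for an associative and commutative OP, here integer addition.
  int x = 7;
  int y = x + 3;       // const1
  int z = y + 5;       // const2; y has no other use
  assert(z == x + 8);  // const3 == const1 + const2
  return 0;
}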
+ TryHandleAssociativeAndCommutativeOperation(instruction); } void InstructionSimplifierVisitor::VisitGreaterThan(HGreaterThan* condition) { @@ -1242,6 +1262,7 @@ void InstructionSimplifierVisitor::VisitMul(HMul* instruction) { instruction->ReplaceWith(input_cst); instruction->GetBlock()->RemoveInstruction(instruction); RecordSimplification(); + return; } else if (IsPowerOfTwo(factor)) { // Replace code looking like // MUL dst, src, pow_of_2 @@ -1251,6 +1272,7 @@ void InstructionSimplifierVisitor::VisitMul(HMul* instruction) { HShl* shl = new (allocator) HShl(type, input_other, shift); block->ReplaceAndRemoveInstructionWith(instruction, shl); RecordSimplification(); + return; } else if (IsPowerOfTwo(factor - 1)) { // Transform code looking like // MUL dst, src, (2^n + 1) @@ -1265,6 +1287,7 @@ void InstructionSimplifierVisitor::VisitMul(HMul* instruction) { block->InsertInstructionBefore(shl, instruction); block->ReplaceAndRemoveInstructionWith(instruction, add); RecordSimplification(); + return; } else if (IsPowerOfTwo(factor + 1)) { // Transform code looking like // MUL dst, src, (2^n - 1) @@ -1279,8 +1302,13 @@ void InstructionSimplifierVisitor::VisitMul(HMul* instruction) { block->InsertInstructionBefore(shl, instruction); block->ReplaceAndRemoveInstructionWith(instruction, sub); RecordSimplification(); + return; } } + + // TryHandleAssociativeAndCommutativeOperation() does not remove its input, + // so no need to return. + TryHandleAssociativeAndCommutativeOperation(instruction); } void InstructionSimplifierVisitor::VisitNeg(HNeg* instruction) { @@ -1380,7 +1408,13 @@ void InstructionSimplifierVisitor::VisitOr(HOr* instruction) { if (TryDeMorganNegationFactoring(instruction)) return; - TryReplaceWithRotate(instruction); + if (TryReplaceWithRotate(instruction)) { + return; + } + + // TryHandleAssociativeAndCommutativeOperation() does not remove its input, + // so no need to return. + TryHandleAssociativeAndCommutativeOperation(instruction); } void InstructionSimplifierVisitor::VisitShl(HShl* instruction) { @@ -1471,6 +1505,11 @@ void InstructionSimplifierVisitor::VisitSub(HSub* instruction) { instruction->GetBlock()->RemoveInstruction(instruction); RecordSimplification(); left->GetBlock()->RemoveInstruction(left); + return; + } + + if (TrySubtractionChainSimplification(instruction)) { + return; } } @@ -1524,7 +1563,13 @@ void InstructionSimplifierVisitor::VisitXor(HXor* instruction) { return; } - TryReplaceWithRotate(instruction); + if (TryReplaceWithRotate(instruction)) { + return; + } + + // TryHandleAssociativeAndCommutativeOperation() does not remove its input, + // so no need to return. + TryHandleAssociativeAndCommutativeOperation(instruction); } void InstructionSimplifierVisitor::SimplifyStringEquals(HInvoke* instruction) { @@ -1823,4 +1868,150 @@ void InstructionSimplifierVisitor::VisitDeoptimize(HDeoptimize* deoptimize) { } } +// Replace code looking like +// OP y, x, const1 +// OP z, y, const2 +// with +// OP z, x, const3 +// where OP is both an associative and a commutative operation. +bool InstructionSimplifierVisitor::TryHandleAssociativeAndCommutativeOperation( + HBinaryOperation* instruction) { + DCHECK(instruction->IsCommutative()); + + if (!Primitive::IsIntegralType(instruction->GetType())) { + return false; + } + + HInstruction* left = instruction->GetLeft(); + HInstruction* right = instruction->GetRight(); + // Variable names as described above. 
+ HConstant* const2; + HBinaryOperation* y; + + if (instruction->InstructionTypeEquals(left) && right->IsConstant()) { + const2 = right->AsConstant(); + y = left->AsBinaryOperation(); + } else if (left->IsConstant() && instruction->InstructionTypeEquals(right)) { + const2 = left->AsConstant(); + y = right->AsBinaryOperation(); + } else { + // The node does not match the pattern. + return false; + } + + // If `y` has more than one use, we do not perform the optimization + // because it might increase code size (e.g. if the new constant is + // no longer encodable as an immediate operand in the target ISA). + if (!y->HasOnlyOneNonEnvironmentUse()) { + return false; + } + + // GetConstantRight() can return both left and right constants + // for commutative operations. + HConstant* const1 = y->GetConstantRight(); + if (const1 == nullptr) { + return false; + } + + instruction->ReplaceInput(const1, 0); + instruction->ReplaceInput(const2, 1); + HConstant* const3 = instruction->TryStaticEvaluation(); + DCHECK(const3 != nullptr); + instruction->ReplaceInput(y->GetLeastConstantLeft(), 0); + instruction->ReplaceInput(const3, 1); + RecordSimplification(); + return true; +} + +static HBinaryOperation* AsAddOrSub(HInstruction* binop) { + return (binop->IsAdd() || binop->IsSub()) ? binop->AsBinaryOperation() : nullptr; +} + +// Helper function that performs addition statically, considering the result type. +static int64_t ComputeAddition(Primitive::Type type, int64_t x, int64_t y) { + // Use the Compute() method for consistency with TryStaticEvaluation(). + if (type == Primitive::kPrimInt) { + return HAdd::Compute<int32_t>(x, y); + } else { + DCHECK_EQ(type, Primitive::kPrimLong); + return HAdd::Compute<int64_t>(x, y); + } +} + +// Helper function that handles the child classes of HConstant +// and returns an integer with the appropriate sign. +static int64_t GetValue(HConstant* constant, bool is_negated) { + int64_t ret = Int64FromConstant(constant); + return is_negated ? -ret : ret; +} + +// Replace code looking like +// OP1 y, x, const1 +// OP2 z, y, const2 +// with +// OP3 z, x, const3 +// where OPx is either ADD or SUB, and at least one of OP{1,2} is SUB. +bool InstructionSimplifierVisitor::TrySubtractionChainSimplification( + HBinaryOperation* instruction) { + DCHECK(instruction->IsAdd() || instruction->IsSub()) << instruction->DebugName(); + + Primitive::Type type = instruction->GetType(); + if (!Primitive::IsIntegralType(type)) { + return false; + } + + HInstruction* left = instruction->GetLeft(); + HInstruction* right = instruction->GetRight(); + // Variable names as described above. + HConstant* const2 = right->IsConstant() ? right->AsConstant() : left->AsConstant(); + if (const2 == nullptr) { + return false; + } + + HBinaryOperation* y = (AsAddOrSub(left) != nullptr) + ? left->AsBinaryOperation() + : AsAddOrSub(right); + // If y has more than one use, we do not perform the optimization because + // it might increase code size (e.g. if the new constant is no longer + // encodable as an immediate operand in the target ISA). + if ((y == nullptr) || !y->HasOnlyOneNonEnvironmentUse()) { + return false; + } + + left = y->GetLeft(); + HConstant* const1 = left->IsConstant() ? left->AsConstant() : y->GetRight()->AsConstant(); + if (const1 == nullptr) { + return false; + } + + HInstruction* x = (const1 == left) ? y->GetRight() : left; + // If both inputs are constants, let the constant folding pass deal with it. 
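[Editor's aside, not part of the patch: the sign bookkeeping that follows folds mixed ADD/SUB chains; two cases checked standalone with made-up constants.]

#include <cassert>

int main() {
  // OP1 y, x, const1; OP2 z, y, const2, at least one of them SUB:
  //   y = x - 3; z = y + 5  ==>  z = x + 2
  //   y = 3 - x; z = y + 5  ==>  z = 8 - x  (x negated, HSub form)
  int x = 7;
  assert((x - 3) + 5 == x + 2);
  assert((3 - x) + 5 == 8 - x);
  return 0;
}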
+ if (x->IsConstant()) { + return false; + } + + bool is_const2_negated = (const2 == right) && instruction->IsSub(); + int64_t const2_val = GetValue(const2, is_const2_negated); + bool is_y_negated = (y == right) && instruction->IsSub(); + right = y->GetRight(); + bool is_const1_negated = is_y_negated ^ ((const1 == right) && y->IsSub()); + int64_t const1_val = GetValue(const1, is_const1_negated); + bool is_x_negated = is_y_negated ^ ((x == right) && y->IsSub()); + int64_t const3_val = ComputeAddition(type, const1_val, const2_val); + HBasicBlock* block = instruction->GetBlock(); + HConstant* const3 = block->GetGraph()->GetConstant(type, const3_val); + ArenaAllocator* arena = instruction->GetArena(); + HInstruction* z; + + if (is_x_negated) { + z = new (arena) HSub(type, const3, x, instruction->GetDexPc()); + } else { + z = new (arena) HAdd(type, x, const3, instruction->GetDexPc()); + } + + block->ReplaceAndRemoveInstructionWith(instruction, z); + RecordSimplification(); + return true; +} + } // namespace art diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index c2c212b66f..d557f42968 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -754,7 +754,7 @@ bool HBasicBlock::Dominates(HBasicBlock* other) const { } static void UpdateInputsUsers(HInstruction* instruction) { - auto&& inputs = instruction->GetInputs(); + HInputsRef inputs = instruction->GetInputs(); for (size_t i = 0; i < inputs.size(); ++i) { inputs[i]->AddUseAt(instruction, i); } @@ -1312,8 +1312,8 @@ bool HInstruction::Equals(const HInstruction* other) const { DCHECK_EQ(GetKind(), other->GetKind()); if (!InstructionDataEquals(other)) return false; if (GetType() != other->GetType()) return false; - auto&& inputs = GetInputs(); - auto&& other_inputs = other->GetInputs(); + HConstInputsRef inputs = GetInputs(); + HConstInputsRef other_inputs = other->GetInputs(); if (inputs.size() != other_inputs.size()) return false; for (size_t i = 0; i != inputs.size(); ++i) { if (inputs[i] != other_inputs[i]) return false; diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 6b2c33e668..0f0ef26ea9 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -1307,7 +1307,13 @@ class HLoopInformationOutwardIterator : public ValueObject { M(Arm64IntermediateAddress, Instruction) #endif +#ifndef ART_ENABLE_CODEGEN_mips #define FOR_EACH_CONCRETE_INSTRUCTION_MIPS(M) +#else +#define FOR_EACH_CONCRETE_INSTRUCTION_MIPS(M) \ + M(MipsComputeBaseMethodAddress, Instruction) \ + M(MipsDexCacheArraysBase, Instruction) +#endif #define FOR_EACH_CONCRETE_INSTRUCTION_MIPS64(M) @@ -1415,6 +1421,21 @@ class HUserRecord : public ValueObject { typename HUseList<T>::iterator before_use_node_; }; +// Helper class that extracts the input instruction from HUserRecord<HInstruction*>. +// This is used for HInstruction::GetInputs() to return a container wrapper providing +// HInstruction* values even though the underlying container has HUserRecord<>s. +struct HInputExtractor { + HInstruction* operator()(HUserRecord<HInstruction*>& record) const { + return record.GetInstruction(); + } + const HInstruction* operator()(const HUserRecord<HInstruction*>& record) const { + return record.GetInstruction(); + } +}; + +using HInputsRef = TransformArrayRef<HUserRecord<HInstruction*>, HInputExtractor>; +using HConstInputsRef = TransformArrayRef<const HUserRecord<HInstruction*>, HInputExtractor>; + /** * Side-effects representation. 
* @@ -1804,20 +1825,12 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { const_cast<HInstruction*>(this)->GetInputRecords()); } - auto GetInputs() { - return MakeTransformArrayRef( - GetInputRecords(), - [](HUserRecord<HInstruction*>& record) -> HInstruction* { - return record.GetInstruction(); - }); + HInputsRef GetInputs() { + return MakeTransformArrayRef(GetInputRecords(), HInputExtractor()); } - auto GetInputs() const { - return MakeTransformArrayRef( - GetInputRecords(), - [](const HUserRecord<HInstruction*>& record) -> const HInstruction* { - return record.GetInstruction(); - }); + HConstInputsRef GetInputs() const { + return MakeTransformArrayRef(GetInputRecords(), HInputExtractor()); } size_t InputCount() const { return GetInputRecords().size(); } @@ -2408,7 +2421,7 @@ class HPhi FINAL : public HInstruction { bool IsDead() const { return !IsLive(); } bool IsLive() const { return GetPackedFlag<kFlagIsLive>(); } - bool IsVRegEquivalentOf(HInstruction* other) const { + bool IsVRegEquivalentOf(const HInstruction* other) const { return other != nullptr && other->IsPhi() && other->AsPhi()->GetBlock() == GetBlock() @@ -3160,7 +3173,7 @@ class HEqual FINAL : public HCondition { } private: - template <typename T> bool Compute(T x, T y) const { return x == y; } + template <typename T> static bool Compute(T x, T y) { return x == y; } DISALLOW_COPY_AND_ASSIGN(HEqual); }; @@ -3203,7 +3216,7 @@ class HNotEqual FINAL : public HCondition { } private: - template <typename T> bool Compute(T x, T y) const { return x != y; } + template <typename T> static bool Compute(T x, T y) { return x != y; } DISALLOW_COPY_AND_ASSIGN(HNotEqual); }; @@ -3240,7 +3253,7 @@ class HLessThan FINAL : public HCondition { } private: - template <typename T> bool Compute(T x, T y) const { return x < y; } + template <typename T> static bool Compute(T x, T y) { return x < y; } DISALLOW_COPY_AND_ASSIGN(HLessThan); }; @@ -3277,7 +3290,7 @@ class HLessThanOrEqual FINAL : public HCondition { } private: - template <typename T> bool Compute(T x, T y) const { return x <= y; } + template <typename T> static bool Compute(T x, T y) { return x <= y; } DISALLOW_COPY_AND_ASSIGN(HLessThanOrEqual); }; @@ -3314,7 +3327,7 @@ class HGreaterThan FINAL : public HCondition { } private: - template <typename T> bool Compute(T x, T y) const { return x > y; } + template <typename T> static bool Compute(T x, T y) { return x > y; } DISALLOW_COPY_AND_ASSIGN(HGreaterThan); }; @@ -3351,7 +3364,7 @@ class HGreaterThanOrEqual FINAL : public HCondition { } private: - template <typename T> bool Compute(T x, T y) const { return x >= y; } + template <typename T> static bool Compute(T x, T y) { return x >= y; } DISALLOW_COPY_AND_ASSIGN(HGreaterThanOrEqual); }; @@ -3389,7 +3402,7 @@ class HBelow FINAL : public HCondition { } private: - template <typename T> bool Compute(T x, T y) const { + template <typename T> static bool Compute(T x, T y) { return MakeUnsigned(x) < MakeUnsigned(y); } @@ -3429,7 +3442,7 @@ class HBelowOrEqual FINAL : public HCondition { } private: - template <typename T> bool Compute(T x, T y) const { + template <typename T> static bool Compute(T x, T y) { return MakeUnsigned(x) <= MakeUnsigned(y); } @@ -3469,7 +3482,7 @@ class HAbove FINAL : public HCondition { } private: - template <typename T> bool Compute(T x, T y) const { + template <typename T> static bool Compute(T x, T y) { return MakeUnsigned(x) > MakeUnsigned(y); } @@ -3509,7 +3522,7 @@ class HAboveOrEqual FINAL : public HCondition { } private: - template <typename T> 
bool Compute(T x, T y) const { + template <typename T> static bool Compute(T x, T y) { return MakeUnsigned(x) >= MakeUnsigned(y); } @@ -4182,7 +4195,7 @@ class HNeg FINAL : public HUnaryOperation { DCHECK_EQ(result_type, Primitive::PrimitiveKind(input->GetType())); } - template <typename T> T Compute(T x) const { return -x; } + template <typename T> static T Compute(T x) { return -x; } HConstant* Evaluate(HIntConstant* x) const OVERRIDE { return GetBlock()->GetGraph()->GetIntConstant(Compute(x->GetValue()), GetDexPc()); @@ -4252,7 +4265,7 @@ class HAdd FINAL : public HBinaryOperation { bool IsCommutative() const OVERRIDE { return true; } - template <typename T> T Compute(T x, T y) const { return x + y; } + template <typename T> static T Compute(T x, T y) { return x + y; } HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { return GetBlock()->GetGraph()->GetIntConstant( @@ -4285,7 +4298,7 @@ class HSub FINAL : public HBinaryOperation { uint32_t dex_pc = kNoDexPc) : HBinaryOperation(result_type, left, right, SideEffects::None(), dex_pc) {} - template <typename T> T Compute(T x, T y) const { return x - y; } + template <typename T> static T Compute(T x, T y) { return x - y; } HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { return GetBlock()->GetGraph()->GetIntConstant( @@ -4320,7 +4333,7 @@ class HMul FINAL : public HBinaryOperation { bool IsCommutative() const OVERRIDE { return true; } - template <typename T> T Compute(T x, T y) const { return x * y; } + template <typename T> static T Compute(T x, T y) { return x * y; } HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { return GetBlock()->GetGraph()->GetIntConstant( @@ -4486,7 +4499,7 @@ class HShl FINAL : public HBinaryOperation { } template <typename T> - T Compute(T value, int32_t distance, int32_t max_shift_distance) const { + static T Compute(T value, int32_t distance, int32_t max_shift_distance) { return value << (distance & max_shift_distance); } @@ -4532,7 +4545,7 @@ class HShr FINAL : public HBinaryOperation { } template <typename T> - T Compute(T value, int32_t distance, int32_t max_shift_distance) const { + static T Compute(T value, int32_t distance, int32_t max_shift_distance) { return value >> (distance & max_shift_distance); } @@ -4578,7 +4591,7 @@ class HUShr FINAL : public HBinaryOperation { } template <typename T> - T Compute(T value, int32_t distance, int32_t max_shift_distance) const { + static T Compute(T value, int32_t distance, int32_t max_shift_distance) { typedef typename std::make_unsigned<T>::type V; V ux = static_cast<V>(value); return static_cast<T>(ux >> (distance & max_shift_distance)); @@ -4624,7 +4637,7 @@ class HAnd FINAL : public HBinaryOperation { bool IsCommutative() const OVERRIDE { return true; } - template <typename T> T Compute(T x, T y) const { return x & y; } + template <typename T> static T Compute(T x, T y) { return x & y; } HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { return GetBlock()->GetGraph()->GetIntConstant( @@ -4661,7 +4674,7 @@ class HOr FINAL : public HBinaryOperation { bool IsCommutative() const OVERRIDE { return true; } - template <typename T> T Compute(T x, T y) const { return x | y; } + template <typename T> static T Compute(T x, T y) { return x | y; } HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { return GetBlock()->GetGraph()->GetIntConstant( @@ -4698,7 +4711,7 @@ class HXor FINAL : public HBinaryOperation { bool IsCommutative() const OVERRIDE { return true; } - template 
<typename T> T Compute(T x, T y) const { return x ^ y; } + template <typename T> static T Compute(T x, T y) { return x ^ y; } HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { return GetBlock()->GetGraph()->GetIntConstant( @@ -4734,7 +4747,7 @@ class HRor FINAL : public HBinaryOperation { } template <typename T> - T Compute(T value, int32_t distance, int32_t max_shift_value) const { + static T Compute(T value, int32_t distance, int32_t max_shift_value) { typedef typename std::make_unsigned<T>::type V; V ux = static_cast<V>(value); if ((distance & max_shift_value) == 0) { @@ -4830,7 +4843,7 @@ class HNot FINAL : public HUnaryOperation { return true; } - template <typename T> T Compute(T x) const { return ~x; } + template <typename T> static T Compute(T x) { return ~x; } HConstant* Evaluate(HIntConstant* x) const OVERRIDE { return GetBlock()->GetGraph()->GetIntConstant(Compute(x->GetValue()), GetDexPc()); @@ -4863,7 +4876,7 @@ class HBooleanNot FINAL : public HUnaryOperation { return true; } - template <typename T> bool Compute(T x) const { + template <typename T> static bool Compute(T x) { DCHECK(IsUint<1>(x)) << x; return !x; } @@ -6544,6 +6557,9 @@ class HParallelMove FINAL : public HTemplateInstruction<0> { #ifdef ART_ENABLE_CODEGEN_arm64 #include "nodes_arm64.h" #endif +#ifdef ART_ENABLE_CODEGEN_mips +#include "nodes_mips.h" +#endif #ifdef ART_ENABLE_CODEGEN_x86 #include "nodes_x86.h" #endif diff --git a/compiler/optimizing/nodes_mips.h b/compiler/optimizing/nodes_mips.h new file mode 100644 index 0000000000..de77245e17 --- /dev/null +++ b/compiler/optimizing/nodes_mips.h @@ -0,0 +1,71 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_NODES_MIPS_H_ +#define ART_COMPILER_OPTIMIZING_NODES_MIPS_H_ + +namespace art { + +// Compute the address of the method for MIPS Constant area support. +class HMipsComputeBaseMethodAddress : public HExpression<0> { + public: + // Treat the value as an int32_t, but it is really a 32 bit native pointer. + HMipsComputeBaseMethodAddress() + : HExpression(Primitive::kPrimInt, SideEffects::None(), kNoDexPc) {} + + bool CanBeMoved() const OVERRIDE { return true; } + + DECLARE_INSTRUCTION(MipsComputeBaseMethodAddress); + + private: + DISALLOW_COPY_AND_ASSIGN(HMipsComputeBaseMethodAddress); +}; + +class HMipsDexCacheArraysBase : public HExpression<0> { + public: + explicit HMipsDexCacheArraysBase(const DexFile& dex_file) + : HExpression(Primitive::kPrimInt, SideEffects::None(), kNoDexPc), + dex_file_(&dex_file), + element_offset_(static_cast<size_t>(-1)) { } + + bool CanBeMoved() const OVERRIDE { return true; } + + void UpdateElementOffset(size_t element_offset) { + // We'll maximize the range of a single load instruction for dex cache array accesses + // by aligning offset -32768 with the offset of the first used element. 
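[Editor's aside, not part of the patch: the arithmetic behind anchoring at -32768, using a hypothetical first element offset. A MIPS lw takes a signed 16-bit displacement, so one base register covers [base - 32768, base + 32767].]

#include <cassert>
#include <cstdint>

int main() {
  uint32_t first_used = 0x1234;         // hypothetical element offset
  uint32_t base = first_used + 0x8000;  // align -32768 with first used element
  int32_t disp = static_cast<int32_t>(first_used - base);
  assert(disp == -32768);                       // first element at minimum displacement
  assert(first_used + 0xFFFF == base + 32767);  // 64 KiB window from first element
  return 0;
}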
+ element_offset_ = std::min(element_offset_, element_offset); + } + + const DexFile& GetDexFile() const { + return *dex_file_; + } + + size_t GetElementOffset() const { + return element_offset_; + } + + DECLARE_INSTRUCTION(MipsDexCacheArraysBase); + + private: + const DexFile* dex_file_; + size_t element_offset_; + + DISALLOW_COPY_AND_ASSIGN(HMipsDexCacheArraysBase); +}; + +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_NODES_MIPS_H_ diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index c9a4bfe987..d703b0f94f 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -28,6 +28,11 @@ #include "instruction_simplifier_arm64.h" #endif +#ifdef ART_ENABLE_CODEGEN_mips +#include "dex_cache_array_fixups_mips.h" +#include "pc_relative_fixups_mips.h" +#endif + #ifdef ART_ENABLE_CODEGEN_x86 #include "pc_relative_fixups_x86.h" #endif @@ -462,6 +467,20 @@ static void RunArchOptimizations(InstructionSet instruction_set, break; } #endif +#ifdef ART_ENABLE_CODEGEN_mips + case kMips: { + mips::PcRelativeFixups* pc_relative_fixups = + new (arena) mips::PcRelativeFixups(graph, codegen, stats); + mips::DexCacheArrayFixups* dex_cache_array_fixups = + new (arena) mips::DexCacheArrayFixups(graph, stats); + HOptimization* mips_optimizations[] = { + pc_relative_fixups, + dex_cache_array_fixups + }; + RunOptimizations(mips_optimizations, arraysize(mips_optimizations), pass_observer); + break; + } +#endif #ifdef ART_ENABLE_CODEGEN_x86 case kX86: { x86::PcRelativeFixups* pc_relative_fixups = diff --git a/compiler/optimizing/pc_relative_fixups_mips.cc b/compiler/optimizing/pc_relative_fixups_mips.cc new file mode 100644 index 0000000000..ba405cdb69 --- /dev/null +++ b/compiler/optimizing/pc_relative_fixups_mips.cc @@ -0,0 +1,133 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "pc_relative_fixups_mips.h" +#include "code_generator_mips.h" +#include "intrinsics_mips.h" + +namespace art { +namespace mips { + +/** + * Finds instructions that need the constant area base as an input. + */ +class PCRelativeHandlerVisitor : public HGraphVisitor { + public: + PCRelativeHandlerVisitor(HGraph* graph, CodeGenerator* codegen) + : HGraphVisitor(graph), + codegen_(down_cast<CodeGeneratorMIPS*>(codegen)), + base_(nullptr) {} + + void MoveBaseIfNeeded() { + if (base_ != nullptr) { + // Bring the base closer to the first use (previously, it was in the + // entry block) and relieve some pressure on the register allocator + // while avoiding recalculation of the base in a loop. + base_->MoveBeforeFirstUserAndOutOfLoops(); + } + } + + private: + void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE { + HandleInvoke(invoke); + } + + void InitializePCRelativeBasePointer() { + // Ensure we only initialize the pointer once. 
+ if (base_ != nullptr) { + return; + } + // Insert the base at the start of the entry block, move it to a better + // position later in MoveBaseIfNeeded(). + base_ = new (GetGraph()->GetArena()) HMipsComputeBaseMethodAddress(); + HBasicBlock* entry_block = GetGraph()->GetEntryBlock(); + entry_block->InsertInstructionBefore(base_, entry_block->GetFirstInstruction()); + DCHECK(base_ != nullptr); + } + + void HandleInvoke(HInvoke* invoke) { + // If this is an invoke-static/-direct with PC-relative dex cache array + // addressing, we need the PC-relative address base. + HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirect(); + if (invoke_static_or_direct != nullptr) { + HInvokeStaticOrDirect::MethodLoadKind method_load_kind = + invoke_static_or_direct->GetMethodLoadKind(); + HInvokeStaticOrDirect::CodePtrLocation code_ptr_location = + invoke_static_or_direct->GetCodePtrLocation(); + + bool has_extra_input = + (method_load_kind == HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup) || + (code_ptr_location == HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup); + + // We can't add a pointer to the constant area if we already have a current + // method pointer. This may arise when sharpening doesn't remove the current + // method pointer from the invoke. + if (invoke_static_or_direct->HasCurrentMethodInput()) { + DCHECK(!invoke_static_or_direct->HasPcRelativeDexCache()); + CHECK(!has_extra_input); // TODO: review this. + return; + } + + if (has_extra_input && !WillHaveCallFreeIntrinsicsCodeGen(invoke)) { + InitializePCRelativeBasePointer(); + // Add the extra parameter base_. + invoke_static_or_direct->AddSpecialInput(base_); + } + } + } + + bool WillHaveCallFreeIntrinsicsCodeGen(HInvoke* invoke) { + if (invoke->GetIntrinsic() != Intrinsics::kNone) { + // This invoke may have intrinsic code generation defined. However, we must + // now also determine if this code generation is truly there and call-free + // (not unimplemented, no bail on instruction features, or call on slow path). + // This is done by actually calling the locations builder on the instruction + // and clearing out the locations once result is known. We assume this + // call only has creating locations as side effects! + IntrinsicLocationsBuilderMIPS builder(codegen_); + bool success = builder.TryDispatch(invoke) && !invoke->GetLocations()->CanCall(); + invoke->SetLocations(nullptr); + return success; + } + return false; + } + + CodeGeneratorMIPS* codegen_; + + // The generated HMipsComputeBaseMethodAddress in the entry block needed as an + // input to the HMipsLoadFromConstantTable instructions. + HMipsComputeBaseMethodAddress* base_; +}; + +void PcRelativeFixups::Run() { + CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen_); + if (mips_codegen->GetInstructionSetFeatures().IsR6()) { + // Do nothing for R6 because it has PC-relative addressing. + // TODO: review. Move this check into RunArchOptimizations()? + return; + } + if (graph_->HasIrreducibleLoops()) { + // Do not run this optimization, as irreducible loops do not work with an instruction + // that can be live-in at the irreducible loop header. 
+ return; + } + PCRelativeHandlerVisitor visitor(graph_, codegen_); + visitor.VisitInsertionOrder(); + visitor.MoveBaseIfNeeded(); +} + +} // namespace mips +} // namespace art diff --git a/compiler/optimizing/pc_relative_fixups_mips.h b/compiler/optimizing/pc_relative_fixups_mips.h new file mode 100644 index 0000000000..1e8b071bb3 --- /dev/null +++ b/compiler/optimizing/pc_relative_fixups_mips.h @@ -0,0 +1,44 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_PC_RELATIVE_FIXUPS_MIPS_H_ +#define ART_COMPILER_OPTIMIZING_PC_RELATIVE_FIXUPS_MIPS_H_ + +#include "nodes.h" +#include "optimization.h" + +namespace art { + +class CodeGenerator; + +namespace mips { + +class PcRelativeFixups : public HOptimization { + public: + PcRelativeFixups(HGraph* graph, CodeGenerator* codegen, OptimizingCompilerStats* stats) + : HOptimization(graph, "pc_relative_fixups_mips", stats), + codegen_(codegen) {} + + void Run() OVERRIDE; + + private: + CodeGenerator* codegen_; +}; + +} // namespace mips +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_PC_RELATIVE_FIXUPS_MIPS_H_ diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc index 93116f8bab..921f3dfff6 100644 --- a/compiler/optimizing/pc_relative_fixups_x86.cc +++ b/compiler/optimizing/pc_relative_fixups_x86.cc @@ -211,7 +211,7 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { } // Ensure that we can load FP arguments from the constant area. 
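[Editor's aside, not part of the patch: FP arguments need the constant area because x86 SSE instructions have no floating-point immediates, so the value's bit pattern lives in memory; a standalone look at one such slot.]

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  // A constant-area slot for 1.5f would hold its IEEE-754 bit pattern; the
  // generated code loads it base-relative instead of encoding an immediate.
  float f = 1.5f;
  uint32_t bits;
  std::memcpy(&bits, &f, sizeof(bits));
  assert(bits == 0x3FC00000u);
  return 0;
}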
- auto&& inputs = invoke->GetInputs(); + HInputsRef inputs = invoke->GetInputs(); for (size_t i = 0; i < inputs.size(); i++) { HConstant* input = inputs[i]->AsConstant(); if (input != nullptr && Primitive::IsFloatingPointType(input->GetType())) { diff --git a/compiler/optimizing/pretty_printer.h b/compiler/optimizing/pretty_printer.h index f9bef6809f..5891350894 100644 --- a/compiler/optimizing/pretty_printer.h +++ b/compiler/optimizing/pretty_printer.h @@ -39,7 +39,7 @@ class HPrettyPrinter : public HGraphVisitor { } void PrintPostInstruction(HInstruction* instruction) { - auto&& inputs = instruction->GetInputs(); + HConstInputsRef inputs = instruction->GetInputs(); if (!inputs.empty()) { PrintString("("); bool first = true; diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc index 3dfd7282cd..965d5ee4f9 100644 --- a/compiler/optimizing/reference_type_propagation.cc +++ b/compiler/optimizing/reference_type_propagation.cc @@ -816,10 +816,10 @@ void ReferenceTypePropagation::UpdateBoundType(HBoundType* instr) { void ReferenceTypePropagation::UpdatePhi(HPhi* instr) { DCHECK(instr->IsLive()); - auto&& inputs = instr->GetInputs(); + HInputsRef inputs = instr->GetInputs(); size_t first_input_index_not_null = 0; while (first_input_index_not_null < inputs.size() && - inputs[first_input_index_not_null]->IsNullConstant()) { + inputs[first_input_index_not_null]->IsNullConstant()) { first_input_index_not_null++; } if (first_input_index_not_null == inputs.size()) { diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index 4a6b835e80..9d99668484 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -753,7 +753,7 @@ bool RegisterAllocator::TryAllocateFreeReg(LiveInterval* current) { if (defined_by != nullptr && !current->IsSplit()) { LocationSummary* locations = defined_by->GetLocations(); if (!locations->OutputCanOverlapWithInputs() && locations->Out().IsUnallocated()) { - auto&& inputs = defined_by->GetInputs(); + HInputsRef inputs = defined_by->GetInputs(); for (size_t i = 0; i < inputs.size(); ++i) { // Take the last interval of the input. It is the location of that interval // that will be used at `defined_by`. diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc index ed50c69b5d..5a574d9af7 100644 --- a/compiler/optimizing/ssa_builder.cc +++ b/compiler/optimizing/ssa_builder.cc @@ -181,7 +181,7 @@ bool SsaBuilder::TypeInputsOfPhi(HPhi* phi, ArenaVector<HPhi*>* worklist) { return true; } else { DCHECK(common_type == Primitive::kPrimNot || Primitive::IsFloatingPointType(common_type)); - auto&& inputs = phi->GetInputs(); + HInputsRef inputs = phi->GetInputs(); for (size_t i = 0; i < inputs.size(); ++i) { HInstruction* input = inputs[i]; if (input->GetType() != common_type) { @@ -617,7 +617,7 @@ HPhi* SsaBuilder::GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, Primitive: || (next->AsPhi()->GetRegNumber() != phi->GetRegNumber()) || (next->GetType() != type)) { ArenaAllocator* allocator = graph_->GetArena(); - auto&& inputs = phi->GetInputs(); + HInputsRef inputs = phi->GetInputs(); HPhi* new_phi = new (allocator) HPhi(allocator, phi->GetRegNumber(), inputs.size(), type); // Copy the inputs. 
Note that the graph may not be correctly typed diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc index 212d93532c..7af4302884 100644 --- a/compiler/optimizing/ssa_liveness_analysis.cc +++ b/compiler/optimizing/ssa_liveness_analysis.cc @@ -177,7 +177,7 @@ void SsaLivenessAnalysis::ComputeLiveness() { static void RecursivelyProcessInputs(HInstruction* current, HInstruction* actual_user, BitVector* live_in) { - auto&& inputs = current->GetInputs(); + HInputsRef inputs = current->GetInputs(); for (size_t i = 0; i < inputs.size(); ++i) { HInstruction* input = inputs[i]; bool has_in_location = current->GetLocations()->InAt(i).IsValid(); @@ -431,7 +431,7 @@ int LiveInterval::FindFirstRegisterHint(size_t* free_until, // If the instruction dies at the phi assignment, we can try having the // same register. if (end == user->GetBlock()->GetPredecessors()[input_index]->GetLifetimeEnd()) { - auto&& inputs = user->GetInputs(); + HInputsRef inputs = user->GetInputs(); for (size_t i = 0; i < inputs.size(); ++i) { if (i == input_index) { continue; @@ -472,7 +472,7 @@ int LiveInterval::FindHintAtDefinition() const { if (defined_by_->IsPhi()) { // Try to use the same register as one of the inputs. const ArenaVector<HBasicBlock*>& predecessors = defined_by_->GetBlock()->GetPredecessors(); - auto&& inputs = defined_by_->GetInputs(); + HInputsRef inputs = defined_by_->GetInputs(); for (size_t i = 0; i < inputs.size(); ++i) { size_t end = predecessors[i]->GetLifetimeEnd(); LiveInterval* input_interval = inputs[i]->GetLiveInterval()->GetSiblingAt(end - 1); diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc index ac930833f2..ebaf1c0cab 100644 --- a/compiler/utils/mips/assembler_mips.cc +++ b/compiler/utils/mips/assembler_mips.cc @@ -39,6 +39,7 @@ void MipsAssembler::FinalizeCode() { for (auto& exception_block : exception_blocks_) { EmitExceptionPoll(&exception_block); } + EmitLiterals(); PromoteBranches(); } @@ -444,6 +445,12 @@ void MipsAssembler::Lhu(Register rt, Register rs, uint16_t imm16) { EmitI(0x25, rs, rt, imm16); } +void MipsAssembler::Lwpc(Register rs, uint32_t imm19) { + CHECK(IsR6()); + CHECK(IsUint<19>(imm19)) << imm19; + EmitI21(0x3B, rs, (0x01 << 19) | imm19); +} + void MipsAssembler::Lui(Register rt, uint16_t imm16) { EmitI(0xf, static_cast<Register>(0), rt, imm16); } @@ -532,6 +539,10 @@ void MipsAssembler::B(uint16_t imm16) { EmitI(0x4, static_cast<Register>(0), static_cast<Register>(0), imm16); } +void MipsAssembler::Bal(uint16_t imm16) { + EmitI(0x1, static_cast<Register>(0), static_cast<Register>(0x11), imm16); +} + void MipsAssembler::Beq(Register rs, Register rt, uint16_t imm16) { EmitI(0x4, rs, rt, imm16); } @@ -624,6 +635,11 @@ void MipsAssembler::Bc(uint32_t imm26) { EmitI26(0x32, imm26); } +void MipsAssembler::Balc(uint32_t imm26) { + CHECK(IsR6()); + EmitI26(0x3A, imm26); +} + void MipsAssembler::Jic(Register rt, uint16_t imm16) { CHECK(IsR6()); EmitI(0x36, static_cast<Register>(0), rt, imm16); @@ -1489,30 +1505,47 @@ void MipsAssembler::Branch::InitShortOrLong(MipsAssembler::Branch::OffsetBits of type_ = (offset_size <= branch_info_[short_type].offset_size) ? 
short_type : long_type; } -void MipsAssembler::Branch::InitializeType(bool is_call, bool is_r6) { +void MipsAssembler::Branch::InitializeType(bool is_call, bool is_literal, bool is_r6) { + CHECK_EQ(is_call && is_literal, false); OffsetBits offset_size = GetOffsetSizeNeeded(location_, target_); if (is_r6) { // R6 - if (is_call) { + if (is_literal) { + CHECK(!IsResolved()); + type_ = kR6Literal; + } else if (is_call) { InitShortOrLong(offset_size, kR6Call, kR6LongCall); - } else if (condition_ == kUncond) { - InitShortOrLong(offset_size, kR6UncondBranch, kR6LongUncondBranch); } else { - if (condition_ == kCondEQZ || condition_ == kCondNEZ) { - // Special case for beqzc/bnezc with longer offset than in other b<cond>c instructions. - type_ = (offset_size <= kOffset23) ? kR6CondBranch : kR6LongCondBranch; - } else { - InitShortOrLong(offset_size, kR6CondBranch, kR6LongCondBranch); + switch (condition_) { + case kUncond: + InitShortOrLong(offset_size, kR6UncondBranch, kR6LongUncondBranch); + break; + case kCondEQZ: + case kCondNEZ: + // Special case for beqzc/bnezc with longer offset than in other b<cond>c instructions. + type_ = (offset_size <= kOffset23) ? kR6CondBranch : kR6LongCondBranch; + break; + default: + InitShortOrLong(offset_size, kR6CondBranch, kR6LongCondBranch); + break; } } } else { // R2 - if (is_call) { + if (is_literal) { + CHECK(!IsResolved()); + type_ = kLiteral; + } else if (is_call) { InitShortOrLong(offset_size, kCall, kLongCall); - } else if (condition_ == kUncond) { - InitShortOrLong(offset_size, kUncondBranch, kLongUncondBranch); } else { - InitShortOrLong(offset_size, kCondBranch, kLongCondBranch); + switch (condition_) { + case kUncond: + InitShortOrLong(offset_size, kUncondBranch, kLongUncondBranch); + break; + default: + InitShortOrLong(offset_size, kCondBranch, kLongCondBranch); + break; + } } } old_type_ = type_; @@ -1544,14 +1577,14 @@ bool MipsAssembler::Branch::IsUncond(BranchCondition condition, Register lhs, Re } } -MipsAssembler::Branch::Branch(bool is_r6, uint32_t location, uint32_t target) +MipsAssembler::Branch::Branch(bool is_r6, uint32_t location, uint32_t target, bool is_call) : old_location_(location), location_(location), target_(target), lhs_reg_(0), rhs_reg_(0), condition_(kUncond) { - InitializeType(false, is_r6); + InitializeType(is_call, /* is_literal */ false, is_r6); } MipsAssembler::Branch::Branch(bool is_r6, @@ -1608,19 +1641,23 @@ MipsAssembler::Branch::Branch(bool is_r6, // Branch condition is always true, make the branch unconditional. condition_ = kUncond; } - InitializeType(false, is_r6); + InitializeType(/* is_call */ false, /* is_literal */ false, is_r6); } -MipsAssembler::Branch::Branch(bool is_r6, uint32_t location, uint32_t target, Register indirect_reg) +MipsAssembler::Branch::Branch(bool is_r6, uint32_t location, Register dest_reg, Register base_reg) : old_location_(location), location_(location), - target_(target), - lhs_reg_(indirect_reg), - rhs_reg_(0), + target_(kUnresolved), + lhs_reg_(dest_reg), + rhs_reg_(base_reg), condition_(kUncond) { - CHECK_NE(indirect_reg, ZERO); - CHECK_NE(indirect_reg, AT); - InitializeType(true, is_r6); + CHECK_NE(dest_reg, ZERO); + if (is_r6) { + CHECK_EQ(base_reg, ZERO); + } else { + CHECK_NE(base_reg, ZERO); + } + InitializeType(/* is_call */ false, /* is_literal */ true, is_r6); } MipsAssembler::BranchCondition MipsAssembler::Branch::OppositeCondition( @@ -1722,19 +1759,27 @@ bool MipsAssembler::Branch::IsLong() const { case kUncondBranch: case kCondBranch: case kCall: + // R2 near literal. 
+ case kLiteral: // R6 short branches. case kR6UncondBranch: case kR6CondBranch: case kR6Call: + // R6 near literal. + case kR6Literal: return false; // R2 long branches. case kLongUncondBranch: case kLongCondBranch: case kLongCall: + // R2 far literal. + case kFarLiteral: // R6 long branches. case kR6LongUncondBranch: case kR6LongCondBranch: case kR6LongCall: + // R6 far literal. + case kR6FarLiteral: return true; } UNREACHABLE(); @@ -1803,6 +1848,10 @@ void MipsAssembler::Branch::PromoteToLong() { case kCall: type_ = kLongCall; break; + // R2 near literal. + case kLiteral: + type_ = kFarLiteral; + break; // R6 short branches. case kR6UncondBranch: type_ = kR6LongUncondBranch; @@ -1813,6 +1862,10 @@ void MipsAssembler::Branch::PromoteToLong() { case kR6Call: type_ = kR6LongCall; break; + // R6 near literal. + case kR6Literal: + type_ = kR6FarLiteral; + break; default: // Note: 'type_' is already long. break; @@ -1820,14 +1873,26 @@ void MipsAssembler::Branch::PromoteToLong() { CHECK(IsLong()); } -uint32_t MipsAssembler::Branch::PromoteIfNeeded(uint32_t max_short_distance) { +uint32_t MipsAssembler::GetBranchLocationOrPcRelBase(const MipsAssembler::Branch* branch) const { + switch (branch->GetType()) { + case Branch::kLiteral: + case Branch::kFarLiteral: + return GetLabelLocation(&pc_rel_base_label_); + default: + return branch->GetLocation(); + } +} + +uint32_t MipsAssembler::Branch::PromoteIfNeeded(uint32_t location, uint32_t max_short_distance) { + // `location` is either `GetLabelLocation(&pc_rel_base_label_)` for R2 literals or + // `this->GetLocation()` for everything else. // If the branch is still unresolved or already long, nothing to do. if (IsLong() || !IsResolved()) { return 0; } // Promote the short branch to long if the offset size is too small - // to hold the distance between location_ and target_. - if (GetOffsetSizeNeeded(location_, target_) > GetOffsetSize()) { + // to hold the distance between location and target_. + if (GetOffsetSizeNeeded(location, target_) > GetOffsetSize()) { PromoteToLong(); uint32_t old_size = GetOldSize(); uint32_t new_size = GetSize(); @@ -1837,7 +1902,7 @@ uint32_t MipsAssembler::Branch::PromoteIfNeeded(uint32_t max_short_distance) { // The following logic is for debugging/testing purposes. // Promote some short branches to long when it's not really required. if (UNLIKELY(max_short_distance != std::numeric_limits<uint32_t>::max())) { - int64_t distance = static_cast<int64_t>(target_) - location_; + int64_t distance = static_cast<int64_t>(target_) - location; distance = (distance >= 0) ? distance : -distance; if (distance >= max_short_distance) { PromoteToLong(); @@ -1854,12 +1919,26 @@ uint32_t MipsAssembler::Branch::GetOffsetLocation() const { return location_ + branch_info_[type_].instr_offset * sizeof(uint32_t); } -uint32_t MipsAssembler::Branch::GetOffset() const { +uint32_t MipsAssembler::GetBranchOrPcRelBaseForEncoding(const MipsAssembler::Branch* branch) const { + switch (branch->GetType()) { + case Branch::kLiteral: + case Branch::kFarLiteral: + return GetLabelLocation(&pc_rel_base_label_); + default: + return branch->GetOffsetLocation() + + Branch::branch_info_[branch->GetType()].pc_org * sizeof(uint32_t); + } +} + +uint32_t MipsAssembler::Branch::GetOffset(uint32_t location) const { + // `location` is either `GetLabelLocation(&pc_rel_base_label_)` for R2 literals or + // `this->GetOffsetLocation() + branch_info_[this->GetType()].pc_org * sizeof(uint32_t)` + // for everything else. 
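// Editorial illustration, not part of the patch: for a resolved R2 near literal whose
// pool slot sits 0x7F7C bytes past the pc-relative base label, GetOffset(base) computes
// (target_ - base) & 0xFFFF = 0x7F7C, which becomes the lw immediate directly, since
// kLiteral uses kOffset16 with offset_shift 0 (no scaling).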
CHECK(IsResolved()); uint32_t ofs_mask = 0xFFFFFFFF >> (32 - GetOffsetSize()); // Calculate the byte distance between instructions and also account for // different PC-relative origins. - uint32_t offset = target_ - GetOffsetLocation() - branch_info_[type_].pc_org * sizeof(uint32_t); + uint32_t offset = target_ - location; // Prepare the offset for encoding into the instruction(s). offset = (offset & ofs_mask) >> branch_info_[type_].offset_shift; return offset; @@ -1906,7 +1985,7 @@ void MipsAssembler::Bind(MipsLabel* label) { label->BindTo(bound_pc); } -uint32_t MipsAssembler::GetLabelLocation(MipsLabel* label) const { +uint32_t MipsAssembler::GetLabelLocation(const MipsLabel* label) const { CHECK(label->IsBound()); uint32_t target = label->Position(); if (label->prev_branch_id_plus_one_) { @@ -1941,6 +2020,10 @@ uint32_t MipsAssembler::GetAdjustedPosition(uint32_t old_position) { return old_position + last_position_adjustment_; } +void MipsAssembler::BindPcRelBaseLabel() { + Bind(&pc_rel_base_label_); +} + void MipsAssembler::FinalizeLabeledBranch(MipsLabel* label) { uint32_t length = branches_.back().GetLength(); if (!label->IsBound()) { @@ -1962,7 +2045,7 @@ void MipsAssembler::FinalizeLabeledBranch(MipsLabel* label) { void MipsAssembler::Buncond(MipsLabel* label) { uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved; - branches_.emplace_back(IsR6(), buffer_.Size(), target); + branches_.emplace_back(IsR6(), buffer_.Size(), target, /* is_call */ false); FinalizeLabeledBranch(label); } @@ -1976,12 +2059,46 @@ void MipsAssembler::Bcond(MipsLabel* label, BranchCondition condition, Register FinalizeLabeledBranch(label); } -void MipsAssembler::Call(MipsLabel* label, Register indirect_reg) { +void MipsAssembler::Call(MipsLabel* label) { uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved; - branches_.emplace_back(IsR6(), buffer_.Size(), target, indirect_reg); + branches_.emplace_back(IsR6(), buffer_.Size(), target, /* is_call */ true); FinalizeLabeledBranch(label); } +Literal* MipsAssembler::NewLiteral(size_t size, const uint8_t* data) { + DCHECK(size == 4u || size == 8u) << size; + literals_.emplace_back(size, data); + return &literals_.back(); +} + +void MipsAssembler::LoadLiteral(Register dest_reg, Register base_reg, Literal* literal) { + // Literal loads are treated as pseudo branches since they require very similar handling. + DCHECK_EQ(literal->GetSize(), 4u); + MipsLabel* label = literal->GetLabel(); + DCHECK(!label->IsBound()); + branches_.emplace_back(IsR6(), + buffer_.Size(), + dest_reg, + base_reg); + FinalizeLabeledBranch(label); +} + +void MipsAssembler::EmitLiterals() { + if (!literals_.empty()) { + // We don't support byte and half-word literals. + // TODO: proper alignment for 64-bit literals when they're implemented. + for (Literal& literal : literals_) { + MipsLabel* label = literal.GetLabel(); + Bind(label); + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + DCHECK(literal.GetSize() == 4u || literal.GetSize() == 8u); + for (size_t i = 0, size = literal.GetSize(); i != size; ++i) { + buffer_.Emit<uint8_t>(literal.GetData()[i]); + } + } + } +} + void MipsAssembler::PromoteBranches() { // Promote short branches to long as necessary. 
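// Editor's note, not part of the patch: the loop below runs to a fixed point because
// promoting one branch or literal load to its long form grows the code and relocates
// everything after it, which can push other, previously in-range branches past their
// own offset limits and force further promotions on the next pass.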
bool changed; @@ -1989,7 +2106,8 @@ void MipsAssembler::PromoteBranches() { changed = false; for (auto& branch : branches_) { CHECK(branch.IsResolved()); - uint32_t delta = branch.PromoteIfNeeded(); + uint32_t base = GetBranchLocationOrPcRelBase(&branch); + uint32_t delta = branch.PromoteIfNeeded(base); // If this branch has been promoted and needs to expand in size, // relocate all branches by the expansion size. if (delta) { @@ -2027,27 +2145,35 @@ const MipsAssembler::Branch::BranchInfo MipsAssembler::Branch::branch_info_[] = // R2 short branches. { 2, 0, 1, MipsAssembler::Branch::kOffset18, 2 }, // kUncondBranch { 2, 0, 1, MipsAssembler::Branch::kOffset18, 2 }, // kCondBranch - { 5, 2, 0, MipsAssembler::Branch::kOffset16, 0 }, // kCall + { 2, 0, 1, MipsAssembler::Branch::kOffset18, 2 }, // kCall + // R2 near literal. + { 1, 0, 0, MipsAssembler::Branch::kOffset16, 0 }, // kLiteral // R2 long branches. { 9, 3, 1, MipsAssembler::Branch::kOffset32, 0 }, // kLongUncondBranch { 10, 4, 1, MipsAssembler::Branch::kOffset32, 0 }, // kLongCondBranch { 6, 1, 1, MipsAssembler::Branch::kOffset32, 0 }, // kLongCall + // R2 far literal. + { 3, 0, 0, MipsAssembler::Branch::kOffset32, 0 }, // kFarLiteral // R6 short branches. { 1, 0, 1, MipsAssembler::Branch::kOffset28, 2 }, // kR6UncondBranch { 2, 0, 1, MipsAssembler::Branch::kOffset18, 2 }, // kR6CondBranch // Exception: kOffset23 for beqzc/bnezc. - { 2, 0, 0, MipsAssembler::Branch::kOffset21, 2 }, // kR6Call + { 1, 0, 1, MipsAssembler::Branch::kOffset28, 2 }, // kR6Call + // R6 near literal. + { 1, 0, 0, MipsAssembler::Branch::kOffset21, 2 }, // kR6Literal // R6 long branches. { 2, 0, 0, MipsAssembler::Branch::kOffset32, 0 }, // kR6LongUncondBranch { 3, 1, 0, MipsAssembler::Branch::kOffset32, 0 }, // kR6LongCondBranch - { 3, 0, 0, MipsAssembler::Branch::kOffset32, 0 }, // kR6LongCall + { 2, 0, 0, MipsAssembler::Branch::kOffset32, 0 }, // kR6LongCall + // R6 far literal. + { 2, 0, 0, MipsAssembler::Branch::kOffset32, 0 }, // kR6FarLiteral }; -// Note: make sure branch_info_[] and mitBranch() are kept synchronized. +// Note: make sure branch_info_[] and EmitBranch() are kept synchronized. void MipsAssembler::EmitBranch(MipsAssembler::Branch* branch) { CHECK_EQ(overwriting_, true); overwrite_location_ = branch->GetLocation(); - uint32_t offset = branch->GetOffset(); + uint32_t offset = branch->GetOffset(GetBranchOrPcRelBaseForEncoding(branch)); BranchCondition condition = branch->GetCondition(); Register lhs = branch->GetLeftRegister(); Register rhs = branch->GetRightRegister(); @@ -2064,12 +2190,15 @@ void MipsAssembler::EmitBranch(MipsAssembler::Branch* branch) { Nop(); // TODO: improve by filling the delay slot. break; case Branch::kCall: - Nal(); - Nop(); // TODO: is this NOP really needed here? CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); - Addiu(lhs, RA, offset); - Jalr(lhs); - Nop(); + Bal(offset); + Nop(); // TODO: improve by filling the delay slot. + break; + + // R2 near literal. + case Branch::kLiteral: + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Lw(lhs, rhs, offset); break; // R2 long branches. @@ -2123,11 +2252,20 @@ void MipsAssembler::EmitBranch(MipsAssembler::Branch* branch) { CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); Lui(AT, High16Bits(offset)); Ori(AT, AT, Low16Bits(offset)); - Addu(lhs, AT, RA); - Jalr(lhs); + Addu(AT, AT, RA); + Jalr(AT); Nop(); break; + // R2 far literal. + case Branch::kFarLiteral: + offset += (offset & 0x8000) << 1; // Account for sign extension in lw. 
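// Editorial worked example, not part of the patch: with offset = 0x12348000 the lw
// immediate 0x8000 sign-extends to -0x8000, so lui 0x1234 / lw 0x8000 would reach
// 0x12338000, landing 0x10000 bytes short; the adjustment turns the offset into
// 0x12358000, and lui $at, 0x1235 followed by the sign-extended lw hits 0x12348000 exactly.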
+ CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Lui(AT, High16Bits(offset)); + Addu(AT, AT, rhs); + Lw(lhs, AT, Low16Bits(offset)); + break; + // R6 short branches. case Branch::kR6UncondBranch: CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); @@ -2140,8 +2278,13 @@ void MipsAssembler::EmitBranch(MipsAssembler::Branch* branch) { break; case Branch::kR6Call: CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); - Addiupc(lhs, offset); - Jialc(lhs, 0); + Balc(offset); + break; + + // R6 near literal. + case Branch::kR6Literal: + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Lwpc(lhs, offset); break; // R6 long branches. @@ -2159,11 +2302,18 @@ void MipsAssembler::EmitBranch(MipsAssembler::Branch* branch) { Jic(AT, Low16Bits(offset)); break; case Branch::kR6LongCall: - offset += (offset & 0x8000) << 1; // Account for sign extension in addiu. + offset += (offset & 0x8000) << 1; // Account for sign extension in jialc. CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); - Auipc(lhs, High16Bits(offset)); - Addiu(lhs, lhs, Low16Bits(offset)); - Jialc(lhs, 0); + Auipc(AT, High16Bits(offset)); + Jialc(AT, Low16Bits(offset)); + break; + + // R6 far literal. + case Branch::kR6FarLiteral: + offset += (offset & 0x8000) << 1; // Account for sign extension in lw. + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Auipc(AT, High16Bits(offset)); + Lw(lhs, AT, Low16Bits(offset)); break; } CHECK_EQ(overwrite_location_, branch->GetEndLocation()); @@ -2174,8 +2324,8 @@ void MipsAssembler::B(MipsLabel* label) { Buncond(label); } -void MipsAssembler::Jalr(MipsLabel* label, Register indirect_reg) { - Call(label, indirect_reg); +void MipsAssembler::Bal(MipsLabel* label) { + Call(label); } void MipsAssembler::Beq(Register rs, Register rt, MipsLabel* label) { diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h index 31b3b311eb..1f7781fef9 100644 --- a/compiler/utils/mips/assembler_mips.h +++ b/compiler/utils/mips/assembler_mips.h @@ -17,10 +17,12 @@ #ifndef ART_COMPILER_UTILS_MIPS_ASSEMBLER_MIPS_H_ #define ART_COMPILER_UTILS_MIPS_ASSEMBLER_MIPS_H_ +#include <deque> #include <utility> #include <vector> #include "arch/mips/instruction_set_features_mips.h" +#include "base/arena_containers.h" #include "base/macros.h" #include "constants_mips.h" #include "globals.h" @@ -79,6 +81,49 @@ class MipsLabel : public Label { DISALLOW_COPY_AND_ASSIGN(MipsLabel); }; +// Assembler literal is a value embedded in code, retrieved using a PC-relative load. +class Literal { + public: + static constexpr size_t kMaxSize = 8; + + Literal(uint32_t size, const uint8_t* data) + : label_(), size_(size) { + DCHECK_LE(size, Literal::kMaxSize); + memcpy(data_, data, size); + } + + template <typename T> + T GetValue() const { + DCHECK_EQ(size_, sizeof(T)); + T value; + memcpy(&value, data_, sizeof(T)); + return value; + } + + uint32_t GetSize() const { + return size_; + } + + const uint8_t* GetData() const { + return data_; + } + + MipsLabel* GetLabel() { + return &label_; + } + + const MipsLabel* GetLabel() const { + return &label_; + } + + private: + MipsLabel label_; + const uint32_t size_; + uint8_t data_[kMaxSize]; + + DISALLOW_COPY_AND_ASSIGN(Literal); +}; + // Slowpath entered when Thread::Current()->_exception is non-null. 
class MipsExceptionSlowPath { public: @@ -107,6 +152,7 @@ class MipsAssembler FINAL : public Assembler { : Assembler(arena), overwriting_(false), overwrite_location_(0), + literals_(arena->Adapter(kArenaAllocAssembler)), last_position_adjustment_(0), last_old_position_(0), last_branch_id_(0), @@ -182,6 +228,7 @@ class MipsAssembler FINAL : public Assembler { void Lwr(Register rt, Register rs, uint16_t imm16); void Lbu(Register rt, Register rs, uint16_t imm16); void Lhu(Register rt, Register rs, uint16_t imm16); + void Lwpc(Register rs, uint32_t imm19); // R6 void Lui(Register rt, uint16_t imm16); void Aui(Register rt, Register rs, uint16_t imm16); // R6 void Sync(uint32_t stype); @@ -205,6 +252,7 @@ class MipsAssembler FINAL : public Assembler { void Sltiu(Register rt, Register rs, uint16_t imm16); void B(uint16_t imm16); + void Bal(uint16_t imm16); void Beq(Register rs, Register rt, uint16_t imm16); void Bne(Register rs, Register rt, uint16_t imm16); void Beqz(Register rt, uint16_t imm16); @@ -226,6 +274,7 @@ class MipsAssembler FINAL : public Assembler { void Auipc(Register rs, uint16_t imm16); // R6 void Addiupc(Register rs, uint32_t imm19); // R6 void Bc(uint32_t imm26); // R6 + void Balc(uint32_t imm26); // R6 void Jic(Register rt, uint16_t imm16); // R6 void Jialc(Register rt, uint16_t imm16); // R6 void Bltc(Register rs, Register rt, uint16_t imm16); // R6 @@ -365,7 +414,7 @@ class MipsAssembler FINAL : public Assembler { // These will generate R2 branches or R6 branches as appropriate. void Bind(MipsLabel* label); void B(MipsLabel* label); - void Jalr(MipsLabel* label, Register indirect_reg); + void Bal(MipsLabel* label); void Beq(Register rs, Register rt, MipsLabel* label); void Bne(Register rs, Register rt, MipsLabel* label); void Beqz(Register rt, MipsLabel* label); @@ -412,6 +461,21 @@ class MipsAssembler FINAL : public Assembler { UNIMPLEMENTED(FATAL) << "Do not use Jump for MIPS"; } + // Create a new literal with a given value. + // NOTE: Force the template parameter to be explicitly specified. + template <typename T> + Literal* NewLiteral(typename Identity<T>::type value) { + static_assert(std::is_integral<T>::value, "T must be an integral type."); + return NewLiteral(sizeof(value), reinterpret_cast<const uint8_t*>(&value)); + } + + // Create a new literal with the given data. + Literal* NewLiteral(size_t size, const uint8_t* data); + + // Load literal using the base register (for R2 only) or using PC-relative loads + // (for R6 only; base_reg must be ZERO). + void LoadLiteral(Register dest_reg, Register base_reg, Literal* literal); + // // Overridden common assembler high-level functionality. // @@ -569,12 +633,22 @@ class MipsAssembler FINAL : public Assembler { // Returns the (always-)current location of a label (can be used in class CodeGeneratorMIPS, // must be used instead of MipsLabel::GetPosition()). - uint32_t GetLabelLocation(MipsLabel* label) const; + uint32_t GetLabelLocation(const MipsLabel* label) const; // Get the final position of a label after local fixup based on the old position // recorded before FinalizeCode(). uint32_t GetAdjustedPosition(uint32_t old_position); + // R2 doesn't have PC-relative addressing, which we need to access literals. We simulate it by + // reading the PC value into a general-purpose register with the NAL instruction and then loading + // literals through this base register. The code generator calls this method (at most once per + // method being compiled) to bind a label to the location for which the PC value is acquired. 
+ // The assembler then computes literal offsets relative to this label. + void BindPcRelBaseLabel(); + + // Note that PC-relative literal loads are handled as pseudo branches because they need very + // similar relocation and may similarly expand in size to accommodate larger offsets relative + // to PC. enum BranchCondition { kCondLT, kCondGE, @@ -604,18 +678,26 @@ kUncondBranch, kCondBranch, kCall, + // R2 near literal. + kLiteral, // R2 long branches. kLongUncondBranch, kLongCondBranch, kLongCall, + // R2 far literal. + kFarLiteral, // R6 short branches. kR6UncondBranch, kR6CondBranch, kR6Call, + // R6 near literal. + kR6Literal, // R6 long branches. kR6LongUncondBranch, kR6LongCondBranch, kR6LongCall, + // R6 far literal. + kR6FarLiteral, }; // Bit sizes of offsets defined as enums to minimize chance of typos. enum OffsetBits { @@ -650,17 +732,17 @@ }; static const BranchInfo branch_info_[/* Type */]; - // Unconditional branch. - Branch(bool is_r6, uint32_t location, uint32_t target); + // Unconditional branch or call. + Branch(bool is_r6, uint32_t location, uint32_t target, bool is_call); // Conditional branch. Branch(bool is_r6, uint32_t location, uint32_t target, BranchCondition condition, Register lhs_reg, - Register rhs_reg = ZERO); - // Call (branch and link) that stores the target address in a given register (i.e. T9). - Branch(bool is_r6, uint32_t location, uint32_t target, Register indirect_reg); + Register rhs_reg); + // Literal. + Branch(bool is_r6, uint32_t location, Register dest_reg, Register base_reg); // Some conditional branches with lhs = rhs are effectively NOPs, while some // others are effectively unconditional. MIPSR6 conditional branches require lhs != rhs. @@ -736,17 +818,18 @@ // that is allowed for short branches. This is for debugging/testing purposes. // max_short_distance = 0 forces all short branches to become long. // Use the implicit default argument when not debugging/testing. - uint32_t PromoteIfNeeded(uint32_t max_short_distance = std::numeric_limits<uint32_t>::max()); + uint32_t PromoteIfNeeded(uint32_t location, + uint32_t max_short_distance = std::numeric_limits<uint32_t>::max()); // Returns the location of the instruction(s) containing the offset. uint32_t GetOffsetLocation() const; // Calculates and returns the offset ready for encoding in the branch instruction(s). - uint32_t GetOffset() const; + uint32_t GetOffset(uint32_t location) const; private: // Completes branch construction by determining and recording its type. - void InitializeType(bool is_call, bool is_r6); + void InitializeType(bool is_call, bool is_literal, bool is_r6); // Helper for the above.
void InitShortOrLong(OffsetBits ofs_size, Type short_type, Type long_type); @@ -776,12 +859,15 @@ class MipsAssembler FINAL : public Assembler { void Buncond(MipsLabel* label); void Bcond(MipsLabel* label, BranchCondition condition, Register lhs, Register rhs = ZERO); - void Call(MipsLabel* label, Register indirect_reg); + void Call(MipsLabel* label); void FinalizeLabeledBranch(MipsLabel* label); Branch* GetBranch(uint32_t branch_id); const Branch* GetBranch(uint32_t branch_id) const; + uint32_t GetBranchLocationOrPcRelBase(const MipsAssembler::Branch* branch) const; + uint32_t GetBranchOrPcRelBaseForEncoding(const MipsAssembler::Branch* branch) const; + void EmitLiterals(); void PromoteBranches(); void EmitBranch(Branch* branch); void EmitBranches(); @@ -816,6 +902,15 @@ class MipsAssembler FINAL : public Assembler { // The current overwrite location. uint32_t overwrite_location_; + // Use std::deque<> for literal labels to allow insertions at the end + // without invalidating pointers and references to existing elements. + ArenaDeque<Literal> literals_; + + // There's no PC-relative addressing on MIPS32R2. So, in order to access literals relative to PC + // we get PC using the NAL instruction. This label marks the position within the assembler buffer + // that PC (from NAL) points to. + MipsLabel pc_rel_base_label_; + // Data for AdjustedPosition(), see the description there. uint32_t last_position_adjustment_; uint32_t last_old_position_; diff --git a/compiler/utils/mips/assembler_mips32r6_test.cc b/compiler/utils/mips/assembler_mips32r6_test.cc index ce92d602d0..49ef272fb0 100644 --- a/compiler/utils/mips/assembler_mips32r6_test.cc +++ b/compiler/utils/mips/assembler_mips32r6_test.cc @@ -48,8 +48,30 @@ class AssemblerMIPS32r6Test : public AssemblerTest<mips::MipsAssembler, return "mips"; } + std::string GetAssemblerCmdName() OVERRIDE { + // We assemble and link for MIPS32R6. See GetAssemblerParameters() for details. + return "gcc"; + } + std::string GetAssemblerParameters() OVERRIDE { - return " --no-warn -32 -march=mips32r6"; + // We assemble and link for MIPS32R6. The reason is that object files produced for MIPS32R6 + // (and MIPS64R6) with the GNU assembler don't have correct final offsets in PC-relative + // branches in the .text section and so they require a relocation pass (there's a relocation + // section, .rela.text, that has the needed info to fix up the branches). + // We use "-modd-spreg" so we can use odd-numbered single precision FPU registers. + // We put the code at address 0x1000000 (instead of 0) to avoid overlapping with the + // .MIPS.abiflags section (there doesn't seem to be a way to suppress its generation easily). + return " -march=mips32r6 -modd-spreg -Wa,--no-warn" + " -Wl,-Ttext=0x1000000 -Wl,-e0x1000000 -nostdlib"; + } + + void Pad(std::vector<uint8_t>& data) OVERRIDE { + // The GNU linker unconditionally pads the code segment with NOPs to a size that is a multiple + // of 16 and there doesn't appear to be a way to suppress this padding. Our assembler doesn't + // pad, so, in order for two assembler outputs to match, we need to match the padding as well. + // NOP is encoded as four zero bytes on MIPS. 
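// Editorial example, not part of the patch: a 52-byte code stream receives
// RoundUp(52, 16) - 52 = 12 zero bytes, i.e. three NOPs, to reach the next multiple of 16.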
+ size_t pad_size = RoundUp(data.size(), 16u) - data.size(); + data.insert(data.end(), pad_size, 0); } std::string GetDisassembleParameters() OVERRIDE { @@ -272,6 +294,21 @@ TEST_F(AssemblerMIPS32r6Test, Aui) { DriverStr(RepeatRRIb(&mips::MipsAssembler::Aui, 16, "aui ${reg1}, ${reg2}, {imm}"), "Aui"); } +TEST_F(AssemblerMIPS32r6Test, Auipc) { + DriverStr(RepeatRIb(&mips::MipsAssembler::Auipc, 16, "auipc ${reg}, {imm}"), "Auipc"); +} + +TEST_F(AssemblerMIPS32r6Test, Lwpc) { + // Lwpc() takes an unsigned 19-bit immediate, while the GNU assembler needs a signed offset, + // hence the sign extension from bit 18 with `imm - ((imm & 0x40000) << 1)`. + // The GNU assembler also wants the offset to be a multiple of 4, which it will shift right + // by 2 positions when encoding, hence `<< 2` to compensate for that shift. + // We capture the value of the immediate with `.set imm, {imm}` because the value is needed + // twice for the sign extension, but `{imm}` is substituted only once. + const char* code = ".set imm, {imm}\nlw ${reg}, ((imm - ((imm & 0x40000) << 1)) << 2)($pc)"; + DriverStr(RepeatRIb(&mips::MipsAssembler::Lwpc, 19, code), "Lwpc"); +} + TEST_F(AssemblerMIPS32r6Test, Bitswap) { DriverStr(RepeatRR(&mips::MipsAssembler::Bitswap, "bitswap ${reg1}, ${reg2}"), "bitswap"); } @@ -598,12 +635,45 @@ TEST_F(AssemblerMIPS32r6Test, StoreDToOffset) { DriverStr(expected, "StoreDToOffset"); } +TEST_F(AssemblerMIPS32r6Test, LoadFarthestNearLiteral) { + mips::Literal* literal = __ NewLiteral<uint32_t>(0x12345678); + __ LoadLiteral(mips::V0, mips::ZERO, literal); + constexpr size_t kAdduCount = 0x3FFDE; + for (size_t i = 0; i != kAdduCount; ++i) { + __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); + } + + std::string expected = + "lwpc $v0, 1f\n" + + RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + + "1:\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadFarthestNearLiteral"); +} + +TEST_F(AssemblerMIPS32r6Test, LoadNearestFarLiteral) { + mips::Literal* literal = __ NewLiteral<uint32_t>(0x12345678); + __ LoadLiteral(mips::V0, mips::ZERO, literal); + constexpr size_t kAdduCount = 0x3FFDF; + for (size_t i = 0; i != kAdduCount; ++i) { + __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); + } + + std::string expected = + "1:\n" + "auipc $at, %hi(2f - 1b)\n" + "lw $v0, %lo(2f - 1b)($at)\n" + + RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + + "2:\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadNearestFarLiteral"); +} + ////////////// // BRANCHES // ////////////// -// TODO: MipsAssembler::Auipc -// MipsAssembler::Addiupc +// TODO: MipsAssembler::Addiupc // MipsAssembler::Bc // MipsAssembler::Jic // MipsAssembler::Jialc diff --git a/compiler/utils/mips/assembler_mips_test.cc b/compiler/utils/mips/assembler_mips_test.cc index a1d6ad6a2f..50a8dc202a 100644 --- a/compiler/utils/mips/assembler_mips_test.cc +++ b/compiler/utils/mips/assembler_mips_test.cc @@ -2293,6 +2293,44 @@ TEST_F(AssemblerMIPSTest, LoadConst32) { DriverStr(expected, "LoadConst32"); } +TEST_F(AssemblerMIPSTest, LoadFarthestNearLiteral) { + mips::Literal* literal = __ NewLiteral<uint32_t>(0x12345678); + __ BindPcRelBaseLabel(); + __ LoadLiteral(mips::V0, mips::V1, literal); + constexpr size_t kAddiuCount = 0x1FDE; + for (size_t i = 0; i != kAddiuCount; ++i) { + __ Addiu(mips::A0, mips::A1, 0); + } + + std::string expected = + "1:\n" + "lw $v0, %lo(2f - 1b)($v1)\n" + + RepeatInsn(kAddiuCount, "addiu $a0, $a1, 0\n") + + "2:\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadFarthestNearLiteral"); +} +
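(Editorial aside, not part of the patch: the R2 literal tests here call BindPcRelBaseLabel() directly instead of materializing PC. Per the BindPcRelBaseLabel() comment in assembler_mips.h, a code generator would emit the full R2 sequence roughly as sketched below; the assembler instance `assm`, the literal value, and the choice of V1 as the base register are hypothetical, while Nal(), Nop(), Addu(), NewLiteral(), LoadLiteral() and FinalizeCode() are the MipsAssembler entry points shown in this change.)

    mips::Literal* lit = assm.NewLiteral<uint32_t>(0xDEADBEEF);  // hypothetical value
    assm.Nal();                                 // RA <- address of the instruction after the delay slot
    assm.Nop();                                 // branch delay slot
    assm.BindPcRelBaseLabel();                  // RA now holds exactly this buffer location
    assm.Addu(mips::V1, mips::RA, mips::ZERO);  // preserve the PC-relative base in V1
    assm.LoadLiteral(mips::V0, mips::V1, lit);  // near case emits: lw $v0, <lit - base>($v1)
    assm.FinalizeCode();                        // EmitLiterals() appends the pool; offsets are resolved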
+TEST_F(AssemblerMIPSTest, LoadNearestFarLiteral) { + mips::Literal* literal = __ NewLiteral<uint32_t>(0x12345678); + __ BindPcRelBaseLabel(); + __ LoadLiteral(mips::V0, mips::V1, literal); + constexpr size_t kAdduCount = 0x1FDF; + for (size_t i = 0; i != kAdduCount; ++i) { + __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); + } + + std::string expected = + "1:\n" + "lui $at, %hi(2f - 1b)\n" + "addu $at, $at, $v1\n" + "lw $v0, %lo(2f - 1b)($at)\n" + + RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + + "2:\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadNearestFarLiteral"); +} + #undef __ } // namespace art diff --git a/compiler/utils/transform_array_ref.h b/compiler/utils/transform_array_ref.h index 6297b88e69..a6da34fb40 100644 --- a/compiler/utils/transform_array_ref.h +++ b/compiler/utils/transform_array_ref.h @@ -70,6 +70,11 @@ class TransformArrayRef { TransformArrayRef(const ArrayRef<OtherBT>& base, Function fn) : data_(base, fn) { } + template <typename OtherBT, + typename = typename std::enable_if<std::is_same<BaseType, const OtherBT>::value>::type> + TransformArrayRef(const TransformArrayRef<OtherBT, Function>& other) + : TransformArrayRef(other.base(), other.GetFunction()) { } + // Assignment operators. TransformArrayRef& operator=(const TransformArrayRef& other) = default; @@ -149,6 +154,9 @@ class TransformArrayRef { } Data data_; + + template <typename OtherBT, typename OtherFunction> + friend class TransformArrayRef; }; template <typename BaseType, typename Function> diff --git a/compiler/utils/transform_array_ref_test.cc b/compiler/utils/transform_array_ref_test.cc index 2593fade2f..8d71fd7179 100644 --- a/compiler/utils/transform_array_ref_test.cc +++ b/compiler/utils/transform_array_ref_test.cc @@ -160,6 +160,48 @@ TEST(TransformArrayRef, ConstAndNonConstRef) { taref[i] = transform_input[i]; } ASSERT_EQ(std::vector<ValueHolder>({ 24, 37, 11, 71 }), transformed); + + const std::vector<ValueHolder>& cinput = input; + + auto ctaref = MakeTransformArrayRef(cinput, ref); + static_assert(std::is_same<int, decltype(ctaref)::value_type>::value, "value_type"); + static_assert(std::is_same<const int*, decltype(ctaref)::pointer>::value, "pointer"); + static_assert(std::is_same<const int&, decltype(ctaref)::reference>::value, "reference"); + static_assert(std::is_same<const int*, decltype(ctaref)::const_pointer>::value, "const_pointer"); + static_assert(std::is_same<const int&, decltype(ctaref)::const_reference>::value, + "const_reference"); + + std::copy(ctaref.begin(), ctaref.end(), std::back_inserter(output)); + ASSERT_EQ(std::vector<int>({ 1, 0, 1, 0, 3, 1 }), output); + output.clear(); + + std::copy(ctaref.cbegin(), ctaref.cend(), std::back_inserter(output)); + ASSERT_EQ(std::vector<int>({ 1, 0, 1, 0, 3, 1 }), output); + output.clear(); + + std::copy(ctaref.rbegin(), ctaref.rend(), std::back_inserter(output)); + ASSERT_EQ(std::vector<int>({ 1, 3, 0, 1, 0, 1 }), output); + output.clear(); + + std::copy(ctaref.crbegin(), ctaref.crend(), std::back_inserter(output)); + ASSERT_EQ(std::vector<int>({ 1, 3, 0, 1, 0, 1 }), output); + output.clear(); + + ASSERT_EQ(cinput.size(), ctaref.size()); + ASSERT_EQ(cinput.empty(), ctaref.empty()); + ASSERT_EQ(cinput.front().value, ctaref.front()); + ASSERT_EQ(cinput.back().value, ctaref.back()); + + for (size_t i = 0; i != cinput.size(); ++i) { + ASSERT_EQ(cinput[i].value, ctaref[i]); + } + + // Test conversion adding const. 
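// (Editor's note, not part of the patch: the assignment below compiles thanks to the
// converting constructor added to TransformArrayRef above; its enable_if guard admits
// only TransformArrayRef<BT, Fn> -> TransformArrayRef<const BT, Fn>, i.e. const-adding
// conversions.)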
+ decltype(ctaref) ctaref2 = taref; + ASSERT_EQ(taref.size(), ctaref2.size()); + for (size_t i = 0; i != taref.size(); ++i) { + ASSERT_EQ(taref[i], ctaref2[i]); + } } } // namespace art diff --git a/compiler/utils/transform_iterator.h b/compiler/utils/transform_iterator.h index f0769d4800..3bc9046408 100644 --- a/compiler/utils/transform_iterator.h +++ b/compiler/utils/transform_iterator.h @@ -44,11 +44,7 @@ class TransformIterator { typename std::iterator_traits<BaseIterator>::iterator_category>::value, "Transform iterator base must be an input iterator."); - using InputType = - typename std::conditional< - std::is_same<void, typename std::iterator_traits<BaseIterator>::reference>::value, - typename std::iterator_traits<BaseIterator>::value_type, - typename std::iterator_traits<BaseIterator>::reference>::type; + using InputType = typename std::iterator_traits<BaseIterator>::reference; using ResultType = typename std::result_of<Function(InputType)>::type; public: diff --git a/compiler/utils/transform_iterator_test.cc b/compiler/utils/transform_iterator_test.cc index dbb4779330..57ff0a62ac 100644 --- a/compiler/utils/transform_iterator_test.cc +++ b/compiler/utils/transform_iterator_test.cc @@ -20,8 +20,6 @@ #include <type_traits> #include <vector> -#include <array> - #include "gtest/gtest.h" #include "utils/transform_iterator.h" diff --git a/dex2oat/dex2oat_test.cc b/dex2oat/dex2oat_test.cc index 6188883358..93351e9f28 100644 --- a/dex2oat/dex2oat_test.cc +++ b/dex2oat/dex2oat_test.cc @@ -154,33 +154,44 @@ class Dex2oatTest : public Dex2oatEnvironmentTest { CHECK(android_root != nullptr); argv.push_back("--android-root=" + std::string(android_root)); - std::string command_line(Join(argv, ' ')); + int link[2]; - // We need to fix up the '&' being used for "do not check classpath." - size_t ampersand = command_line.find(" &"); - CHECK_NE(ampersand, std::string::npos); - command_line = command_line.replace(ampersand, 2, " \\&"); - - command_line += " 2>&1"; - - // We need dex2oat to actually log things. - setenv("ANDROID_LOG_TAGS", "*:d", 1); - - FILE* pipe = popen(command_line.c_str(), "r"); + if (pipe(link) == -1) { + return false; + } - setenv("ANDROID_LOG_TAGS", "*:e", 1); + pid_t pid = fork(); + if (pid == -1) { + return false; + } - if (pipe == nullptr) { - success_ = false; + if (pid == 0) { + // We need dex2oat to actually log things. 
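// (Editor's note, not part of the patch: this child branch wires stderr into the pipe's
// write end with dup2, closes the raw pipe fds, and execs dex2oat; the parent reads the
// other end until EOF and reaps the child with waitpid, replacing the old popen()-based
// capture.)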
+ setenv("ANDROID_LOG_TAGS", "*:d", 1); + dup2(link[1], STDERR_FILENO); + close(link[0]); + close(link[1]); + std::vector<const char*> c_args; + for (const std::string& str : argv) { + c_args.push_back(str.c_str()); + } + c_args.push_back(nullptr); + execv(c_args[0], const_cast<char* const*>(c_args.data())); + exit(1); } else { + close(link[1]); char buffer[128]; + memset(buffer, 0, 128); + ssize_t bytes_read = 0; - while (fgets(buffer, 128, pipe) != nullptr) { - output_ += buffer; + while (TEMP_FAILURE_RETRY(bytes_read = read(link[0], buffer, 128)) > 0) { + output_ += std::string(buffer, bytes_read); + } + close(link[0]); + int status = 0; + if (waitpid(pid, &status, 0) != -1) { + success_ = (status == 0); } - - int result = pclose(pipe); - success_ = result == 0; } return success_; } diff --git a/disassembler/disassembler_mips.cc b/disassembler/disassembler_mips.cc index 1f513113ec..769263ec5b 100644 --- a/disassembler/disassembler_mips.cc +++ b/disassembler/disassembler_mips.cc @@ -330,8 +330,10 @@ static const MipsInstruction gMipsInstructions[] = { { kITypeMask, 55u << kOpcodeShift, "ld", "TO", }, { kITypeMask, 56u << kOpcodeShift, "sc", "TO", }, { kITypeMask, 57u << kOpcodeShift, "swc1", "tO", }, + { kJTypeMask, 58u << kOpcodeShift, "balc", "P" }, { kITypeMask | (0x1f << 16), (59u << kOpcodeShift) | (30 << 16), "auipc", "Si" }, { kITypeMask | (0x3 << 19), (59u << kOpcodeShift) | (0 << 19), "addiupc", "Sp" }, + { kITypeMask | (0x3 << 19), (59u << kOpcodeShift) | (1 << 19), "lwpc", "So" }, { kITypeMask, 61u << kOpcodeShift, "sdc1", "tO", }, { kITypeMask | (0x1f << 21), 62u << kOpcodeShift, "jialc", "Ti" }, { kITypeMask | (1 << 21), (62u << kOpcodeShift) | (1 << 21), "bnezc", "Sb" }, // TODO: de-dup? @@ -509,7 +511,15 @@ size_t DisassemblerMips::Dump(std::ostream& os, const uint8_t* instr_ptr) { } } break; - case 'P': // 26-bit offset in bc. + case 'o': // 19-bit offset in lwpc. + { + int32_t offset = (instruction & 0x7ffff) - ((instruction & 0x40000) << 1); + offset <<= 2; + args << FormatInstructionPointer(instr_ptr + offset); + args << StringPrintf(" ; %+d", offset); + } + break; + case 'P': // 26-bit offset in bc and balc. { int32_t offset = (instruction & 0x3ffffff) - ((instruction & 0x2000000) << 1); offset <<= 2; @@ -540,6 +550,7 @@ size_t DisassemblerMips::Dump(std::ostream& os, const uint8_t* instr_ptr) { } } + // TODO: Simplify this once these sequences are simplified in the compiler. 
// Special cases for sequences of: // pc-relative +/- 2GB branch: // auipc reg, imm diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc index bb35f8dc30..3f031a3318 100644 --- a/oatdump/oatdump.cc +++ b/oatdump/oatdump.cc @@ -1751,7 +1751,7 @@ class ImageDumper { const auto& method_section = state->image_header_.GetMethodsSection(); size_t num_methods = dex_cache->NumResolvedMethods(); if (num_methods != 0u) { - os << "Methods (size=" << num_methods << "):"; + os << "Methods (size=" << num_methods << "):\n"; ScopedIndentation indent2(&state->vios_); auto* resolved_methods = dex_cache->GetResolvedMethods(); for (size_t i = 0, length = dex_cache->NumResolvedMethods(); i < length; ++i) { @@ -1760,10 +1760,12 @@ class ImageDumper { image_pointer_size); size_t run = 0; for (size_t j = i + 1; - j != length && elem == mirror::DexCache::GetElementPtrSize(resolved_methods, - j, - image_pointer_size); - ++j, ++run) {} + j != length && elem == mirror::DexCache::GetElementPtrSize(resolved_methods, + j, + image_pointer_size); + ++j) { + ++run; + } if (run == 0) { os << StringPrintf("%zd: ", i); } else { @@ -1784,17 +1786,20 @@ class ImageDumper { } size_t num_fields = dex_cache->NumResolvedFields(); if (num_fields != 0u) { - os << "Fields (size=" << num_fields << "):"; + os << "Fields (size=" << num_fields << "):\n"; ScopedIndentation indent2(&state->vios_); auto* resolved_fields = dex_cache->GetResolvedFields(); for (size_t i = 0, length = dex_cache->NumResolvedFields(); i < length; ++i) { - auto* elem = mirror::DexCache::GetElementPtrSize(resolved_fields, i, image_pointer_size); + auto* elem = mirror::DexCache::GetElementPtrSize( + resolved_fields, i, image_pointer_size); size_t run = 0; for (size_t j = i + 1; - j != length && elem == mirror::DexCache::GetElementPtrSize(resolved_fields, - j, - image_pointer_size); - ++j, ++run) {} + j != length && elem == mirror::DexCache::GetElementPtrSize(resolved_fields, + j, + image_pointer_size); + ++j) { + ++run; + } if (run == 0) { os << StringPrintf("%zd: ", i); } else { @@ -1813,6 +1818,32 @@ class ImageDumper { os << StringPrintf("%p %s\n", elem, msg.c_str()); } } + size_t num_types = dex_cache->NumResolvedTypes(); + if (num_types != 0u) { + os << "Types (size=" << num_types << "):\n"; + ScopedIndentation indent2(&state->vios_); + auto* resolved_types = dex_cache->GetResolvedTypes(); + for (size_t i = 0; i < num_types; ++i) { + auto* elem = resolved_types[i].Read(); + size_t run = 0; + for (size_t j = i + 1; j != num_types && elem == resolved_types[j].Read(); ++j) { + ++run; + } + if (run == 0) { + os << StringPrintf("%zd: ", i); + } else { + os << StringPrintf("%zd to %zd: ", i, i + run); + i = i + run; + } + std::string msg; + if (elem == nullptr) { + msg = "null"; + } else { + msg = PrettyClass(elem); + } + os << StringPrintf("%p %s\n", elem, msg.c_str()); + } + } } } std::string temp; diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S index 0797def8e8..d940164c2c 100644 --- a/runtime/arch/arm/quick_entrypoints_arm.S +++ b/runtime/arch/arm/quick_entrypoints_arm.S @@ -88,12 +88,6 @@ #endif .endm - // Ugly compile-time check, but we only have the preprocessor. -#if (FRAME_SIZE_REFS_ONLY_CALLEE_SAVE != 28 + 4) -#error "REFS_ONLY_CALLEE_SAVE_FRAME(ARM) size not as expected." 
-#endif -.endm - .macro RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME add sp, #4 @ bottom word holds Method* .cfi_adjust_cfa_offset -4 diff --git a/runtime/arch/mips/fault_handler_mips.cc b/runtime/arch/mips/fault_handler_mips.cc index 754284c833..7969a8f5ca 100644 --- a/runtime/arch/mips/fault_handler_mips.cc +++ b/runtime/arch/mips/fault_handler_mips.cc @@ -44,7 +44,7 @@ void FaultManager::GetMethodAndReturnPcAndSp(siginfo_t* siginfo, void* context, uintptr_t* out_return_pc, uintptr_t* out_sp) { struct ucontext* uc = reinterpret_cast<struct ucontext*>(context); struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext); - *out_sp = static_cast<uintptr_t>(sc->sc_regs[29]); // SP register + *out_sp = static_cast<uintptr_t>(sc->sc_regs[mips::SP]); VLOG(signals) << "sp: " << *out_sp; if (*out_sp == 0) { return; @@ -56,7 +56,7 @@ void FaultManager::GetMethodAndReturnPcAndSp(siginfo_t* siginfo, void* context, uintptr_t* overflow_addr = reinterpret_cast<uintptr_t*>( reinterpret_cast<uint8_t*>(*out_sp) - GetStackOverflowReservedBytes(kMips)); if (overflow_addr == fault_addr) { - *out_method = reinterpret_cast<ArtMethod*>(sc->sc_regs[4]); // A0 register + *out_method = reinterpret_cast<ArtMethod*>(sc->sc_regs[mips::A0]); } else { // The method is at the top of the stack. *out_method = *reinterpret_cast<ArtMethod**>(*out_sp); @@ -82,12 +82,12 @@ bool NullPointerHandler::Action(int sig ATTRIBUTE_UNUSED, siginfo_t* info, void* struct ucontext *uc = reinterpret_cast<struct ucontext*>(context); struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext); - sc->sc_regs[31] = sc->sc_pc + 4; // RA needs to point to gc map location + sc->sc_regs[mips::RA] = sc->sc_pc + 4; // RA needs to point to gc map location sc->sc_pc = reinterpret_cast<uintptr_t>(art_quick_throw_null_pointer_exception_from_signal); - sc->sc_regs[25] = sc->sc_pc; // make sure T9 points to the function + sc->sc_regs[mips::T9] = sc->sc_pc; // make sure T9 points to the function // Pass the faulting address as the first argument of // art_quick_throw_null_pointer_exception_from_signal. - sc->sc_regs[0] = reinterpret_cast<uintptr_t>(info->si_addr); + sc->sc_regs[mips::A0] = reinterpret_cast<uintptr_t>(info->si_addr); VLOG(signals) << "Generating null pointer exception"; return true; } @@ -116,7 +116,7 @@ bool StackOverflowHandler::Action(int sig ATTRIBUTE_UNUSED, siginfo_t* info, voi VLOG(signals) << "stack overflow handler with sp at " << std::hex << &uc; VLOG(signals) << "sigcontext: " << std::hex << sc; - uintptr_t sp = sc->sc_regs[29]; // SP register + uintptr_t sp = sc->sc_regs[mips::SP]; VLOG(signals) << "sp: " << std::hex << sp; uintptr_t fault_addr = reinterpret_cast<uintptr_t>(info->si_addr); // BVA addr @@ -139,7 +139,7 @@ bool StackOverflowHandler::Action(int sig ATTRIBUTE_UNUSED, siginfo_t* info, voi // caused this fault. This will be inserted into a callee save frame by // the function to which this handler returns (art_quick_throw_stack_overflow). sc->sc_pc = reinterpret_cast<uintptr_t>(art_quick_throw_stack_overflow); - sc->sc_regs[25] = sc->sc_pc; // make sure T9 points to the function + sc->sc_regs[mips::T9] = sc->sc_pc; // make sure T9 points to the function // The kernel will now return to the address in sc->arm_pc. 
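// (Editor's note: "sc->arm_pc" in the comment above is wording inherited from the ARM
// fault handler; on MIPS the kernel resumes at the sc_pc field assigned a few lines up.)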
return true; diff --git a/runtime/arch/mips/instruction_set_features_mips.cc b/runtime/arch/mips/instruction_set_features_mips.cc index 93d79b7763..b3a98667d0 100644 --- a/runtime/arch/mips/instruction_set_features_mips.cc +++ b/runtime/arch/mips/instruction_set_features_mips.cc @@ -76,21 +76,22 @@ const MipsInstructionSetFeatures* MipsInstructionSetFeatures::FromVariant( GetFlagsFromCppDefined(&mips_isa_gte2, &r6, &fpu_32bit); // Override defaults based on variant string. - // Only care if it is R1, R2 or R6 and we assume all CPUs will have a FP unit. + // Only care if it is R1, R2, R5 or R6 and we assume all CPUs will have a FP unit. constexpr const char* kMips32Prefix = "mips32r"; const size_t kPrefixLength = strlen(kMips32Prefix); if (variant.compare(0, kPrefixLength, kMips32Prefix, kPrefixLength) == 0 && variant.size() > kPrefixLength) { - if (variant[kPrefixLength] >= '6') { - fpu_32bit = false; - r6 = true; - } - if (variant[kPrefixLength] >= '2') { - mips_isa_gte2 = true; - } + r6 = (variant[kPrefixLength] >= '6'); + fpu_32bit = (variant[kPrefixLength] < '5'); + mips_isa_gte2 = (variant[kPrefixLength] >= '2'); } else if (variant == "default") { - // Default variant is: smp = true, has fpu, is gte2, is not r6. This is the traditional - // setting. + // Default variant is: smp = true, has FPU, is gte2. This is the traditional setting. + // + // Note, we get FPU bitness and R6-ness from the build (using cpp defines, see above) + // and don't override them because many things depend on the "default" variant being + // sufficient for most purposes. That is, "default" should work for both R2 and R6. + // Use "mips32r#" to get a specific configuration, possibly not matching the runtime + // ISA (e.g. for ISA-specific testing of dex2oat internals). mips_isa_gte2 = true; } else { LOG(WARNING) << "Unexpected CPU variant for Mips32 using defaults: " << variant; diff --git a/runtime/arch/mips/instruction_set_features_mips.h b/runtime/arch/mips/instruction_set_features_mips.h index aac436e468..120dc1c0a5 100644 --- a/runtime/arch/mips/instruction_set_features_mips.h +++ b/runtime/arch/mips/instruction_set_features_mips.h @@ -81,8 +81,19 @@ class MipsInstructionSetFeatures FINAL : public InstructionSetFeatures { private: MipsInstructionSetFeatures(bool smp, bool fpu_32bit, bool mips_isa_gte2, bool r6) - : InstructionSetFeatures(smp), fpu_32bit_(fpu_32bit), mips_isa_gte2_(mips_isa_gte2), r6_(r6) - {} + : InstructionSetFeatures(smp), + fpu_32bit_(fpu_32bit), + mips_isa_gte2_(mips_isa_gte2), + r6_(r6) { + // Sanity checks. + if (r6) { + CHECK(mips_isa_gte2); + CHECK(!fpu_32bit); + } + if (!mips_isa_gte2) { + CHECK(fpu_32bit); + } + } // Bitmap positions for encoding features as a bitmap. 
enum { diff --git a/runtime/arch/mips64/fault_handler_mips64.cc b/runtime/arch/mips64/fault_handler_mips64.cc index c9a32ad7f9..0bbb6e1b03 100644 --- a/runtime/arch/mips64/fault_handler_mips64.cc +++ b/runtime/arch/mips64/fault_handler_mips64.cc @@ -44,7 +44,7 @@ void FaultManager::GetMethodAndReturnPcAndSp(siginfo_t* siginfo, void* context, uintptr_t* out_return_pc, uintptr_t* out_sp) { struct ucontext* uc = reinterpret_cast<struct ucontext*>(context); struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext); - *out_sp = static_cast<uintptr_t>(sc->sc_regs[29]); // SP register + *out_sp = static_cast<uintptr_t>(sc->sc_regs[mips64::SP]); VLOG(signals) << "sp: " << *out_sp; if (*out_sp == 0) { return; @@ -56,7 +56,7 @@ void FaultManager::GetMethodAndReturnPcAndSp(siginfo_t* siginfo, void* context, uintptr_t* overflow_addr = reinterpret_cast<uintptr_t*>( reinterpret_cast<uint8_t*>(*out_sp) - GetStackOverflowReservedBytes(kMips64)); if (overflow_addr == fault_addr) { - *out_method = reinterpret_cast<ArtMethod*>(sc->sc_regs[4]); // A0 register + *out_method = reinterpret_cast<ArtMethod*>(sc->sc_regs[mips64::A0]); } else { // The method is at the top of the stack. *out_method = *reinterpret_cast<ArtMethod**>(*out_sp); @@ -83,12 +83,12 @@ bool NullPointerHandler::Action(int sig ATTRIBUTE_UNUSED, siginfo_t* info, void* struct ucontext *uc = reinterpret_cast<struct ucontext*>(context); struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext); - sc->sc_regs[31] = sc->sc_pc + 4; // RA needs to point to gc map location + sc->sc_regs[mips64::RA] = sc->sc_pc + 4; // RA needs to point to gc map location sc->sc_pc = reinterpret_cast<uintptr_t>(art_quick_throw_null_pointer_exception_from_signal); - sc->sc_regs[25] = sc->sc_pc; // make sure T9 points to the function + sc->sc_regs[mips64::T9] = sc->sc_pc; // make sure T9 points to the function // Pass the faulting address as the first argument of // art_quick_throw_null_pointer_exception_from_signal. - sc->sc_regs[0] = reinterpret_cast<uintptr_t>(info->si_addr); + sc->sc_regs[mips64::A0] = reinterpret_cast<uintptr_t>(info->si_addr); VLOG(signals) << "Generating null pointer exception"; return true; } @@ -117,7 +117,7 @@ bool StackOverflowHandler::Action(int sig ATTRIBUTE_UNUSED, siginfo_t* info, voi VLOG(signals) << "stack overflow handler with sp at " << std::hex << &uc; VLOG(signals) << "sigcontext: " << std::hex << sc; - uintptr_t sp = sc->sc_regs[29]; // SP register + uintptr_t sp = sc->sc_regs[mips64::SP]; VLOG(signals) << "sp: " << std::hex << sp; uintptr_t fault_addr = reinterpret_cast<uintptr_t>(info->si_addr); // BVA addr @@ -140,7 +140,7 @@ bool StackOverflowHandler::Action(int sig ATTRIBUTE_UNUSED, siginfo_t* info, voi // caused this fault. This will be inserted into a callee save frame by // the function to which this handler returns (art_quick_throw_stack_overflow). sc->sc_pc = reinterpret_cast<uintptr_t>(art_quick_throw_stack_overflow); - sc->sc_regs[25] = sc->sc_pc; // make sure T9 points to the function + sc->sc_regs[mips64::T9] = sc->sc_pc; // make sure T9 points to the function // The kernel will now return to the address in sc->arm_pc. 
return true; diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S index 5851fbd804..6234f0f73a 100644 --- a/runtime/arch/x86/quick_entrypoints_x86.S +++ b/runtime/arch/x86/quick_entrypoints_x86.S @@ -228,7 +228,7 @@ END_MACRO MACRO0(DELIVER_PENDING_EXCEPTION) SETUP_SAVE_ALL_CALLEE_SAVE_FRAME ebx, ebx // save callee saves for throw // Outgoing argument set up - subl MACRO_LITERAL(12), %esp // Alignment padding + subl MACRO_LITERAL(12), %esp // alignment padding CFI_ADJUST_CFA_OFFSET(12) pushl %fs:THREAD_SELF_OFFSET // pass Thread::Current() CFI_ADJUST_CFA_OFFSET(4) @@ -254,7 +254,7 @@ MACRO2(ONE_ARG_RUNTIME_EXCEPTION, c_name, cxx_name) SETUP_SAVE_ALL_CALLEE_SAVE_FRAME ebx, ebx // save all registers as basis for long jump context mov %esp, %ecx // Outgoing argument set up - subl MACRO_LITERAL(8), %esp // alignment padding + subl MACRO_LITERAL(8), %esp // alignment padding CFI_ADJUST_CFA_OFFSET(8) pushl %fs:THREAD_SELF_OFFSET // pass Thread::Current() CFI_ADJUST_CFA_OFFSET(4) @@ -1909,10 +1909,12 @@ DEFINE_FUNCTION art_nested_signal_return END_FUNCTION art_nested_signal_return DEFINE_FUNCTION art_quick_read_barrier_mark + subl LITERAL(8), %esp // alignment padding + CFI_ADJUST_CFA_OFFSET(8) PUSH eax // pass arg1 - obj call SYMBOL(artReadBarrierMark) // artReadBarrierMark(obj) - addl LITERAL(4), %esp // pop argument - CFI_ADJUST_CFA_OFFSET(-4) + addl LITERAL(12), %esp // pop argument and remove padding + CFI_ADJUST_CFA_OFFSET(-12) ret END_FUNCTION art_quick_read_barrier_mark @@ -1927,10 +1929,12 @@ DEFINE_FUNCTION art_quick_read_barrier_slow END_FUNCTION art_quick_read_barrier_slow DEFINE_FUNCTION art_quick_read_barrier_for_root_slow + subl LITERAL(8), %esp // alignment padding + CFI_ADJUST_CFA_OFFSET(8) PUSH eax // pass arg1 - root call SYMBOL(artReadBarrierForRootSlow) // artReadBarrierForRootSlow(root) - addl LITERAL(4), %esp // pop argument - CFI_ADJUST_CFA_OFFSET(-4) + addl LITERAL(12), %esp // pop argument and remove padding + CFI_ADJUST_CFA_OFFSET(-12) ret END_FUNCTION art_quick_read_barrier_for_root_slow diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc index fe7448fa25..3ec8f21ce0 100644 --- a/runtime/class_linker.cc +++ b/runtime/class_linker.cc @@ -2536,6 +2536,10 @@ mirror::Class* ClassLinker::DefineClass(Thread* self, CHECK(h_new_class.Get() != nullptr) << descriptor; CHECK(h_new_class->IsResolved()) << descriptor; + // Update the dex cache of where the class is defined. Inlining depends on having + // this filled. + h_new_class->GetDexCache()->SetResolvedType(h_new_class->GetDexTypeIndex(), h_new_class.Get()); + // Instrumentation may have updated entrypoints for all methods of all // classes. However it could not update methods of this class while we // were loading it. Now the class is resolved, we can update entrypoints diff --git a/runtime/entrypoints/entrypoint_utils-inl.h b/runtime/entrypoints/entrypoint_utils-inl.h index 1b55d2f331..d61d0aa55b 100644 --- a/runtime/entrypoints/entrypoint_utils-inl.h +++ b/runtime/entrypoints/entrypoint_utils-inl.h @@ -39,45 +39,83 @@ namespace art { -template <bool kResolve = true> inline ArtMethod* GetResolvedMethod(ArtMethod* outer_method, const InlineInfo& inline_info, const InlineInfoEncoding& encoding, uint8_t inlining_depth) - SHARED_REQUIRES(Locks::mutator_lock_) { + SHARED_REQUIRES(Locks::mutator_lock_) { + // This method is being used by artQuickResolutionTrampoline, before it sets up + // the passed parameters in a GC friendly way. 
Therefore we must never be + // suspended while executing it. + ScopedAssertNoThreadSuspension sants(Thread::Current(), __FUNCTION__); + uint32_t method_index = inline_info.GetMethodIndexAtDepth(encoding, inlining_depth); InvokeType invoke_type = static_cast<InvokeType>( inline_info.GetInvokeTypeAtDepth(encoding, inlining_depth)); - ArtMethod* caller = outer_method->GetDexCacheResolvedMethod(method_index, sizeof(void*)); - if (!caller->IsRuntimeMethod()) { - return caller; - } - if (!kResolve) { - return nullptr; + ArtMethod* inlined_method = outer_method->GetDexCacheResolvedMethod(method_index, sizeof(void*)); + if (!inlined_method->IsRuntimeMethod()) { + return inlined_method; } - // The method in the dex cache can be the runtime method responsible for invoking + // The method in the dex cache is the runtime method responsible for invoking // the stub that will then update the dex cache. Therefore, we need to do the // resolution ourselves. - // We first find the class loader of our caller. If it is the outer method, we can directly - // use its class loader. Otherwise, we also need to resolve our caller. - StackHandleScope<2> hs(Thread::Current()); - ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); - MutableHandle<mirror::ClassLoader> class_loader(hs.NewHandle<mirror::Class>(nullptr)); - Handle<mirror::DexCache> dex_cache(hs.NewHandle(outer_method->GetDexCache())); - if (inlining_depth == 0) { - class_loader.Assign(outer_method->GetClassLoader()); + // We first find the dex cache of our caller. If it is the outer method, we can directly + // use its dex cache. Otherwise, we also need to resolve our caller. + ArtMethod* caller = outer_method; + if (inlining_depth != 0) { + caller = GetResolvedMethod(outer_method, + inline_info, + encoding, + inlining_depth - 1); + } + DCHECK_EQ(caller->GetDexCache(), outer_method->GetDexCache()) + << "Compiler only supports inlining calls within the same dex cache"; + const DexFile* dex_file = outer_method->GetDexFile(); + const DexFile::MethodId& method_id = dex_file->GetMethodId(method_index); + + if (inline_info.GetDexPcAtDepth(encoding, inlining_depth) == static_cast<uint32_t>(-1)) { + // "charAt" special case. It is the only non-leaf method we inline across dex files. + if (kIsDebugBuild) { + const char* name = dex_file->StringDataByIdx(method_id.name_idx_); + DCHECK_EQ(std::string(name), "charAt"); + DCHECK_EQ(std::string(dex_file->GetMethodShorty(method_id)), "CI") + << std::string(dex_file->GetMethodShorty(method_id)); + DCHECK_EQ(std::string(dex_file->StringByTypeIdx(method_id.class_idx_)), "Ljava/lang/String;") + << std::string(dex_file->StringByTypeIdx(method_id.class_idx_)); + } + mirror::Class* cls = + Runtime::Current()->GetClassLinker()->GetClassRoot(ClassLinker::kJavaLangString); + // Update the dex cache for future lookups. 
+ caller->GetDexCache()->SetResolvedType(method_id.class_idx_, cls); + inlined_method = cls->FindVirtualMethod("charAt", "(I)C", sizeof(void*)); } else { - caller = GetResolvedMethod<kResolve>(outer_method, - inline_info, - encoding, - inlining_depth - 1); - class_loader.Assign(caller->GetClassLoader()); + mirror::Class* klass = caller->GetDexCache()->GetResolvedType(method_id.class_idx_); + DCHECK_EQ(klass->GetDexCache(), caller->GetDexCache()) + << "Compiler only supports inlining calls within the same dex cache"; + switch (invoke_type) { + case kDirect: + case kStatic: + inlined_method = + klass->FindDirectMethod(klass->GetDexCache(), method_index, sizeof(void*)); + break; + case kSuper: + case kVirtual: + inlined_method = + klass->FindVirtualMethod(klass->GetDexCache(), method_index, sizeof(void*)); + break; + default: + LOG(FATAL) << "Unimplemented inlined invocation type: " << invoke_type; + UNREACHABLE(); + } } - return class_linker->ResolveMethod<ClassLinker::kNoICCECheckForCache>( - *outer_method->GetDexFile(), method_index, dex_cache, class_loader, nullptr, invoke_type); + // Update the dex cache for future lookups. Note that for static methods, this is safe + // when the class is being initialized, as the entrypoint for the ArtMethod is at + // this point still the resolution trampoline. + outer_method->SetDexCacheResolvedMethod(method_index, inlined_method, sizeof(void*)); + return inlined_method; } inline ArtMethod* GetCalleeSaveMethodCaller(Thread* self, Runtime::CalleeSaveType type) diff --git a/runtime/gc/allocation_record.cc b/runtime/gc/allocation_record.cc index d9f1507afe..6489a396d6 100644 --- a/runtime/gc/allocation_record.cc +++ b/runtime/gc/allocation_record.cc @@ -187,7 +187,7 @@ class AllocRecordStackVisitor : public StackVisitor { public: AllocRecordStackVisitor(Thread* thread, size_t max_depth, AllocRecordStackTrace* trace_out) SHARED_REQUIRES(Locks::mutator_lock_) - : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFramesNoResolve), + : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames), max_depth_(max_depth), trace_(trace_out) {} diff --git a/runtime/gc/collector/concurrent_copying-inl.h b/runtime/gc/collector/concurrent_copying-inl.h index 64fa4344d6..301111251a 100644 --- a/runtime/gc/collector/concurrent_copying-inl.h +++ b/runtime/gc/collector/concurrent_copying-inl.h @@ -28,7 +28,7 @@ namespace art { namespace gc { namespace collector { -inline mirror::Object* ConcurrentCopying::MarkUnevacFromSpaceRegionOrImmuneSpace( +inline mirror::Object* ConcurrentCopying::MarkUnevacFromSpaceRegion( mirror::Object* ref, accounting::ContinuousSpaceBitmap* bitmap) { // For the Baker-style RB, in a rare case, we could incorrectly change the object from white // to gray even though the object has already been marked through. This happens if a mutator @@ -69,6 +69,37 @@ inline mirror::Object* ConcurrentCopying::MarkUnevacFromSpaceRegionOrImmuneSpace return ref; } +template<bool kGrayImmuneObject> +inline mirror::Object* ConcurrentCopying::MarkImmuneSpace(mirror::Object* ref) { + if (kUseBakerReadBarrier) { + // The GC-running thread doesn't (need to) gray immune objects except when updating thread roots + // in the thread flip on behalf of suspended threads (when gc_grays_immune_objects_ is + // true). Also, a mutator doesn't (need to) gray an immune object after GC has updated all + // immune space objects (when updated_all_immune_objects_ is true). 
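A compact sketch of the white-to-gray transition described in the comment above (the debug checks and CAS that follow in the patch implement it). std::atomic and std::mutex stand in for ART's lock words, Atomic wrappers, and Mutex; this illustrates the protocol, not the runtime code:

```cpp
#include <atomic>
#include <mutex>
#include <vector>

enum class RbPtr { kWhite, kGray };

struct Obj {
  std::atomic<RbPtr> rb_ptr{RbPtr::kWhite};
};

std::atomic<bool> updated_all_immune_objects{false};
std::mutex immune_gray_stack_lock;
std::vector<Obj*> immune_gray_stack;

Obj* MarkImmuneSpace(Obj* ref) {
  // Fast path: once the GC has scanned every immune object, mutators no
  // longer need to gray them.
  if (updated_all_immune_objects.load(std::memory_order_relaxed)) {
    return ref;
  }
  RbPtr expected = RbPtr::kWhite;
  // The CAS may fail because another thread grayed the object first; that
  // is fine, the object only needs to be recorded once.
  if (ref->rb_ptr.compare_exchange_strong(expected, RbPtr::kGray)) {
    std::lock_guard<std::mutex> lock(immune_gray_stack_lock);
    immune_gray_stack.push_back(ref);  // The GC whitens these after its scan.
  }
  return ref;
}
```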
+ if (kIsDebugBuild) { + if (Thread::Current() == thread_running_gc_) { + DCHECK(!kGrayImmuneObject || + updated_all_immune_objects_.LoadRelaxed() || + gc_grays_immune_objects_); + } else { + DCHECK(kGrayImmuneObject); + } + } + if (!kGrayImmuneObject || updated_all_immune_objects_.LoadRelaxed()) { + return ref; + } + // This may or may not succeed, which is ok because the object may already be gray. + bool success = ref->AtomicSetReadBarrierPointer(ReadBarrier::WhitePtr(), + ReadBarrier::GrayPtr()); + if (success) { + MutexLock mu(Thread::Current(), immune_gray_stack_lock_); + immune_gray_stack_.push_back(ref); + } + } + return ref; +} + +template<bool kGrayImmuneObject> inline mirror::Object* ConcurrentCopying::Mark(mirror::Object* from_ref) { if (from_ref == nullptr) { return nullptr; @@ -109,10 +140,14 @@ inline mirror::Object* ConcurrentCopying::Mark(mirror::Object* from_ref) { return to_ref; } case space::RegionSpace::RegionType::kRegionTypeUnevacFromSpace: { - return MarkUnevacFromSpaceRegionOrImmuneSpace(from_ref, region_space_bitmap_); + return MarkUnevacFromSpaceRegion(from_ref, region_space_bitmap_); } case space::RegionSpace::RegionType::kRegionTypeNone: - return MarkNonMoving(from_ref); + if (immune_spaces_.ContainsObject(from_ref)) { + return MarkImmuneSpace<kGrayImmuneObject>(from_ref); + } else { + return MarkNonMoving(from_ref); + } default: UNREACHABLE(); } diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc index dd750060b8..b7b5aa0059 100644 --- a/runtime/gc/collector/concurrent_copying.cc +++ b/runtime/gc/collector/concurrent_copying.cc @@ -50,14 +50,16 @@ ConcurrentCopying::ConcurrentCopying(Heap* heap, const std::string& name_prefix) mark_stack_lock_("concurrent copying mark stack lock", kMarkSweepMarkStackLock), thread_running_gc_(nullptr), is_marking_(false), is_active_(false), is_asserting_to_space_invariant_(false), + region_space_bitmap_(nullptr), heap_mark_bitmap_(nullptr), live_stack_freeze_size_(0), mark_stack_mode_(kMarkStackModeOff), weak_ref_access_enabled_(true), skipped_blocks_lock_("concurrent copying bytes blocks lock", kMarkSweepMarkStackLock), rb_table_(heap_->GetReadBarrierTable()), - force_evacuate_all_(false) { + force_evacuate_all_(false), + immune_gray_stack_lock_("concurrent copying immune gray stack lock", + kMarkSweepMarkStackLock) { static_assert(space::RegionSpace::kRegionSize == accounting::ReadBarrierTable::kRegionSize, "The region space size and the read barrier table region size must match"); - cc_heap_bitmap_.reset(new accounting::HeapBitmap(heap)); Thread* self = Thread::Current(); { ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_); @@ -139,19 +141,10 @@ void ConcurrentCopying::BindBitmaps() { space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect) { CHECK(space->IsZygoteSpace() || space->IsImageSpace()); immune_spaces_.AddSpace(space); - const char* bitmap_name = space->IsImageSpace() ? "cc image space bitmap" : - "cc zygote space bitmap"; - // TODO: try avoiding using bitmaps for image/zygote to save space. 
- accounting::ContinuousSpaceBitmap* bitmap = - accounting::ContinuousSpaceBitmap::Create(bitmap_name, space->Begin(), space->Capacity()); - cc_heap_bitmap_->AddContinuousSpaceBitmap(bitmap); - cc_bitmaps_.push_back(bitmap); } else if (space == region_space_) { accounting::ContinuousSpaceBitmap* bitmap = accounting::ContinuousSpaceBitmap::Create("cc region space bitmap", space->Begin(), space->Capacity()); - cc_heap_bitmap_->AddContinuousSpaceBitmap(bitmap); - cc_bitmaps_.push_back(bitmap); region_space_bitmap_ = bitmap; } } @@ -179,6 +172,15 @@ void ConcurrentCopying::InitializePhase() { } else { force_evacuate_all_ = false; } + if (kUseBakerReadBarrier) { + updated_all_immune_objects_.StoreRelaxed(false); + // GC may gray immune objects in the thread flip. + gc_grays_immune_objects_ = true; + if (kIsDebugBuild) { + MutexLock mu(Thread::Current(), immune_gray_stack_lock_); + DCHECK(immune_gray_stack_.empty()); + } + } BindBitmaps(); if (kVerboseMode) { LOG(INFO) << "force_evacuate_all=" << force_evacuate_all_; @@ -303,30 +305,6 @@ void ConcurrentCopying::RecordLiveStackFreezeSize(Thread* self) { live_stack_freeze_size_ = heap_->GetLiveStack()->Size(); } -// Used to visit objects in the immune spaces. -class ConcurrentCopying::ImmuneSpaceObjVisitor { - public: - explicit ImmuneSpaceObjVisitor(ConcurrentCopying* cc) : collector_(cc) {} - - void operator()(mirror::Object* obj) const SHARED_REQUIRES(Locks::mutator_lock_) - SHARED_REQUIRES(Locks::heap_bitmap_lock_) { - DCHECK(obj != nullptr); - DCHECK(collector_->immune_spaces_.ContainsObject(obj)); - accounting::ContinuousSpaceBitmap* cc_bitmap = - collector_->cc_heap_bitmap_->GetContinuousSpaceBitmap(obj); - DCHECK(cc_bitmap != nullptr) - << "An immune space object must have a bitmap"; - if (kIsDebugBuild) { - DCHECK(collector_->heap_->GetMarkBitmap()->Test(obj)) - << "Immune space object must be already marked"; - } - collector_->MarkUnevacFromSpaceRegionOrImmuneSpace(obj, cc_bitmap); - } - - private: - ConcurrentCopying* const collector_; -}; - class EmptyCheckpoint : public Closure { public: explicit EmptyCheckpoint(ConcurrentCopying* concurrent_copying) @@ -347,6 +325,27 @@ class EmptyCheckpoint : public Closure { ConcurrentCopying* const concurrent_copying_; }; +// Used to visit objects in the immune spaces. +inline void ConcurrentCopying::ScanImmuneObject(mirror::Object* obj) { + DCHECK(obj != nullptr); + DCHECK(immune_spaces_.ContainsObject(obj)); + // Update the fields without graying it or pushing it onto the mark stack. + Scan(obj); +} + +class ConcurrentCopying::ImmuneSpaceScanObjVisitor { + public: + explicit ImmuneSpaceScanObjVisitor(ConcurrentCopying* cc) + : collector_(cc) {} + + void operator()(mirror::Object* obj) const SHARED_REQUIRES(Locks::mutator_lock_) { + collector_->ScanImmuneObject(obj); + } + + private: + ConcurrentCopying* const collector_; +}; + // Concurrently mark roots that are guarded by read barriers and process the mark stack. void ConcurrentCopying::MarkingPhase() { TimingLogger::ScopedTiming split("MarkingPhase", GetTimings()); @@ -354,25 +353,46 @@ void ConcurrentCopying::MarkingPhase() { LOG(INFO) << "GC MarkingPhase"; } CHECK(weak_ref_access_enabled_); - { - // Mark the image root. The WB-based collectors do not need to - // scan the image objects from roots by relying on the card table, - // but it's necessary for the RB to-space invariant to hold. 
- TimingLogger::ScopedTiming split1("VisitImageRoots", GetTimings()); - for (space::ContinuousSpace* space : heap_->GetContinuousSpaces()) { - if (space->IsImageSpace()) { - gc::space::ImageSpace* image = space->AsImageSpace(); - if (image != nullptr) { - mirror::ObjectArray<mirror::Object>* image_root = image->GetImageHeader().GetImageRoots(); - mirror::Object* marked_image_root = Mark(image_root); - CHECK_EQ(image_root, marked_image_root) << "An image object does not move"; - if (ReadBarrier::kEnableToSpaceInvariantChecks) { - AssertToSpaceInvariant(nullptr, MemberOffset(0), marked_image_root); - } - } - } + + // Scan immune spaces. + // Update all the fields in the immune spaces first without graying the objects so that we + // minimize dirty pages in the immune spaces. Note mutators can concurrently access and gray some + // of the objects. + if (kUseBakerReadBarrier) { + gc_grays_immune_objects_ = false; + } + for (auto& space : immune_spaces_.GetSpaces()) { + DCHECK(space->IsImageSpace() || space->IsZygoteSpace()); + accounting::ContinuousSpaceBitmap* live_bitmap = space->GetLiveBitmap(); + ImmuneSpaceScanObjVisitor visitor(this); + live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(space->Begin()), + reinterpret_cast<uintptr_t>(space->Limit()), + visitor); + } + if (kUseBakerReadBarrier) { + // This release fence makes the field updates in the above loop visible before allowing mutator + // getting access to immune objects without graying it first. + updated_all_immune_objects_.StoreRelease(true); + // Now whiten immune objects concurrently accessed and grayed by mutators. We can't do this in + // the above loop because we would incorrectly disable the read barrier by whitening an object + // which may point to an unscanned, white object, breaking the to-space invariant. + // + // Make sure no mutators are in the middle of marking an immune object before whitening immune + // objects. + IssueEmptyCheckpoint(); + MutexLock mu(Thread::Current(), immune_gray_stack_lock_); + if (kVerboseMode) { + LOG(INFO) << "immune gray stack size=" << immune_gray_stack_.size(); } + for (mirror::Object* obj : immune_gray_stack_) { + DCHECK(obj->GetReadBarrierPointer() == ReadBarrier::GrayPtr()); + bool success = obj->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(), + ReadBarrier::WhitePtr()); + DCHECK(success); + } + immune_gray_stack_.clear(); } + { TimingLogger::ScopedTiming split2("VisitConcurrentRoots", GetTimings()); Runtime::Current()->VisitConcurrentRoots(this, kVisitRootFlagAllRoots); @@ -383,16 +403,6 @@ void ConcurrentCopying::MarkingPhase() { Runtime::Current()->VisitNonThreadRoots(this); } - // Immune spaces. - for (auto& space : immune_spaces_.GetSpaces()) { - DCHECK(space->IsImageSpace() || space->IsZygoteSpace()); - accounting::ContinuousSpaceBitmap* live_bitmap = space->GetLiveBitmap(); - ImmuneSpaceObjVisitor visitor(this); - live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(space->Begin()), - reinterpret_cast<uintptr_t>(space->Limit()), - visitor); - } - Thread* self = Thread::Current(); { TimingLogger::ScopedTiming split7("ProcessMarkStack", GetTimings()); @@ -1239,6 +1249,9 @@ void ConcurrentCopying::ReclaimPhase() { IssueEmptyCheckpoint(); // Disable the check. 
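The StoreRelease added to MarkingPhase above publishes the already-updated immune-space fields to mutators. A minimal sketch of that pairing in C++11 atomics; note the runtime combines the release store with an empty checkpoint so that relaxed loads suffice on the mutator side, whereas this sketch uses the conservative acquire load:

```cpp
#include <atomic>

std::atomic<bool> updated_all_immune_objects{false};
int immune_object_field = 0;  // Stands in for fields updated during the scan.

void GcScanImmuneSpaces() {
  immune_object_field = 42;  // 1. Update the fields of all immune objects.
  // 2. Release store: everything written above becomes visible to any
  //    thread that observes 'true' below.
  updated_all_immune_objects.store(true, std::memory_order_release);
}

int MutatorRead() {
  if (updated_all_immune_objects.load(std::memory_order_acquire)) {
    return immune_object_field;  // Guaranteed to observe 42.
  }
  return -1;  // Otherwise the mutator must take the graying slow path.
}
```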
is_mark_stack_push_disallowed_.StoreSequentiallyConsistent(0); + if (kUseBakerReadBarrier) { + updated_all_immune_objects_.StoreSequentiallyConsistent(false); + } CheckEmptyMarkStack(); } @@ -1288,13 +1301,9 @@ void ConcurrentCopying::ReclaimPhase() { SwapBitmaps(); heap_->UnBindBitmaps(); - // Remove bitmaps for the immune spaces. - while (!cc_bitmaps_.empty()) { - accounting::ContinuousSpaceBitmap* cc_bitmap = cc_bitmaps_.back(); - cc_heap_bitmap_->RemoveContinuousSpaceBitmap(cc_bitmap); - delete cc_bitmap; - cc_bitmaps_.pop_back(); - } + // Delete the region bitmap. + DCHECK(region_space_bitmap_ != nullptr); + delete region_space_bitmap_; region_space_bitmap_ = nullptr; } @@ -1410,15 +1419,6 @@ void ConcurrentCopying::LogFromSpaceRefHolder(mirror::Object* obj, MemberOffset // In a non-moving space. if (immune_spaces_.ContainsObject(obj)) { LOG(INFO) << "holder is in an immune image or the zygote space."; - accounting::ContinuousSpaceBitmap* cc_bitmap = - cc_heap_bitmap_->GetContinuousSpaceBitmap(obj); - CHECK(cc_bitmap != nullptr) - << "An immune space object must have a bitmap."; - if (cc_bitmap->Test(obj)) { - LOG(INFO) << "holder is marked in the bit map."; - } else { - LOG(INFO) << "holder is NOT marked in the bit map."; - } } else { LOG(INFO) << "holder is in a non-immune, non-moving (or main) space."; accounting::ContinuousSpaceBitmap* mark_bitmap = @@ -1449,17 +1449,17 @@ void ConcurrentCopying::AssertToSpaceInvariantInNonMovingSpace(mirror::Object* o mirror::Object* ref) { // In a non-moving spaces. Check that the ref is marked. if (immune_spaces_.ContainsObject(ref)) { - accounting::ContinuousSpaceBitmap* cc_bitmap = - cc_heap_bitmap_->GetContinuousSpaceBitmap(ref); - CHECK(cc_bitmap != nullptr) - << "An immune space ref must have a bitmap. " << ref; if (kUseBakerReadBarrier) { - CHECK(cc_bitmap->Test(ref)) + // Immune object may not be gray if called from the GC. + if (Thread::Current() == thread_running_gc_ && !gc_grays_immune_objects_) { + return; + } + bool updated_all_immune_objects = updated_all_immune_objects_.LoadSequentiallyConsistent(); + CHECK(updated_all_immune_objects || ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr()) << "Unmarked immune space ref. obj=" << obj << " rb_ptr=" - << obj->GetReadBarrierPointer() << " ref=" << ref; - } else { - CHECK(cc_bitmap->Test(ref)) - << "Unmarked immune space ref. obj=" << obj << " ref=" << ref; + << (obj != nullptr ? obj->GetReadBarrierPointer() : nullptr) + << " ref=" << ref << " ref rb_ptr=" << ref->GetReadBarrierPointer() + << " updated_all_immune_objects=" << updated_all_immune_objects; } } else { accounting::ContinuousSpaceBitmap* mark_bitmap = @@ -1510,7 +1510,7 @@ class ConcurrentCopying::RefFieldsVisitor { void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const ALWAYS_INLINE SHARED_REQUIRES(Locks::mutator_lock_) { - collector_->MarkRoot(root); + collector_->MarkRoot</*kGrayImmuneObject*/false>(root); } private: @@ -1520,6 +1520,7 @@ class ConcurrentCopying::RefFieldsVisitor { // Scan ref fields of an object. inline void ConcurrentCopying::Scan(mirror::Object* to_ref) { DCHECK(!region_space_->IsInFromSpace(to_ref)); + DCHECK_EQ(Thread::Current(), thread_running_gc_); RefFieldsVisitor visitor(this); // Disable the read barrier for a performance reason. to_ref->VisitReferences</*kVisitNativeRoots*/true, kDefaultVerifyFlags, kWithoutReadBarrier>( @@ -1528,9 +1529,10 @@ inline void ConcurrentCopying::Scan(mirror::Object* to_ref) { // Process a field. 
inline void ConcurrentCopying::Process(mirror::Object* obj, MemberOffset offset) { + DCHECK_EQ(Thread::Current(), thread_running_gc_); mirror::Object* ref = obj->GetFieldObject< mirror::Object, kVerifyNone, kWithoutReadBarrier, false>(offset); - mirror::Object* to_ref = Mark(ref); + mirror::Object* to_ref = Mark</*kGrayImmuneObject*/false>(ref); if (to_ref == ref) { return; } @@ -1569,10 +1571,11 @@ inline void ConcurrentCopying::VisitRoots( } } +template<bool kGrayImmuneObject> inline void ConcurrentCopying::MarkRoot(mirror::CompressedReference<mirror::Object>* root) { DCHECK(!root->IsNull()); mirror::Object* const ref = root->AsMirrorPtr(); - mirror::Object* to_ref = Mark(ref); + mirror::Object* to_ref = Mark<kGrayImmuneObject>(ref); if (to_ref != ref) { auto* addr = reinterpret_cast<Atomic<mirror::CompressedReference<mirror::Object>>*>(root); auto expected_ref = mirror::CompressedReference<mirror::Object>::FromMirrorPtr(ref); @@ -1593,14 +1596,46 @@ inline void ConcurrentCopying::VisitRoots( for (size_t i = 0; i < count; ++i) { mirror::CompressedReference<mirror::Object>* const root = roots[i]; if (!root->IsNull()) { - MarkRoot(root); + // kGrayImmuneObject is true because this is used for the thread flip. + MarkRoot</*kGrayImmuneObject*/true>(root); } } } +// Temporary set gc_grays_immune_objects_ to true in a scope if the current thread is GC. +class ConcurrentCopying::ScopedGcGraysImmuneObjects { + public: + explicit ScopedGcGraysImmuneObjects(ConcurrentCopying* collector) + : collector_(collector), enabled_(false) { + if (kUseBakerReadBarrier && + collector_->thread_running_gc_ == Thread::Current() && + !collector_->gc_grays_immune_objects_) { + collector_->gc_grays_immune_objects_ = true; + enabled_ = true; + } + } + + ~ScopedGcGraysImmuneObjects() { + if (kUseBakerReadBarrier && + collector_->thread_running_gc_ == Thread::Current() && + enabled_) { + DCHECK(collector_->gc_grays_immune_objects_); + collector_->gc_grays_immune_objects_ = false; + } + } + + private: + ConcurrentCopying* const collector_; + bool enabled_; +}; + // Fill the given memory block with a dummy object. Used to fill in a // copy of objects that was lost in race. void ConcurrentCopying::FillWithDummyObject(mirror::Object* dummy_obj, size_t byte_size) { + // GC doesn't gray immune objects while scanning immune objects. But we need to trigger the read + // barriers here because we need the updated reference to the int array class, etc. Temporary set + // gc_grays_immune_objects_ to true so that we won't cause a DCHECK failure in MarkImmuneSpace(). + ScopedGcGraysImmuneObjects scoped_gc_gray_immune_objects(this); CHECK_ALIGNED(byte_size, kObjectAlignment); memset(dummy_obj, 0, byte_size); mirror::Class* int_array_class = mirror::IntArray::GetArrayClass(); @@ -1836,21 +1871,8 @@ mirror::Object* ConcurrentCopying::IsMarked(mirror::Object* from_ref) { } else { // from_ref is in a non-moving space. if (immune_spaces_.ContainsObject(from_ref)) { - accounting::ContinuousSpaceBitmap* cc_bitmap = - cc_heap_bitmap_->GetContinuousSpaceBitmap(from_ref); - DCHECK(cc_bitmap != nullptr) - << "An immune space object must have a bitmap"; - if (kIsDebugBuild) { - DCHECK(heap_mark_bitmap_->GetContinuousSpaceBitmap(from_ref)->Test(from_ref)) - << "Immune space object must be already marked"; - } - if (cc_bitmap->Test(from_ref)) { - // Already marked. - to_ref = from_ref; - } else { - // Newly marked. - to_ref = nullptr; - } + // An immune object is alive. + to_ref = from_ref; } else { // Non-immune non-moving space. 
Use the mark bitmap. accounting::ContinuousSpaceBitmap* mark_bitmap = @@ -1889,85 +1911,74 @@ bool ConcurrentCopying::IsOnAllocStack(mirror::Object* ref) { mirror::Object* ConcurrentCopying::MarkNonMoving(mirror::Object* ref) { // ref is in a non-moving space (from_ref == to_ref). DCHECK(!region_space_->HasAddress(ref)) << ref; - if (immune_spaces_.ContainsObject(ref)) { - accounting::ContinuousSpaceBitmap* cc_bitmap = - cc_heap_bitmap_->GetContinuousSpaceBitmap(ref); - DCHECK(cc_bitmap != nullptr) - << "An immune space object must have a bitmap"; - if (kIsDebugBuild) { - DCHECK(heap_mark_bitmap_->GetContinuousSpaceBitmap(ref)->Test(ref)) - << "Immune space object must be already marked"; + DCHECK(!immune_spaces_.ContainsObject(ref)); + // Use the mark bitmap. + accounting::ContinuousSpaceBitmap* mark_bitmap = + heap_mark_bitmap_->GetContinuousSpaceBitmap(ref); + accounting::LargeObjectBitmap* los_bitmap = + heap_mark_bitmap_->GetLargeObjectBitmap(ref); + CHECK(los_bitmap != nullptr) << "LOS bitmap covers the entire address range"; + bool is_los = mark_bitmap == nullptr; + if (!is_los && mark_bitmap->Test(ref)) { + // Already marked. + if (kUseBakerReadBarrier) { + DCHECK(ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr() || + ref->GetReadBarrierPointer() == ReadBarrier::WhitePtr()); + } + } else if (is_los && los_bitmap->Test(ref)) { + // Already marked in LOS. + if (kUseBakerReadBarrier) { + DCHECK(ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr() || + ref->GetReadBarrierPointer() == ReadBarrier::WhitePtr()); } - MarkUnevacFromSpaceRegionOrImmuneSpace(ref, cc_bitmap); } else { - // Use the mark bitmap. - accounting::ContinuousSpaceBitmap* mark_bitmap = - heap_mark_bitmap_->GetContinuousSpaceBitmap(ref); - accounting::LargeObjectBitmap* los_bitmap = - heap_mark_bitmap_->GetLargeObjectBitmap(ref); - CHECK(los_bitmap != nullptr) << "LOS bitmap covers the entire address range"; - bool is_los = mark_bitmap == nullptr; - if (!is_los && mark_bitmap->Test(ref)) { - // Already marked. - if (kUseBakerReadBarrier) { - DCHECK(ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr() || - ref->GetReadBarrierPointer() == ReadBarrier::WhitePtr()); + // Not marked. + if (IsOnAllocStack(ref)) { + // If it's on the allocation stack, it's considered marked. Keep it white. + // Objects on the allocation stack need not be marked. + if (!is_los) { + DCHECK(!mark_bitmap->Test(ref)); + } else { + DCHECK(!los_bitmap->Test(ref)); } - } else if (is_los && los_bitmap->Test(ref)) { - // Already marked in LOS. if (kUseBakerReadBarrier) { - DCHECK(ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr() || - ref->GetReadBarrierPointer() == ReadBarrier::WhitePtr()); + DCHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::WhitePtr()); } } else { - // Not marked. - if (IsOnAllocStack(ref)) { - // If it's on the allocation stack, it's considered marked. Keep it white. - // Objects on the allocation stack need not be marked. - if (!is_los) { - DCHECK(!mark_bitmap->Test(ref)); - } else { - DCHECK(!los_bitmap->Test(ref)); + // For the baker-style RB, we need to handle 'false-gray' cases. See the + // kRegionTypeUnevacFromSpace-case comment in Mark(). + if (kUseBakerReadBarrier) { + // Test the bitmap first to reduce the chance of false gray cases. + if ((!is_los && mark_bitmap->Test(ref)) || + (is_los && los_bitmap->Test(ref))) { + return ref; } - if (kUseBakerReadBarrier) { - DCHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::WhitePtr()); + } + // Not marked or on the allocation stack. Try to mark it. 
+ // This may or may not succeed, which is ok. + bool cas_success = false; + if (kUseBakerReadBarrier) { + cas_success = ref->AtomicSetReadBarrierPointer(ReadBarrier::WhitePtr(), + ReadBarrier::GrayPtr()); + } + if (!is_los && mark_bitmap->AtomicTestAndSet(ref)) { + // Already marked. + if (kUseBakerReadBarrier && cas_success && + ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr()) { + PushOntoFalseGrayStack(ref); } - } else { - // For the baker-style RB, we need to handle 'false-gray' cases. See the - // kRegionTypeUnevacFromSpace-case comment in Mark(). - if (kUseBakerReadBarrier) { - // Test the bitmap first to reduce the chance of false gray cases. - if ((!is_los && mark_bitmap->Test(ref)) || - (is_los && los_bitmap->Test(ref))) { - return ref; - } + } else if (is_los && los_bitmap->AtomicTestAndSet(ref)) { + // Already marked in LOS. + if (kUseBakerReadBarrier && cas_success && + ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr()) { + PushOntoFalseGrayStack(ref); } - // Not marked or on the allocation stack. Try to mark it. - // This may or may not succeed, which is ok. - bool cas_success = false; + } else { + // Newly marked. if (kUseBakerReadBarrier) { - cas_success = ref->AtomicSetReadBarrierPointer(ReadBarrier::WhitePtr(), - ReadBarrier::GrayPtr()); - } - if (!is_los && mark_bitmap->AtomicTestAndSet(ref)) { - // Already marked. - if (kUseBakerReadBarrier && cas_success && - ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr()) { - PushOntoFalseGrayStack(ref); - } - } else if (is_los && los_bitmap->AtomicTestAndSet(ref)) { - // Already marked in LOS. - if (kUseBakerReadBarrier && cas_success && - ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr()) { - PushOntoFalseGrayStack(ref); - } - } else { - // Newly marked. - if (kUseBakerReadBarrier) { - DCHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::GrayPtr()); - } - PushOntoMarkStack(ref); + DCHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::GrayPtr()); } + PushOntoMarkStack(ref); } } } diff --git a/runtime/gc/collector/concurrent_copying.h b/runtime/gc/collector/concurrent_copying.h index a986a7a1db..166a1f0b2a 100644 --- a/runtime/gc/collector/concurrent_copying.h +++ b/runtime/gc/collector/concurrent_copying.h @@ -61,10 +61,12 @@ class ConcurrentCopying : public GarbageCollector { ConcurrentCopying(Heap* heap, const std::string& name_prefix = ""); ~ConcurrentCopying(); - virtual void RunPhases() OVERRIDE REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_); - void InitializePhase() SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_); + virtual void RunPhases() OVERRIDE + REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_); + void InitializePhase() SHARED_REQUIRES(Locks::mutator_lock_) + REQUIRES(!mark_stack_lock_, !immune_gray_stack_lock_); void MarkingPhase() SHARED_REQUIRES(Locks::mutator_lock_) - REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_); + REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_); void ReclaimPhase() SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_); void FinishPhase() REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_); @@ -92,8 +94,9 @@ class ConcurrentCopying : public GarbageCollector { DCHECK(ref != nullptr); return IsMarked(ref) == ref; } + template<bool kGrayImmuneObject = true> ALWAYS_INLINE mirror::Object* Mark(mirror::Object* from_ref) SHARED_REQUIRES(Locks::mutator_lock_) - REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_); + REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_); 
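These header hunks mostly thread !immune_gray_stack_lock_ through REQUIRES clauses. A small sketch of the Clang thread-safety checking behind such annotations, written with the raw attribute spellings (ART wraps them in macros like REQUIRES and GUARDED_BY) and compiled with -Wthread-safety:

```cpp
#include <mutex>

// Raw Clang attribute spellings; real code uses macro wrappers.
#define CAPABILITY(x) __attribute__((capability(x)))
#define ACQUIRE() __attribute__((acquire_capability()))
#define RELEASE() __attribute__((release_capability()))
#define EXCLUDES(...) __attribute__((locks_excluded(__VA_ARGS__)))

class CAPABILITY("mutex") Mutex {
 public:
  void Lock() ACQUIRE() { mu_.lock(); }
  void Unlock() RELEASE() { mu_.unlock(); }
 private:
  std::mutex mu_;
};

Mutex immune_gray_stack_lock;

// Caller must NOT already hold the lock: Mark() acquires it internally.
void Mark() EXCLUDES(immune_gray_stack_lock) {
  immune_gray_stack_lock.Lock();
  // ... push onto the gray stack ...
  immune_gray_stack_lock.Unlock();
}

void Caller() {
  immune_gray_stack_lock.Lock();
  Mark();  // Flagged at compile time under -Wthread-safety.
  immune_gray_stack_lock.Unlock();
}
```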
bool IsMarking() const { return is_marking_; } @@ -117,16 +120,19 @@ class ConcurrentCopying : public GarbageCollector { void Scan(mirror::Object* to_ref) SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_); void Process(mirror::Object* obj, MemberOffset offset) - SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_ , !skipped_blocks_lock_); + SHARED_REQUIRES(Locks::mutator_lock_) + REQUIRES(!mark_stack_lock_ , !skipped_blocks_lock_, !immune_gray_stack_lock_); virtual void VisitRoots(mirror::Object*** roots, size_t count, const RootInfo& info) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) - REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_); + REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_); + template<bool kGrayImmuneObject> void MarkRoot(mirror::CompressedReference<mirror::Object>* root) - SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_); + SHARED_REQUIRES(Locks::mutator_lock_) + REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_); virtual void VisitRoots(mirror::CompressedReference<mirror::Object>** roots, size_t count, const RootInfo& info) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) - REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_); + REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_); void VerifyNoFromSpaceReferences() REQUIRES(Locks::mutator_lock_); accounting::ObjectStack* GetAllocationStack(); accounting::ObjectStack* GetLiveStack(); @@ -146,9 +152,11 @@ class ConcurrentCopying : public GarbageCollector { SHARED_REQUIRES(Locks::mutator_lock_); void ProcessReferences(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_); virtual mirror::Object* MarkObject(mirror::Object* from_ref) OVERRIDE - SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_); + SHARED_REQUIRES(Locks::mutator_lock_) + REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_); virtual void MarkHeapReference(mirror::HeapReference<mirror::Object>* from_ref) OVERRIDE - SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_); + SHARED_REQUIRES(Locks::mutator_lock_) + REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_); virtual mirror::Object* IsMarked(mirror::Object* from_ref) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_); virtual bool IsMarkedHeapReference(mirror::HeapReference<mirror::Object>* field) OVERRIDE @@ -182,14 +190,19 @@ class ConcurrentCopying : public GarbageCollector { void ExpandGcMarkStack() SHARED_REQUIRES(Locks::mutator_lock_); mirror::Object* MarkNonMoving(mirror::Object* from_ref) SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_); - ALWAYS_INLINE mirror::Object* MarkUnevacFromSpaceRegionOrImmuneSpace(mirror::Object* from_ref, + ALWAYS_INLINE mirror::Object* MarkUnevacFromSpaceRegion(mirror::Object* from_ref, accounting::SpaceBitmap<kObjectAlignment>* bitmap) SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_); + template<bool kGrayImmuneObject> + ALWAYS_INLINE mirror::Object* MarkImmuneSpace(mirror::Object* from_ref) + SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!immune_gray_stack_lock_); void PushOntoFalseGrayStack(mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_); void ProcessFalseGrayStack() SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_); + void ScanImmuneObject(mirror::Object* obj) + 
SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_); space::RegionSpace* region_space_; // The underlying region space. std::unique_ptr<Barrier> gc_barrier_; @@ -207,8 +220,6 @@ class ConcurrentCopying : public GarbageCollector { bool is_active_; // True while the collection is ongoing. bool is_asserting_to_space_invariant_; // True while asserting the to-space invariant. ImmuneSpaces immune_spaces_; - std::unique_ptr<accounting::HeapBitmap> cc_heap_bitmap_; - std::vector<accounting::SpaceBitmap<kObjectAlignment>*> cc_bitmaps_; accounting::SpaceBitmap<kObjectAlignment>* region_space_bitmap_; // A cache of Heap::GetMarkBitmap(). accounting::HeapBitmap* heap_mark_bitmap_; @@ -242,6 +253,10 @@ class ConcurrentCopying : public GarbageCollector { accounting::ReadBarrierTable* rb_table_; bool force_evacuate_all_; // True if all regions are evacuated. + Atomic<bool> updated_all_immune_objects_; + bool gc_grays_immune_objects_; + Mutex immune_gray_stack_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER; + std::vector<mirror::Object*> immune_gray_stack_ GUARDED_BY(immune_gray_stack_lock_); class AssertToSpaceInvariantFieldVisitor; class AssertToSpaceInvariantObjectVisitor; @@ -250,14 +265,15 @@ class ConcurrentCopying : public GarbageCollector { class ComputeUnevacFromSpaceLiveRatioVisitor; class DisableMarkingCheckpoint; class FlipCallback; - class ImmuneSpaceObjVisitor; + class ImmuneSpaceScanObjVisitor; class LostCopyVisitor; class RefFieldsVisitor; class RevokeThreadLocalMarkStackCheckpoint; + class ScopedGcGraysImmuneObjects; + class ThreadFlipVisitor; class VerifyNoFromSpaceRefsFieldVisitor; class VerifyNoFromSpaceRefsObjectVisitor; class VerifyNoFromSpaceRefsVisitor; - class ThreadFlipVisitor; DISALLOW_IMPLICIT_CONSTRUCTORS(ConcurrentCopying); }; diff --git a/runtime/monitor.cc b/runtime/monitor.cc index 71c866f3d6..396c946014 100644 --- a/runtime/monitor.cc +++ b/runtime/monitor.cc @@ -220,7 +220,7 @@ void Monitor::SetObject(mirror::Object* object) { struct NthCallerWithDexPcVisitor FINAL : public StackVisitor { explicit NthCallerWithDexPcVisitor(Thread* thread, size_t frame) SHARED_REQUIRES(Locks::mutator_lock_) - : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFramesNoResolve), + : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames), method_(nullptr), dex_pc_(0), current_frame_number_(0), diff --git a/runtime/stack.cc b/runtime/stack.cc index a5ca527aa2..1d913f2222 100644 --- a/runtime/stack.cc +++ b/runtime/stack.cc @@ -131,16 +131,10 @@ ArtMethod* StackVisitor::GetMethod() const { const OatQuickMethodHeader* method_header = GetCurrentOatQuickMethodHeader(); CodeInfoEncoding encoding = method_header->GetOptimizedCodeInfo().ExtractEncoding(); DCHECK(walk_kind_ != StackWalkKind::kSkipInlinedFrames); - bool allow_resolve = walk_kind_ != StackWalkKind::kIncludeInlinedFramesNoResolve; - return allow_resolve - ? 
GetResolvedMethod<true>(*GetCurrentQuickFrame(), inline_info, encoding.inline_info_encoding, depth_in_stack_map) - : GetResolvedMethod<false>(*GetCurrentQuickFrame(), inline_info, encoding.inline_info_encoding, depth_in_stack_map); + return GetResolvedMethod(*GetCurrentQuickFrame(), inline_info, encoding.inline_info_encoding, depth_in_stack_map); } else { return *cur_quick_frame_; } @@ -791,8 +785,7 @@ void StackVisitor::WalkStack(bool include_transitions) { cur_oat_quick_method_header_ = method->GetOatQuickMethodHeader(cur_quick_frame_pc_); SanityCheckFrame(); - if ((walk_kind_ == StackWalkKind::kIncludeInlinedFrames || - walk_kind_ == StackWalkKind::kIncludeInlinedFramesNoResolve) + if ((walk_kind_ == StackWalkKind::kIncludeInlinedFrames) && (cur_oat_quick_method_header_ != nullptr) && cur_oat_quick_method_header_->IsOptimized()) { CodeInfo code_info = cur_oat_quick_method_header_->GetOptimizedCodeInfo(); diff --git a/runtime/stack.h b/runtime/stack.h index e77ab4647e..84b665effa 100644 --- a/runtime/stack.h +++ b/runtime/stack.h @@ -568,7 +568,6 @@ class StackVisitor { // when walking the stack. enum class StackWalkKind { kIncludeInlinedFrames, - kIncludeInlinedFramesNoResolve, kSkipInlinedFrames, }; diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc index 97bcb7d406..16ef0fff22 100644 --- a/runtime/thread_list.cc +++ b/runtime/thread_list.cc @@ -613,11 +613,7 @@ void ThreadList::SuspendAllInternal(Thread* self, PLOG(FATAL) << "futex wait failed for SuspendAllInternal()"; } } - } else { - cur_val = pending_threads.LoadRelaxed(); - CHECK_EQ(cur_val, 0); - break; - } + } // else re-check pending_threads in the next iteration (this may be a spurious wake-up). #else // Spin wait. This is likely to be slow, but on most architectures ART_USE_FUTEXES is set.
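The thread_list.cc hunk above drops the early break after a futex wake-up, since returning from FUTEX_WAIT says nothing about the current value; the waiter must re-read it. A sketch of that pattern against the Linux futex(2) interface, with error handling trimmed to the essentials:

```cpp
#include <linux/futex.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <atomic>
#include <cerrno>

// Block until *pending_threads reaches zero, tolerating spurious wake-ups.
void WaitForPendingThreads(std::atomic<int32_t>* pending_threads) {
  int32_t cur_val;
  while ((cur_val = pending_threads->load(std::memory_order_relaxed)) != 0) {
    // Sleeps only while the word still equals cur_val; EAGAIN means the
    // value changed between our load and the kernel's check.
    long rc = syscall(SYS_futex, reinterpret_cast<int32_t*>(pending_threads),
                      FUTEX_WAIT_PRIVATE, cur_val, nullptr, nullptr, 0);
    if (rc != 0 && errno != EAGAIN && errno != EINTR) {
      break;  // Real failure; production code would PLOG(FATAL) here.
    }
    // Fall through and re-read: a wake-up does not imply the value is zero.
  }
}
```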
#endif diff --git a/test/458-checker-instruction-simplification/smali/SmaliTests.smali b/test/458-checker-instruction-simplification/smali/SmaliTests.smali index ede599b213..6845961f39 100644 --- a/test/458-checker-instruction-simplification/smali/SmaliTests.smali +++ b/test/458-checker-instruction-simplification/smali/SmaliTests.smali @@ -191,3 +191,139 @@ .end method +## CHECK-START: int SmaliTests.AddSubConst(int) instruction_simplifier (before) +## CHECK-DAG: <<ArgValue:i\d+>> ParameterValue +## CHECK-DAG: <<Const7:i\d+>> IntConstant 7 +## CHECK-DAG: <<Const8:i\d+>> IntConstant 8 +## CHECK-DAG: <<Add:i\d+>> Add [<<ArgValue>>,<<Const7>>] +## CHECK-DAG: <<Sub:i\d+>> Sub [<<Add>>,<<Const8>>] +## CHECK-DAG: Return [<<Sub>>] + +## CHECK-START: int SmaliTests.AddSubConst(int) instruction_simplifier (after) +## CHECK-DAG: <<ArgValue:i\d+>> ParameterValue +## CHECK-DAG: <<ConstM1:i\d+>> IntConstant -1 +## CHECK-DAG: <<Add:i\d+>> Add [<<ArgValue>>,<<ConstM1>>] +## CHECK-DAG: Return [<<Add>>] + +.method public static AddSubConst(I)I + .registers 3 + + .prologue + add-int/lit8 v0, p0, 7 + + const/16 v1, 8 + + sub-int v0, v0, v1 + + return v0 +.end method + +## CHECK-START: int SmaliTests.SubAddConst(int) instruction_simplifier (before) +## CHECK-DAG: <<ArgValue:i\d+>> ParameterValue +## CHECK-DAG: <<Const3:i\d+>> IntConstant 3 +## CHECK-DAG: <<Const4:i\d+>> IntConstant 4 +## CHECK-DAG: <<Sub:i\d+>> Sub [<<ArgValue>>,<<Const3>>] +## CHECK-DAG: <<Add:i\d+>> Add [<<Sub>>,<<Const4>>] +## CHECK-DAG: Return [<<Add>>] + +## CHECK-START: int SmaliTests.SubAddConst(int) instruction_simplifier (after) +## CHECK-DAG: <<ArgValue:i\d+>> ParameterValue +## CHECK-DAG: <<Const1:i\d+>> IntConstant 1 +## CHECK-DAG: <<Add:i\d+>> Add [<<ArgValue>>,<<Const1>>] +## CHECK-DAG: Return [<<Add>>] + +.method public static SubAddConst(I)I + .registers 2 + + .prologue + const/4 v0, 3 + + sub-int v0, p0, v0 + + add-int/lit8 v0, v0, 4 + + return v0 +.end method + +## CHECK-START: int SmaliTests.SubSubConst1(int) instruction_simplifier (before) +## CHECK-DAG: <<ArgValue:i\d+>> ParameterValue +## CHECK-DAG: <<Const9:i\d+>> IntConstant 9 +## CHECK-DAG: <<Const10:i\d+>> IntConstant 10 +## CHECK-DAG: <<Sub1:i\d+>> Sub [<<ArgValue>>,<<Const9>>] +## CHECK-DAG: <<Sub2:i\d+>> Sub [<<Sub1>>,<<Const10>>] +## CHECK-DAG: Return [<<Sub2>>] + +## CHECK-START: int SmaliTests.SubSubConst1(int) instruction_simplifier (after) +## CHECK-DAG: <<ArgValue:i\d+>> ParameterValue +## CHECK-DAG: <<ConstM19:i\d+>> IntConstant -19 +## CHECK-DAG: <<Add:i\d+>> Add [<<ArgValue>>,<<ConstM19>>] +## CHECK-DAG: Return [<<Add>>] + +.method public static SubSubConst1(I)I + .registers 3 + + .prologue + const/16 v1, 9 + + sub-int v0, p0, v1 + + const/16 v1, 10 + + sub-int v0, v0, v1 + + return v0 +.end method + +## CHECK-START: int SmaliTests.SubSubConst2(int) instruction_simplifier (before) +## CHECK-DAG: <<ArgValue:i\d+>> ParameterValue +## CHECK-DAG: <<Const11:i\d+>> IntConstant 11 +## CHECK-DAG: <<Const12:i\d+>> IntConstant 12 +## CHECK-DAG: <<Sub1:i\d+>> Sub [<<Const11>>,<<ArgValue>>] +## CHECK-DAG: <<Sub2:i\d+>> Sub [<<Sub1>>,<<Const12>>] +## CHECK-DAG: Return [<<Sub2>>] + +## CHECK-START: int SmaliTests.SubSubConst2(int) instruction_simplifier (after) +## CHECK-DAG: <<ArgValue:i\d+>> ParameterValue +## CHECK-DAG: <<ConstM1:i\d+>> IntConstant -1 +## CHECK-DAG: <<Sub:i\d+>> Sub [<<ConstM1>>,<<ArgValue>>] +## CHECK-DAG: Return [<<Sub>>] + +.method public static SubSubConst2(I)I + .registers 3 + + .prologue + rsub-int/lit8 v0, p0, 11 + + const/16 v1, 12 + + 
sub-int v0, v0, v1 + + return v0 +.end method + +## CHECK-START: int SmaliTests.SubSubConst3(int) instruction_simplifier (before) +## CHECK-DAG: <<ArgValue:i\d+>> ParameterValue +## CHECK-DAG: <<Const15:i\d+>> IntConstant 15 +## CHECK-DAG: <<Const16:i\d+>> IntConstant 16 +## CHECK-DAG: <<Sub1:i\d+>> Sub [<<ArgValue>>,<<Const16>>] +## CHECK-DAG: <<Sub2:i\d+>> Sub [<<Const15>>,<<Sub1>>] +## CHECK-DAG: Return [<<Sub2>>] + +## CHECK-START: int SmaliTests.SubSubConst3(int) instruction_simplifier (after) +## CHECK-DAG: <<ArgValue:i\d+>> ParameterValue +## CHECK-DAG: <<Const31:i\d+>> IntConstant 31 +## CHECK-DAG: <<Sub:i\d+>> Sub [<<Const31>>,<<ArgValue>>] +## CHECK-DAG: Return [<<Sub>>] + +.method public static SubSubConst3(I)I + .registers 2 + + .prologue + const/16 v0, 16 + + sub-int v0, p0, v0 + + rsub-int/lit8 v0, v0, 15 + + return v0 +.end method diff --git a/test/458-checker-instruction-simplification/src/Main.java b/test/458-checker-instruction-simplification/src/Main.java index ffce49d2e1..c717eaa85e 100644 --- a/test/458-checker-instruction-simplification/src/Main.java +++ b/test/458-checker-instruction-simplification/src/Main.java @@ -78,6 +78,29 @@ public class Main { return 0 + arg; } + /// CHECK-START: int Main.$noinline$AddAddSubAddConst(int) instruction_simplifier (before) + /// CHECK-DAG: <<ArgValue:i\d+>> ParameterValue + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 + /// CHECK-DAG: <<Const2:i\d+>> IntConstant 2 + /// CHECK-DAG: <<ConstM3:i\d+>> IntConstant -3 + /// CHECK-DAG: <<Const4:i\d+>> IntConstant 4 + /// CHECK-DAG: <<Add1:i\d+>> Add [<<ArgValue>>,<<Const1>>] + /// CHECK-DAG: <<Add2:i\d+>> Add [<<Add1>>,<<Const2>>] + /// CHECK-DAG: <<Add3:i\d+>> Add [<<Add2>>,<<ConstM3>>] + /// CHECK-DAG: <<Add4:i\d+>> Add [<<Add3>>,<<Const4>>] + /// CHECK-DAG: Return [<<Add4>>] + + /// CHECK-START: int Main.$noinline$AddAddSubAddConst(int) instruction_simplifier (after) + /// CHECK-DAG: <<ArgValue:i\d+>> ParameterValue + /// CHECK-DAG: <<Const4:i\d+>> IntConstant 4 + /// CHECK-DAG: <<Add:i\d+>> Add [<<ArgValue>>,<<Const4>>] + /// CHECK-DAG: Return [<<Add>>] + + public static int $noinline$AddAddSubAddConst(int arg) { + if (doThrow) { throw new Error(); } + return arg + 1 + 2 - 3 + 4; + } + /// CHECK-START: int Main.$noinline$AndAllOnes(int) instruction_simplifier (before) /// CHECK-DAG: <<Arg:i\d+>> ParameterValue /// CHECK-DAG: <<ConstF:i\d+>> IntConstant -1 @@ -364,6 +387,27 @@ public class Main { return arg * 128; } + /// CHECK-START: long Main.$noinline$MulMulMulConst(long) instruction_simplifier (before) + /// CHECK-DAG: <<ArgValue:j\d+>> ParameterValue + /// CHECK-DAG: <<Const10:j\d+>> LongConstant 10 + /// CHECK-DAG: <<Const11:j\d+>> LongConstant 11 + /// CHECK-DAG: <<Const12:j\d+>> LongConstant 12 + /// CHECK-DAG: <<Mul1:j\d+>> Mul [<<Const10>>,<<ArgValue>>] + /// CHECK-DAG: <<Mul2:j\d+>> Mul [<<Mul1>>,<<Const11>>] + /// CHECK-DAG: <<Mul3:j\d+>> Mul [<<Mul2>>,<<Const12>>] + /// CHECK-DAG: Return [<<Mul3>>] + + /// CHECK-START: long Main.$noinline$MulMulMulConst(long) instruction_simplifier (after) + /// CHECK-DAG: <<ArgValue:j\d+>> ParameterValue + /// CHECK-DAG: <<Const1320:j\d+>> LongConstant 1320 + /// CHECK-DAG: <<Mul:j\d+>> Mul [<<ArgValue>>,<<Const1320>>] + /// CHECK-DAG: Return [<<Mul>>] + + public static long $noinline$MulMulMulConst(long arg) { + if (doThrow) { throw new Error(); } + return 10 * arg * 11 * 12; + } + /// CHECK-START: int Main.$noinline$Or0(int) instruction_simplifier (before) /// CHECK-DAG: <<Arg:i\d+>> ParameterValue /// CHECK-DAG: <<Const0:i\d+>> 
IntConstant 0 @@ -490,6 +534,63 @@ public class Main { return 0 - arg; } + /// CHECK-START: int Main.$noinline$SubAddConst1(int) instruction_simplifier (before) + /// CHECK-DAG: <<ArgValue:i\d+>> ParameterValue + /// CHECK-DAG: <<Const5:i\d+>> IntConstant 5 + /// CHECK-DAG: <<Const6:i\d+>> IntConstant 6 + /// CHECK-DAG: <<Sub:i\d+>> Sub [<<Const5>>,<<ArgValue>>] + /// CHECK-DAG: <<Add:i\d+>> Add [<<Sub>>,<<Const6>>] + /// CHECK-DAG: Return [<<Add>>] + + /// CHECK-START: int Main.$noinline$SubAddConst1(int) instruction_simplifier (after) + /// CHECK-DAG: <<ArgValue:i\d+>> ParameterValue + /// CHECK-DAG: <<Const11:i\d+>> IntConstant 11 + /// CHECK-DAG: <<Sub:i\d+>> Sub [<<Const11>>,<<ArgValue>>] + /// CHECK-DAG: Return [<<Sub>>] + + public static int $noinline$SubAddConst1(int arg) { + if (doThrow) { throw new Error(); } + return 5 - arg + 6; + } + + /// CHECK-START: int Main.$noinline$SubAddConst2(int) instruction_simplifier (before) + /// CHECK-DAG: <<ArgValue:i\d+>> ParameterValue + /// CHECK-DAG: <<Const14:i\d+>> IntConstant 14 + /// CHECK-DAG: <<Const13:i\d+>> IntConstant 13 + /// CHECK-DAG: <<Add:i\d+>> Add [<<ArgValue>>,<<Const13>>] + /// CHECK-DAG: <<Sub:i\d+>> Sub [<<Const14>>,<<Add>>] + /// CHECK-DAG: Return [<<Sub>>] + + /// CHECK-START: int Main.$noinline$SubAddConst2(int) instruction_simplifier (after) + /// CHECK-DAG: <<ArgValue:i\d+>> ParameterValue + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 + /// CHECK-DAG: <<Sub:i\d+>> Sub [<<Const1>>,<<ArgValue>>] + /// CHECK-DAG: Return [<<Sub>>] + + public static int $noinline$SubAddConst2(int arg) { + if (doThrow) { throw new Error(); } + return 14 - (arg + 13); + } + + /// CHECK-START: long Main.$noinline$SubSubConst(long) instruction_simplifier (before) + /// CHECK-DAG: <<ArgValue:j\d+>> ParameterValue + /// CHECK-DAG: <<Const17:j\d+>> LongConstant 17 + /// CHECK-DAG: <<Const18:j\d+>> LongConstant 18 + /// CHECK-DAG: <<Sub1:j\d+>> Sub [<<Const18>>,<<ArgValue>>] + /// CHECK-DAG: <<Sub2:j\d+>> Sub [<<Const17>>,<<Sub1>>] + /// CHECK-DAG: Return [<<Sub2>>] + + /// CHECK-START: long Main.$noinline$SubSubConst(long) instruction_simplifier (after) + /// CHECK-DAG: <<ArgValue:j\d+>> ParameterValue + /// CHECK-DAG: <<ConstM1:j\d+>> LongConstant -1 + /// CHECK-DAG: <<Add:j\d+>> Add [<<ArgValue>>,<<ConstM1>>] + /// CHECK-DAG: Return [<<Add>>] + + public static long $noinline$SubSubConst(long arg) { + if (doThrow) { throw new Error(); } + return 17 - (18 - arg); + } + /// CHECK-START: long Main.$noinline$UShr0(long) instruction_simplifier (before) /// CHECK-DAG: <<Arg:j\d+>> ParameterValue /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 @@ -1757,6 +1858,17 @@ public class Main { } } + public static int $noinline$runSmaliTestConst(String name, int arg) { + if (doThrow) { throw new Error(); } + try { + Class<?> c = Class.forName("SmaliTests"); + Method m = c.getMethod(name, int.class); + return (Integer) m.invoke(null, arg); + } catch (Exception ex) { + throw new Error(ex); + } + } + /// CHECK-START: int Main.$noinline$intUnnecessaryShiftMasking(int, int) instruction_simplifier (before) /// CHECK: <<Value:i\d+>> ParameterValue /// CHECK: <<Shift:i\d+>> ParameterValue @@ -1863,12 +1975,14 @@ public static void main(String[] args) { int arg = 123456; assertLongEquals(arg, $noinline$Add0(arg)); + assertIntEquals(5, $noinline$AddAddSubAddConst(1)); assertIntEquals(arg, $noinline$AndAllOnes(arg)); assertLongEquals(arg, $noinline$Div1(arg)); assertIntEquals(-arg, $noinline$DivN1(arg)); assertLongEquals(arg, $noinline$Mul1(arg)); assertIntEquals(-arg, 
$noinline$MulN1(arg)); assertLongEquals((128 * arg), $noinline$MulPowerOfTwo128(arg)); + assertLongEquals(2640, $noinline$MulMulMulConst(2)); assertIntEquals(arg, $noinline$Or0(arg)); assertLongEquals(arg, $noinline$OrSame(arg)); assertIntEquals(arg, $noinline$Shl0(arg)); @@ -1876,6 +1990,9 @@ public static void main(String[] args) { assertLongEquals(arg, $noinline$Shr64(arg)); assertLongEquals(arg, $noinline$Sub0(arg)); assertIntEquals(-arg, $noinline$SubAliasNeg(arg)); + assertIntEquals(9, $noinline$SubAddConst1(2)); + assertIntEquals(-2, $noinline$SubAddConst2(3)); + assertLongEquals(3, $noinline$SubSubConst(4)); assertLongEquals(arg, $noinline$UShr0(arg)); assertIntEquals(arg, $noinline$Xor0(arg)); assertIntEquals(~arg, $noinline$XorAllOnes(arg)); @@ -2011,6 +2128,11 @@ public static void main(String[] args) { } } + assertIntEquals(0, $noinline$runSmaliTestConst("AddSubConst", 1)); + assertIntEquals(3, $noinline$runSmaliTestConst("SubAddConst", 2)); + assertIntEquals(-16, $noinline$runSmaliTestConst("SubSubConst1", 3)); + assertIntEquals(-5, $noinline$runSmaliTestConst("SubSubConst2", 4)); + assertIntEquals(26, $noinline$runSmaliTestConst("SubSubConst3", 5)); assertIntEquals(0x5e6f7808, $noinline$intUnnecessaryShiftMasking(0xabcdef01, 3)); assertIntEquals(0x5e6f7808, $noinline$intUnnecessaryShiftMasking(0xabcdef01, 3 + 32)); assertLongEquals(0xffffffffffffeaf3L, $noinline$longUnnecessaryShiftMasking(0xabcdef0123456789L, 50)); diff --git a/test/530-checker-loops1/src/Main.java b/test/530-checker-loops1/src/Main.java index 948a7b7dcc..dde4d62475 100644 --- a/test/530-checker-loops1/src/Main.java +++ b/test/530-checker-loops1/src/Main.java @@ -562,7 +562,7 @@ public class Main { // /// CHECK-START: void Main.linearTriangularOnTwoArrayLengths(int) BCE (after) /// CHECK-NOT: BoundsCheck - // TODO: also CHECK-NOT: Deoptimize, see b/27151190 + /// CHECK-NOT: Deoptimize private static void linearTriangularOnTwoArrayLengths(int n) { int[] a = new int[n]; for (int i = 0; i < a.length; i++) { @@ -604,7 +604,7 @@ public class Main { // /// CHECK-START: void Main.linearTriangularOnParameter(int) BCE (after) /// CHECK-NOT: BoundsCheck - // TODO: also CHECK-NOT: Deoptimize, see b/27151190 + /// CHECK-NOT: Deoptimize private static void linearTriangularOnParameter(int n) { int[] a = new int[n]; for (int i = 0; i < n; i++) { @@ -619,56 +619,56 @@ public class Main { } } - /// CHECK-START: void Main.linearTriangularVariationsInnerStrict(int) BCE (before) + /// CHECK-START: void Main.linearTriangularStrictLower(int) BCE (before) /// CHECK-DAG: BoundsCheck /// CHECK-DAG: BoundsCheck /// CHECK-DAG: BoundsCheck /// CHECK-DAG: BoundsCheck // - /// CHECK-START: void Main.linearTriangularVariationsInnerStrict(int) BCE (after) + /// CHECK-START: void Main.linearTriangularStrictLower(int) BCE (after) /// CHECK-NOT: BoundsCheck - // TODO: also CHECK-NOT: Deoptimize, see b/27151190 - private static void linearTriangularVariationsInnerStrict(int n) { + /// CHECK-NOT: Deoptimize + private static void linearTriangularStrictLower(int n) { int[] a = new int[n]; for (int i = 0; i < n; i++) { for (int j = 0; j < i; j++) { a[j] += 1; } - for (int j = i - 1; j > -1; j--) { + for (int j = i - 1; j >= 0; j--) { a[j] += 1; } for (int j = i; j < n; j++) { a[j] += 1; } - for (int j = n - 1; j > i - 1; j--) { + for (int j = n - 1; j >= i; j--) { a[j] += 1; } } verifyTriangular(a); } - /// CHECK-START: void Main.linearTriangularVariationsInnerNonStrict(int) BCE (before) + /// CHECK-START: void 
Main.linearTriangularStrictUpper(int) BCE (before) /// CHECK-DAG: BoundsCheck /// CHECK-DAG: BoundsCheck /// CHECK-DAG: BoundsCheck /// CHECK-DAG: BoundsCheck // - /// CHECK-START: void Main.linearTriangularVariationsInnerNonStrict(int) BCE (after) + /// CHECK-START: void Main.linearTriangularStrictUpper(int) BCE (after) /// CHECK-NOT: BoundsCheck - // TODO: also CHECK-NOT: Deoptimize, see b/27151190 - private static void linearTriangularVariationsInnerNonStrict(int n) { + /// CHECK-NOT: Deoptimize + private static void linearTriangularStrictUpper(int n) { int[] a = new int[n]; for (int i = 0; i < n; i++) { - for (int j = 0; j <= i - 1; j++) { + for (int j = 0; j <= i; j++) { a[j] += 1; } - for (int j = i - 1; j >= 0; j--) { + for (int j = i; j >= 0; j--) { a[j] += 1; } - for (int j = i; j <= n - 1; j++) { + for (int j = i + 1; j < n; j++) { a[j] += 1; } - for (int j = n - 1; j >= i; j--) { + for (int j = n - 1; j >= i + 1; j--) { a[j] += 1; } } @@ -802,8 +802,8 @@ public class Main { linearTriangularOnTwoArrayLengths(10); linearTriangularOnOneArrayLength(10); linearTriangularOnParameter(10); - linearTriangularVariationsInnerStrict(10); - linearTriangularVariationsInnerNonStrict(10); + linearTriangularStrictLower(10); + linearTriangularStrictUpper(10); { int[] t = linearTriangularOOB(); for (int i = 0; i < 200; i++) { diff --git a/test/530-checker-loops2/src/Main.java b/test/530-checker-loops2/src/Main.java index b12fbd6091..7acf0080f8 100644 --- a/test/530-checker-loops2/src/Main.java +++ b/test/530-checker-loops2/src/Main.java @@ -31,7 +31,7 @@ public class Main { // /// CHECK-START: void Main.bubble(int[]) BCE (after) /// CHECK-NOT: BoundsCheck - // TODO: also CHECK-NOT: Deoptimize, see b/27151190 + /// CHECK-NOT: Deoptimize private static void bubble(int[] a) { for (int i = a.length; --i >= 0;) { for (int j = 0; j < i; j++) { @@ -301,6 +301,53 @@ public class Main { } while (-1 <= i); } + /// CHECK-START: void Main.justRightTriangular1() BCE (before) + /// CHECK-DAG: BoundsCheck + // + /// CHECK-START: void Main.justRightTriangular1() BCE (after) + /// CHECK-NOT: BoundsCheck + /// CHECK-NOT: Deoptimize + private static void justRightTriangular1() { + int[] a = { 1 } ; + for (int i = Integer.MIN_VALUE + 5; i <= Integer.MIN_VALUE + 10; i++) { + for (int j = Integer.MIN_VALUE + 4; j < i - 5; j++) { + sResult += a[j - (Integer.MIN_VALUE + 4)]; + } + } + } + + /// CHECK-START: void Main.justRightTriangular2() BCE (before) + /// CHECK-DAG: BoundsCheck + // + /// CHECK-START: void Main.justRightTriangular2() BCE (after) + /// CHECK-NOT: BoundsCheck + /// CHECK-NOT: Deoptimize + private static void justRightTriangular2() { + int[] a = { 1 } ; + for (int i = Integer.MIN_VALUE + 5; i <= 10; i++) { + for (int j = 4; j < i - 5; j++) { + sResult += a[j - 4]; + } + } + } + + /// CHECK-START: void Main.justOOBTriangular() BCE (before) + /// CHECK-DAG: BoundsCheck + // + /// CHECK-START: void Main.justOOBTriangular() BCE (after) + /// CHECK-DAG: Deoptimize + // + /// CHECK-START: void Main.justOOBTriangular() BCE (after) + /// CHECK-NOT: BoundsCheck + private static void justOOBTriangular() { + int[] a = { 1 } ; + for (int i = Integer.MIN_VALUE + 4; i <= 10; i++) { + for (int j = 4; j < i - 5; j++) { + sResult += a[j - 4]; + } + } + } + /// CHECK-START: void Main.hiddenOOB1(int) BCE (before) /// CHECK-DAG: BoundsCheck // @@ -315,7 +362,6 @@ public class Main { // Dangerous loop where careless static range analysis would yield strict upper bound // on index j of 5. 
When, for instance, lo and thus i = -2147483648, the upper bound // becomes really positive due to arithmetic wrap-around, causing OOB. - // Dynamic BCE is feasible though, since it checks the range. for (int j = 4; j < i - 5; j++) { sResult += a[j - 4]; } @@ -336,13 +382,32 @@ public class Main { // Dangerous loop where careless static range analysis would yield strict lower bound // on index j of 5. When, for instance, hi and thus i = 2147483647, the upper bound // becomes really negative due to arithmetic wrap-around, causing OOB. - // Dynamic BCE is feasible though, since it checks the range. for (int j = 6; j > i + 5; j--) { sResult += a[j - 6]; } } } + /// CHECK-START: void Main.hiddenOOB3(int) BCE (before) + /// CHECK-DAG: BoundsCheck + // + /// CHECK-START: void Main.hiddenOOB3(int) BCE (after) + /// CHECK-DAG: Deoptimize + // + /// CHECK-START: void Main.hiddenOOB3(int) BCE (after) + /// CHECK-NOT: BoundsCheck + private static void hiddenOOB3(int hi) { + int[] a = { 11 } ; + for (int i = -1; i <= hi; i++) { + // Dangerous loop where careless static range analysis would yield strict lower bound + // on index j of 0. For large i, the initial value of j becomes really negative due + // to arithmetic wrap-around, causing OOB. + for (int j = i + 1; j < 1; j++) { + sResult += a[j]; + } + } + } + /// CHECK-START: void Main.hiddenInfiniteOOB() BCE (before) /// CHECK-DAG: BoundsCheck // @@ -376,7 +441,6 @@ public class Main { for (int i = -1; i <= 0; i++) { // Dangerous loop similar as above where the loop is now finite, but the // loop still goes out of bounds for i = -1 due to the large upper bound. - // Dynamic BCE is feasible though, since it checks the range. for (int j = -4; j < 2147483646 * i - 3; j++) { sResult += a[j + 4]; } @@ -432,6 +496,25 @@ public class Main { } } + /// CHECK-START: int Main.doNotHoist(int[]) BCE (before) + /// CHECK-DAG: BoundsCheck + // + /// CHECK-START: int Main.doNotHoist(int[]) BCE (after) + /// CHECK-NOT: BoundsCheck + public static int doNotHoist(int[] a) { + int n = a.length; + int x = 0; + // BCE applies, but hoisting would crash the loop. + for (int i = -10000; i < 10000; i++) { + for (int j = 0; j <= 1; j++) { + if (0 <= i && i < n) + x += a[i]; + } + } + return x; + } + + /// CHECK-START: int[] Main.add() BCE (before) /// CHECK-DAG: BoundsCheck // @@ -687,7 +770,7 @@ public class Main { /// CHECK-START: int Main.dynamicBCEAndConstantIndices(int[], int[][], int, int) BCE (after) // Order matters: /// CHECK: Deoptimize loop:<<Loop:B\d+>> - // CHECK-NOT: Goto loop:<<Loop>> + /// CHECK-NOT: Goto loop:<<Loop>> /// CHECK-DAG: {{l\d+}} ArrayGet loop:<<Loop>> /// CHECK-DAG: {{l\d+}} ArrayGet loop:<<Loop>> /// CHECK-DAG: {{l\d+}} ArrayGet loop:<<Loop>> @@ -839,6 +922,8 @@ public class Main { expectEquals(55, justRightDown1()); expectEquals(55, justRightDown2()); expectEquals(55, justRightDown3()); + + // Large bounds OOB. sResult = 0; try { justOOBUp(); @@ -890,6 +975,23 @@ public class Main { } expectEquals(1055, sResult); + // Triangular. + sResult = 0; + justRightTriangular1(); + expectEquals(1, sResult); + if (HEAVY) { + sResult = 0; + justRightTriangular2(); + expectEquals(1, sResult); + } + sResult = 0; + try { + justOOBTriangular(); + } catch (ArrayIndexOutOfBoundsException e) { + sResult += 1000; + } + expectEquals(1001, sResult); + // Hidden OOB. 
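The hidden-OOB tests exercised below rely on Java's wrap-around int arithmetic: for i near Integer.MIN_VALUE, the bound i - 5 wraps to a huge positive value. A worked illustration; the wrap is made explicit with unsigned arithmetic because signed overflow is undefined behavior in C++:

```cpp
#include <cstdint>
#include <cstdio>

// Two's-complement subtraction with Java int semantics.
int32_t JavaSub(int32_t a, int32_t b) {
  return static_cast<int32_t>(static_cast<uint32_t>(a) -
                              static_cast<uint32_t>(b));
}

int main() {
  int32_t i = INT32_MIN;          // e.g. 'lo' passed to hiddenOOB1.
  int32_t bound = JavaSub(i, 5);  // -2147483648 - 5 wraps to 2147483643.
  std::printf("i - 5 = %d\n", bound);
  // A "careless" analysis concluding that j < i - 5 keeps j small would let
  // j index far past a[0]; dynamic BCE instead checks the actual range at
  // run time and deoptimizes when it is violated.
  return 0;
}
```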
@@ -912,6 +1014,15 @@ public class Main {
       sResult += 1000;
     }
     expectEquals(1, sResult);
+    sResult = 0;
+    try {
+      hiddenOOB3(-1);  // no OOB
+    } catch (ArrayIndexOutOfBoundsException e) {
+      sResult += 1000;
+    }
+    expectEquals(11, sResult);
+
+    // Expensive hidden OOB test.
     if (HEAVY) {
       sResult = 0;
       try {
@@ -920,7 +1031,16 @@ public class Main {
         sResult += 1000;
       }
       expectEquals(1002, sResult);
+      sResult = 0;
+      try {
+        hiddenOOB3(2147483647);  // OOB
+      } catch (ArrayIndexOutOfBoundsException e) {
+        sResult += 1000;
+      }
+      expectEquals(1011, sResult);
     }
+
+    // More hidden OOB.
     sResult = 0;
     try {
       hiddenInfiniteOOB();
@@ -966,6 +1086,9 @@ public class Main {
       expectEquals(i < 128 ? i : 0, a200[i]);
     }
 
+    // No hoisting after BCE.
+    expectEquals(110, doNotHoist(x));
+
     // Addition.
     {
       int[] e1 = { 1, 2, 3, 4, 4, 4, 4, 3, 2, 1 };
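
The expectEquals calls above are the standard ART test assertion; elsewhere in Main.java it is typically defined along these lines, shown here as an assumed sketch rather than a quotation of the file:

    private static void expectEquals(int expected, int result) {
      if (expected != result) {
        throw new Error("Expected: " + expected + ", found: " + result);
      }
    }
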
diff --git a/tools/ahat/src/AhatSnapshot.java b/tools/ahat/src/AhatSnapshot.java
index d088e8c43f..e6f8411c90 100644
--- a/tools/ahat/src/AhatSnapshot.java
+++ b/tools/ahat/src/AhatSnapshot.java
@@ -16,6 +16,7 @@
 
 package com.android.ahat;
 
+import com.android.tools.perflib.captures.MemoryMappedFileBuffer;
 import com.android.tools.perflib.heap.ClassObj;
 import com.android.tools.perflib.heap.Heap;
 import com.android.tools.perflib.heap.Instance;
@@ -24,9 +25,11 @@ import com.android.tools.perflib.heap.RootType;
 import com.android.tools.perflib.heap.Snapshot;
 import com.android.tools.perflib.heap.StackFrame;
 import com.android.tools.perflib.heap.StackTrace;
-import com.android.tools.perflib.captures.MemoryMappedFileBuffer;
+
 import com.google.common.collect.Lists;
+
 import gnu.trove.TObjectProcedure;
+
 import java.io.File;
 import java.io.IOException;
 import java.util.ArrayList;
diff --git a/tools/ahat/src/InstanceUtils.java b/tools/ahat/src/InstanceUtils.java
index 8defba2647..3cdb40cc6d 100644
--- a/tools/ahat/src/InstanceUtils.java
+++ b/tools/ahat/src/InstanceUtils.java
@@ -19,9 +19,10 @@ package com.android.ahat;
 import com.android.tools.perflib.heap.ArrayInstance;
 import com.android.tools.perflib.heap.ClassInstance;
 import com.android.tools.perflib.heap.ClassObj;
-import com.android.tools.perflib.heap.Instance;
 import com.android.tools.perflib.heap.Heap;
+import com.android.tools.perflib.heap.Instance;
 import com.android.tools.perflib.heap.Type;
+
 import java.awt.image.BufferedImage;
 
 /**
@@ -42,11 +43,11 @@ class InstanceUtils {
    * Returns null if the instance is not a byte array.
    */
   private static byte[] asByteArray(Instance inst) {
-    if (! (inst instanceof ArrayInstance)) {
+    if (!(inst instanceof ArrayInstance)) {
       return null;
     }
 
-    ArrayInstance array = (ArrayInstance)inst;
+    ArrayInstance array = (ArrayInstance) inst;
     if (array.getArrayType() != Type.BYTE) {
       return null;
     }
@@ -54,7 +55,7 @@ class InstanceUtils {
     Object[] objs = array.getValues();
     byte[] bytes = new byte[objs.length];
     for (int i = 0; i < objs.length; i++) {
-      Byte b = (Byte)objs[i];
+      Byte b = (Byte) objs[i];
       bytes[i] = b.byteValue();
     }
     return bytes;
@@ -143,10 +144,10 @@ class InstanceUtils {
     int[] abgr = new int[height * width];
     for (int i = 0; i < abgr.length; i++) {
       abgr[i] = (
-          (((int)buffer[i * 4 + 3] & 0xFF) << 24) +
-          (((int)buffer[i * 4 + 0] & 0xFF) << 16) +
-          (((int)buffer[i * 4 + 1] & 0xFF) << 8) +
-          ((int)buffer[i * 4 + 2] & 0xFF));
+          (((int) buffer[i * 4 + 3] & 0xFF) << 24) +
+          (((int) buffer[i * 4 + 0] & 0xFF) << 16) +
+          (((int) buffer[i * 4 + 1] & 0xFF) << 8) +
+          ((int) buffer[i * 4 + 2] & 0xFF));
     }
 
     BufferedImage bitmap = new BufferedImage(
@@ -185,7 +186,7 @@ class InstanceUtils {
     if (!(value instanceof Instance)) {
       return null;
     }
-    return (Instance)value;
+    return (Instance) value;
   }
 
   /**
@@ -199,7 +200,7 @@ class InstanceUtils {
     if (!(value instanceof Integer)) {
       return def;
     }
-    return (Integer)value;
+    return (Integer) value;
   }
 
   /**
@@ -213,7 +214,7 @@ class InstanceUtils {
     if (!(value instanceof Long)) {
       return def;
     }
-    return (Long)value;
+    return (Long) value;
  }
 
   /**
@@ -226,7 +227,7 @@ class InstanceUtils {
     if (!(value instanceof Instance)) {
       return null;
     }
-    return asByteArray((Instance)value);
+    return asByteArray((Instance) value);
   }
 
   // Return the bitmap instance associated with this object, or null if there
@@ -243,7 +244,7 @@ class InstanceUtils {
     }
 
     if (inst instanceof ArrayInstance) {
-      ArrayInstance array = (ArrayInstance)inst;
+      ArrayInstance array = (ArrayInstance) inst;
       if (array.getArrayType() == Type.BYTE && inst.getHardReverseReferences().size() == 1) {
         Instance ref = inst.getHardReverseReferences().get(0);
         ClassObj clsref = ref.getClassObj();
@@ -323,10 +324,10 @@ class InstanceUtils {
     // Note: We know inst is an instance of ClassInstance because we already
     // read the nativePtr field from it.
     Instance registry = null;
-    for (ClassInstance.FieldValue field : ((ClassInstance)inst).getValues()) {
+    for (ClassInstance.FieldValue field : ((ClassInstance) inst).getValues()) {
       Object fieldValue = field.getValue();
       if (fieldValue instanceof Instance) {
-        Instance fieldInst = (Instance)fieldValue;
+        Instance fieldInst = (Instance) fieldValue;
         if (isInstanceOfClass(fieldInst, "libcore.util.NativeAllocationRegistry")) {
           registry = fieldInst;
           break;
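
One subtlety behind the (int) casts and & 0xFF masks in the bitmap hunk above: Java bytes are signed, so widening sign-extends unless you mask. A standalone sketch, not part of this change, showing why the masks matter when packing four bytes into one pixel int:

    public class MaskDemo {
      public static void main(String[] args) {
        byte v = (byte) 0xFF;           // bit pattern 11111111, value -1
        System.out.println((int) v);    // -1: widening sign-extends
        System.out.println(v & 0xFF);   // 255: mask recovers the unsigned value

        // Packing four bytes into one int, as the abgr[] conversion does.
        // Without the masks, any byte >= 0x80 would smear sign bits over
        // the higher color components.
        byte a = (byte) 0x80, b = (byte) 0x11, g = (byte) 0x22, r = (byte) 0x33;
        int pixel = ((a & 0xFF) << 24) | ((b & 0xFF) << 16)
                  | ((g & 0xFF) << 8) | (r & 0xFF);
        System.out.printf("%08x%n", pixel);  // prints 80112233
      }
    }
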
diff --git a/tools/ahat/src/Site.java b/tools/ahat/src/Site.java
index d504096314..dbb84f600e 100644
--- a/tools/ahat/src/Site.java
+++ b/tools/ahat/src/Site.java
@@ -20,6 +20,7 @@ import com.android.tools.perflib.heap.ClassObj;
 import com.android.tools.perflib.heap.Heap;
 import com.android.tools.perflib.heap.Instance;
 import com.android.tools.perflib.heap.StackFrame;
+
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashMap;
diff --git a/tools/ahat/src/Sort.java b/tools/ahat/src/Sort.java
index c5f89c315a..8a3d9f2053 100644
--- a/tools/ahat/src/Sort.java
+++ b/tools/ahat/src/Sort.java
@@ -16,13 +16,14 @@
 
 package com.android.ahat;
 
-import com.android.tools.perflib.heap.Instance;
 import com.android.tools.perflib.heap.Heap;
+import com.android.tools.perflib.heap.Instance;
+
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Comparator;
-import java.util.List;
 import java.util.Iterator;
+import java.util.List;
 
 /**
  * Provides Comparators and helper functions for sorting Instances, Sites, and
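
Sort.java's class comment is cut off above; for readers unfamiliar with the pattern it describes, composing Comparators for ordering heap-dump entities looks roughly like the following. This is an illustrative sketch with hypothetical names, not ahat's actual API:

    import java.util.Arrays;
    import java.util.Comparator;

    class SortSketch {
      static class Item {
        final String heap;
        final long size;
        Item(String heap, long size) { this.heap = heap; this.size = size; }
        @Override public String toString() { return heap + "/" + size; }
      }

      public static void main(String[] args) {
        // Order by descending size, breaking ties by heap name -- the kind
        // of composed ordering Sort's helpers provide for Instances/Sites.
        Comparator<Item> bySizeDesc = (x, y) -> Long.compare(y.size, x.size);
        Comparator<Item> byHeapName = (x, y) -> x.heap.compareTo(y.heap);
        Item[] items = {
            new Item("app", 64), new Item("zygote", 64), new Item("image", 128) };
        Arrays.sort(items, bySizeDesc.thenComparing(byHeapName));
        System.out.println(Arrays.toString(items));  // [image/128, app/64, zygote/64]
      }
    }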