diff options
34 files changed, 2042 insertions, 168 deletions
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk index 424aa7a7eb..009933d2b7 100644 --- a/build/Android.gtest.mk +++ b/build/Android.gtest.mk @@ -318,6 +318,8 @@ COMPILER_GTEST_COMMON_SRC_FILES_arm64 := \ compiler/utils/arm64/managed_register_arm64_test.cc \ COMPILER_GTEST_COMMON_SRC_FILES_mips := \ + compiler/linker/mips/relative_patcher_mips_test.cc \ + compiler/linker/mips/relative_patcher_mips32r6_test.cc \ COMPILER_GTEST_COMMON_SRC_FILES_mips64 := \ diff --git a/compiler/Android.mk b/compiler/Android.mk index e9c22d2b0f..4ec7d721f3 100644 --- a/compiler/Android.mk +++ b/compiler/Android.mk @@ -113,8 +113,11 @@ LIBART_COMPILER_SRC_FILES_arm64 := \ LIBART_COMPILER_SRC_FILES_mips := \ jni/quick/mips/calling_convention_mips.cc \ + linker/mips/relative_patcher_mips.cc \ optimizing/code_generator_mips.cc \ + optimizing/dex_cache_array_fixups_mips.cc \ optimizing/intrinsics_mips.cc \ + optimizing/pc_relative_fixups_mips.cc \ utils/mips/assembler_mips.cc \ utils/mips/managed_register_mips.cc \ diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc index e52dda35bb..474530a033 100644 --- a/compiler/driver/compiler_driver.cc +++ b/compiler/driver/compiler_driver.cc @@ -394,7 +394,7 @@ CompilerDriver::CompilerDriver( dump_passes_(dump_passes), timings_logger_(timer), compiler_context_(nullptr), - support_boot_image_fixup_(instruction_set != kMips && instruction_set != kMips64), + support_boot_image_fixup_(instruction_set != kMips64), dex_files_for_oat_file_(nullptr), compiled_method_storage_(swap_fd), profile_compilation_info_(profile_compilation_info), diff --git a/compiler/linker/mips/relative_patcher_mips.cc b/compiler/linker/mips/relative_patcher_mips.cc new file mode 100644 index 0000000000..7c0423b635 --- /dev/null +++ b/compiler/linker/mips/relative_patcher_mips.cc @@ -0,0 +1,109 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "linker/mips/relative_patcher_mips.h" + +#include "compiled_method.h" + +namespace art { +namespace linker { + +uint32_t MipsRelativePatcher::ReserveSpace( + uint32_t offset, + const CompiledMethod* compiled_method ATTRIBUTE_UNUSED, + MethodReference method_ref ATTRIBUTE_UNUSED) { + return offset; // No space reserved; no limit on relative call distance. +} + +uint32_t MipsRelativePatcher::ReserveSpaceEnd(uint32_t offset) { + return offset; // No space reserved; no limit on relative call distance. +} + +uint32_t MipsRelativePatcher::WriteThunks(OutputStream* out ATTRIBUTE_UNUSED, uint32_t offset) { + return offset; // No thunks added; no limit on relative call distance. +} + +void MipsRelativePatcher::PatchCall(std::vector<uint8_t>* code ATTRIBUTE_UNUSED, + uint32_t literal_offset ATTRIBUTE_UNUSED, + uint32_t patch_offset ATTRIBUTE_UNUSED, + uint32_t target_offset ATTRIBUTE_UNUSED) { + UNIMPLEMENTED(FATAL) << "PatchCall unimplemented on MIPS"; +} + +void MipsRelativePatcher::PatchPcRelativeReference(std::vector<uint8_t>* code, + const LinkerPatch& patch, + uint32_t patch_offset, + uint32_t target_offset) { + uint32_t anchor_literal_offset = patch.PcInsnOffset(); + uint32_t literal_offset = patch.LiteralOffset(); + + // Basic sanity checks. + if (is_r6) { + DCHECK_GE(code->size(), 8u); + DCHECK_LE(literal_offset, code->size() - 8u); + DCHECK_EQ(literal_offset, anchor_literal_offset); + // AUIPC reg, offset_high + DCHECK_EQ((*code)[literal_offset + 0], 0x34); + DCHECK_EQ((*code)[literal_offset + 1], 0x12); + DCHECK_EQ(((*code)[literal_offset + 2] & 0x1F), 0x1E); + DCHECK_EQ(((*code)[literal_offset + 3] & 0xFC), 0xEC); + // ADDIU reg, reg, offset_low + DCHECK_EQ((*code)[literal_offset + 4], 0x78); + DCHECK_EQ((*code)[literal_offset + 5], 0x56); + DCHECK_EQ(((*code)[literal_offset + 7] & 0xFC), 0x24); + } else { + DCHECK_GE(code->size(), 16u); + DCHECK_LE(literal_offset, code->size() - 12u); + DCHECK_GE(literal_offset, 4u); + DCHECK_EQ(literal_offset + 4u, anchor_literal_offset); + // NAL + DCHECK_EQ((*code)[literal_offset - 4], 0x00); + DCHECK_EQ((*code)[literal_offset - 3], 0x00); + DCHECK_EQ((*code)[literal_offset - 2], 0x10); + DCHECK_EQ((*code)[literal_offset - 1], 0x04); + // LUI reg, offset_high + DCHECK_EQ((*code)[literal_offset + 0], 0x34); + DCHECK_EQ((*code)[literal_offset + 1], 0x12); + DCHECK_EQ(((*code)[literal_offset + 2] & 0xE0), 0x00); + DCHECK_EQ((*code)[literal_offset + 3], 0x3C); + // ORI reg, reg, offset_low + DCHECK_EQ((*code)[literal_offset + 4], 0x78); + DCHECK_EQ((*code)[literal_offset + 5], 0x56); + DCHECK_EQ(((*code)[literal_offset + 7] & 0xFC), 0x34); + // ADDU reg, reg, RA + DCHECK_EQ((*code)[literal_offset + 8], 0x21); + DCHECK_EQ(((*code)[literal_offset + 9] & 0x07), 0x00); + DCHECK_EQ(((*code)[literal_offset + 10] & 0x1F), 0x1F); + DCHECK_EQ(((*code)[literal_offset + 11] & 0xFC), 0x00); + } + + // Apply patch. + uint32_t anchor_offset = patch_offset - literal_offset + anchor_literal_offset; + uint32_t diff = target_offset - anchor_offset + kDexCacheArrayLwOffset; + if (is_r6) { + diff += (diff & 0x8000) << 1; // Account for sign extension in ADDIU. + } + + // LUI reg, offset_high / AUIPC reg, offset_high + (*code)[literal_offset + 0] = static_cast<uint8_t>(diff >> 16); + (*code)[literal_offset + 1] = static_cast<uint8_t>(diff >> 24); + // ORI reg, reg, offset_low / ADDIU reg, reg, offset_low + (*code)[literal_offset + 4] = static_cast<uint8_t>(diff >> 0); + (*code)[literal_offset + 5] = static_cast<uint8_t>(diff >> 8); +} + +} // namespace linker +} // namespace art diff --git a/compiler/linker/mips/relative_patcher_mips.h b/compiler/linker/mips/relative_patcher_mips.h new file mode 100644 index 0000000000..4ff2f2f24f --- /dev/null +++ b/compiler/linker/mips/relative_patcher_mips.h @@ -0,0 +1,57 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_LINKER_MIPS_RELATIVE_PATCHER_MIPS_H_ +#define ART_COMPILER_LINKER_MIPS_RELATIVE_PATCHER_MIPS_H_ + +#include "linker/relative_patcher.h" +#include "arch/mips/instruction_set_features_mips.h" + +namespace art { +namespace linker { + +class MipsRelativePatcher FINAL : public RelativePatcher { + public: + explicit MipsRelativePatcher(const MipsInstructionSetFeatures* features) + : is_r6(features->IsR6()) {} + + uint32_t ReserveSpace(uint32_t offset, + const CompiledMethod* compiled_method, + MethodReference method_ref) OVERRIDE; + uint32_t ReserveSpaceEnd(uint32_t offset) OVERRIDE; + uint32_t WriteThunks(OutputStream* out, uint32_t offset) OVERRIDE; + void PatchCall(std::vector<uint8_t>* code, + uint32_t literal_offset, + uint32_t patch_offset, + uint32_t target_offset) OVERRIDE; + void PatchPcRelativeReference(std::vector<uint8_t>* code, + const LinkerPatch& patch, + uint32_t patch_offset, + uint32_t target_offset) OVERRIDE; + + private: + // We'll maximize the range of a single load instruction for dex cache array accesses + // by aligning offset -32768 with the offset of the first used element. + static constexpr uint32_t kDexCacheArrayLwOffset = 0x8000; + bool is_r6; + + DISALLOW_COPY_AND_ASSIGN(MipsRelativePatcher); +}; + +} // namespace linker +} // namespace art + +#endif // ART_COMPILER_LINKER_MIPS_RELATIVE_PATCHER_MIPS_H_ diff --git a/compiler/linker/mips/relative_patcher_mips32r6_test.cc b/compiler/linker/mips/relative_patcher_mips32r6_test.cc new file mode 100644 index 0000000000..0f1dcbcbf1 --- /dev/null +++ b/compiler/linker/mips/relative_patcher_mips32r6_test.cc @@ -0,0 +1,68 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "linker/relative_patcher_test.h" +#include "linker/mips/relative_patcher_mips.h" + +namespace art { +namespace linker { + +// We'll maximize the range of a single load instruction for dex cache array accesses +// by aligning offset -32768 with the offset of the first used element. +static constexpr uint32_t kDexCacheArrayLwOffset = 0x8000; + +class Mips32r6RelativePatcherTest : public RelativePatcherTest { + public: + Mips32r6RelativePatcherTest() : RelativePatcherTest(kMips, "mips32r6") {} + + protected: + uint32_t GetMethodOffset(uint32_t method_idx) { + auto result = method_offset_map_.FindMethodOffset(MethodRef(method_idx)); + CHECK(result.first); + return result.second; + } +}; + +TEST_F(Mips32r6RelativePatcherTest, DexCacheReference) { + dex_cache_arrays_begin_ = 0x12345678; + constexpr size_t kElementOffset = 0x1234; + static const uint8_t raw_code[] = { + 0x34, 0x12, 0x5E, 0xEE, // auipc s2, high(diff); placeholder = 0x1234 + 0x78, 0x56, 0x52, 0x26, // addiu s2, s2, low(diff); placeholder = 0x5678 + }; + constexpr uint32_t literal_offset = 0; // At auipc (where patching starts). + constexpr uint32_t anchor_offset = literal_offset; // At auipc (where PC+0 points). + ArrayRef<const uint8_t> code(raw_code); + LinkerPatch patches[] = { + LinkerPatch::DexCacheArrayPatch(literal_offset, nullptr, anchor_offset, kElementOffset), + }; + AddCompiledMethod(MethodRef(1u), code, ArrayRef<const LinkerPatch>(patches)); + Link(); + + auto result = method_offset_map_.FindMethodOffset(MethodRef(1u)); + ASSERT_TRUE(result.first); + uint32_t diff = dex_cache_arrays_begin_ + kElementOffset - (result.second + anchor_offset) + + kDexCacheArrayLwOffset; + diff += (diff & 0x8000) << 1; // Account for sign extension in addiu. + static const uint8_t expected_code[] = { + static_cast<uint8_t>(diff >> 16), static_cast<uint8_t>(diff >> 24), 0x5E, 0xEE, + static_cast<uint8_t>(diff), static_cast<uint8_t>(diff >> 8), 0x52, 0x26, + }; + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); +} + +} // namespace linker +} // namespace art diff --git a/compiler/linker/mips/relative_patcher_mips_test.cc b/compiler/linker/mips/relative_patcher_mips_test.cc new file mode 100644 index 0000000000..8391b5352a --- /dev/null +++ b/compiler/linker/mips/relative_patcher_mips_test.cc @@ -0,0 +1,71 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "linker/relative_patcher_test.h" +#include "linker/mips/relative_patcher_mips.h" + +namespace art { +namespace linker { + +// We'll maximize the range of a single load instruction for dex cache array accesses +// by aligning offset -32768 with the offset of the first used element. +static constexpr uint32_t kDexCacheArrayLwOffset = 0x8000; + +class MipsRelativePatcherTest : public RelativePatcherTest { + public: + MipsRelativePatcherTest() : RelativePatcherTest(kMips, "mips32r2") {} + + protected: + uint32_t GetMethodOffset(uint32_t method_idx) { + auto result = method_offset_map_.FindMethodOffset(MethodRef(method_idx)); + CHECK(result.first); + return result.second; + } +}; + +TEST_F(MipsRelativePatcherTest, DexCacheReference) { + dex_cache_arrays_begin_ = 0x12345678; + constexpr size_t kElementOffset = 0x1234; + static const uint8_t raw_code[] = { + 0x00, 0x00, 0x10, 0x04, // nal + 0x34, 0x12, 0x12, 0x3C, // lui s2, high(diff); placeholder = 0x1234 + 0x78, 0x56, 0x52, 0x36, // ori s2, s2, low(diff); placeholder = 0x5678 + 0x21, 0x90, 0x5F, 0x02, // addu s2, s2, ra + }; + constexpr uint32_t literal_offset = 4; // At lui (where patching starts). + constexpr uint32_t anchor_offset = 8; // At ori (where PC+0 points). + ArrayRef<const uint8_t> code(raw_code); + LinkerPatch patches[] = { + LinkerPatch::DexCacheArrayPatch(literal_offset, nullptr, anchor_offset, kElementOffset), + }; + AddCompiledMethod(MethodRef(1u), code, ArrayRef<const LinkerPatch>(patches)); + Link(); + + auto result = method_offset_map_.FindMethodOffset(MethodRef(1u)); + ASSERT_TRUE(result.first); + uint32_t diff = dex_cache_arrays_begin_ + kElementOffset - (result.second + anchor_offset) + + kDexCacheArrayLwOffset; + static const uint8_t expected_code[] = { + 0x00, 0x00, 0x10, 0x04, + static_cast<uint8_t>(diff >> 16), static_cast<uint8_t>(diff >> 24), 0x12, 0x3C, + static_cast<uint8_t>(diff), static_cast<uint8_t>(diff >> 8), 0x52, 0x36, + 0x21, 0x90, 0x5F, 0x02, + }; + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); +} + +} // namespace linker +} // namespace art diff --git a/compiler/linker/relative_patcher.cc b/compiler/linker/relative_patcher.cc index 3a229831d0..77655947fd 100644 --- a/compiler/linker/relative_patcher.cc +++ b/compiler/linker/relative_patcher.cc @@ -22,6 +22,9 @@ #ifdef ART_ENABLE_CODEGEN_arm64 #include "linker/arm64/relative_patcher_arm64.h" #endif +#ifdef ART_ENABLE_CODEGEN_mips +#include "linker/mips/relative_patcher_mips.h" +#endif #ifdef ART_ENABLE_CODEGEN_x86 #include "linker/x86/relative_patcher_x86.h" #endif @@ -95,6 +98,11 @@ std::unique_ptr<RelativePatcher> RelativePatcher::Create( return std::unique_ptr<RelativePatcher>( new Arm64RelativePatcher(provider, features->AsArm64InstructionSetFeatures())); #endif +#ifdef ART_ENABLE_CODEGEN_mips + case kMips: + return std::unique_ptr<RelativePatcher>( + new MipsRelativePatcher(features->AsMipsInstructionSetFeatures())); +#endif default: return std::unique_ptr<RelativePatcher>(new RelativePatcherNone); } diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index 4d44c18dcf..37f1c35c50 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -39,6 +39,10 @@ namespace mips { static constexpr int kCurrentMethodStackOffset = 0; static constexpr Register kMethodRegisterArgument = A0; +// We'll maximize the range of a single load instruction for dex cache array accesses +// by aligning offset -32768 with the offset of the first used element. +static constexpr uint32_t kDexCacheArrayLwOffset = 0x8000; + Location MipsReturnLocation(Primitive::Type return_type) { switch (return_type) { case Primitive::kPrimBoolean: @@ -477,7 +481,12 @@ CodeGeneratorMIPS::CodeGeneratorMIPS(HGraph* graph, instruction_visitor_(graph, this), move_resolver_(graph->GetArena(), this), assembler_(graph->GetArena(), &isa_features), - isa_features_(isa_features) { + isa_features_(isa_features), + method_patches_(MethodReferenceComparator(), + graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + call_patches_(MethodReferenceComparator(), + graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) { // Save RA (containing the return address) to mimic Quick. AddAllocatedRegister(Location::RegisterLocation(RA)); } @@ -948,6 +957,71 @@ void CodeGeneratorMIPS::AddLocationAsTemp(Location location, LocationSummary* lo } } +void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) { + DCHECK(linker_patches->empty()); + size_t size = + method_patches_.size() + + call_patches_.size() + + pc_relative_dex_cache_patches_.size(); + linker_patches->reserve(size); + for (const auto& entry : method_patches_) { + const MethodReference& target_method = entry.first; + Literal* literal = entry.second; + DCHECK(literal->GetLabel()->IsBound()); + uint32_t literal_offset = __ GetLabelLocation(literal->GetLabel()); + linker_patches->push_back(LinkerPatch::MethodPatch(literal_offset, + target_method.dex_file, + target_method.dex_method_index)); + } + for (const auto& entry : call_patches_) { + const MethodReference& target_method = entry.first; + Literal* literal = entry.second; + DCHECK(literal->GetLabel()->IsBound()); + uint32_t literal_offset = __ GetLabelLocation(literal->GetLabel()); + linker_patches->push_back(LinkerPatch::CodePatch(literal_offset, + target_method.dex_file, + target_method.dex_method_index)); + } + for (const PcRelativePatchInfo& info : pc_relative_dex_cache_patches_) { + const DexFile& dex_file = info.target_dex_file; + size_t base_element_offset = info.offset_or_index; + DCHECK(info.high_label.IsBound()); + uint32_t high_offset = __ GetLabelLocation(&info.high_label); + DCHECK(info.pc_rel_label.IsBound()); + uint32_t pc_rel_offset = __ GetLabelLocation(&info.pc_rel_label); + linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(high_offset, + &dex_file, + pc_rel_offset, + base_element_offset)); + } +} + +CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeDexCacheArrayPatch( + const DexFile& dex_file, uint32_t element_offset) { + return NewPcRelativePatch(dex_file, element_offset, &pc_relative_dex_cache_patches_); +} + +CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativePatch( + const DexFile& dex_file, uint32_t offset_or_index, ArenaDeque<PcRelativePatchInfo>* patches) { + patches->emplace_back(dex_file, offset_or_index); + return &patches->back(); +} + +Literal* CodeGeneratorMIPS::DeduplicateMethodLiteral(MethodReference target_method, + MethodToLiteralMap* map) { + return map->GetOrCreate( + target_method, + [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); }); +} + +Literal* CodeGeneratorMIPS::DeduplicateMethodAddressLiteral(MethodReference target_method) { + return DeduplicateMethodLiteral(target_method, &method_patches_); +} + +Literal* CodeGeneratorMIPS::DeduplicateMethodCodeLiteral(MethodReference target_method) { + return DeduplicateMethodLiteral(target_method, &call_patches_); +} + void CodeGeneratorMIPS::MarkGCCard(Register object, Register value) { MipsLabel done; Register card = AT; @@ -3741,12 +3815,38 @@ void LocationsBuilderMIPS::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invo // art::PrepareForRegisterAllocation. DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); + HInvokeStaticOrDirect::MethodLoadKind method_load_kind = invoke->GetMethodLoadKind(); + HInvokeStaticOrDirect::CodePtrLocation code_ptr_location = invoke->GetCodePtrLocation(); + bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); + + // kDirectAddressWithFixup and kCallDirectWithFixup need no extra input on R6 because + // R6 has PC-relative addressing. + bool has_extra_input = !isR6 && + ((method_load_kind == HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup) || + (code_ptr_location == HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup)); + + if (invoke->HasPcRelativeDexCache()) { + // kDexCachePcRelative is mutually exclusive with + // kDirectAddressWithFixup/kCallDirectWithFixup. + CHECK(!has_extra_input); + has_extra_input = true; + } + IntrinsicLocationsBuilderMIPS intrinsic(codegen_); if (intrinsic.TryDispatch(invoke)) { + if (invoke->GetLocations()->CanCall() && has_extra_input) { + invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any()); + } return; } HandleInvoke(invoke); + + // Add the extra input register if either the dex cache array base register + // or the PC-relative base register for accessing literals is needed. + if (has_extra_input) { + invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister()); + } } static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorMIPS* codegen) { @@ -3771,42 +3871,103 @@ HLoadClass::LoadKind CodeGeneratorMIPS::GetSupportedLoadClassKind( return HLoadClass::LoadKind::kDexCacheViaMethod; } +Register CodeGeneratorMIPS::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, + Register temp) { + CHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u); + Location location = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); + if (!invoke->GetLocations()->Intrinsified()) { + return location.AsRegister<Register>(); + } + // For intrinsics we allow any location, so it may be on the stack. + if (!location.IsRegister()) { + __ LoadFromOffset(kLoadWord, temp, SP, location.GetStackIndex()); + return temp; + } + // For register locations, check if the register was saved. If so, get it from the stack. + // Note: There is a chance that the register was saved but not overwritten, so we could + // save one load. However, since this is just an intrinsic slow path we prefer this + // simple and more robust approach rather that trying to determine if that's the case. + SlowPathCode* slow_path = GetCurrentSlowPath(); + DCHECK(slow_path != nullptr); // For intrinsified invokes the call is emitted on the slow path. + if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) { + int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>()); + __ LoadFromOffset(kLoadWord, temp, SP, stack_offset); + return temp; + } + return location.AsRegister<Register>(); +} + HInvokeStaticOrDirect::DispatchInfo CodeGeneratorMIPS::GetSupportedInvokeStaticOrDirectDispatch( const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, MethodReference target_method ATTRIBUTE_UNUSED) { - switch (desired_dispatch_info.method_load_kind) { + HInvokeStaticOrDirect::DispatchInfo dispatch_info = desired_dispatch_info; + // We disable PC-relative load when there is an irreducible loop, as the optimization + // is incompatible with it. + bool has_irreducible_loops = GetGraph()->HasIrreducibleLoops(); + bool fallback_load = true; + bool fallback_call = true; + switch (dispatch_info.method_load_kind) { case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup: case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: - // TODO: Implement these types. For the moment, we fall back to kDexCacheViaMethod. - return HInvokeStaticOrDirect::DispatchInfo { - HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod, - HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod, - 0u, - 0u - }; + fallback_load = has_irreducible_loops; + break; default: + fallback_load = false; break; } - switch (desired_dispatch_info.code_ptr_location) { + switch (dispatch_info.code_ptr_location) { case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup: + fallback_call = has_irreducible_loops; + break; case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative: - // TODO: Implement these types. For the moment, we fall back to kCallArtMethod. - return HInvokeStaticOrDirect::DispatchInfo { - desired_dispatch_info.method_load_kind, - HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod, - desired_dispatch_info.method_load_data, - 0u - }; + // TODO: Implement this type. + break; default: - return desired_dispatch_info; + fallback_call = false; + break; } + if (fallback_load) { + dispatch_info.method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod; + dispatch_info.method_load_data = 0; + } + if (fallback_call) { + dispatch_info.code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod; + dispatch_info.direct_code_ptr = 0; + } + return dispatch_info; } void CodeGeneratorMIPS::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) { // All registers are assumed to be correctly set up per the calling convention. - Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp. - switch (invoke->GetMethodLoadKind()) { + HInvokeStaticOrDirect::MethodLoadKind method_load_kind = invoke->GetMethodLoadKind(); + HInvokeStaticOrDirect::CodePtrLocation code_ptr_location = invoke->GetCodePtrLocation(); + bool isR6 = isa_features_.IsR6(); + // kDirectAddressWithFixup and kCallDirectWithFixup have no extra input on R6 because + // R6 has PC-relative addressing. + bool has_extra_input = invoke->HasPcRelativeDexCache() || + (!isR6 && + ((method_load_kind == HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup) || + (code_ptr_location == HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup))); + Register base_reg = has_extra_input + ? GetInvokeStaticOrDirectExtraParameter(invoke, temp.AsRegister<Register>()) + : ZERO; + + // For better instruction scheduling we load the direct code pointer before the method pointer. + switch (code_ptr_location) { + case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect: + // T9 = invoke->GetDirectCodePtr(); + __ LoadConst32(T9, invoke->GetDirectCodePtr()); + break; + case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup: + // T9 = code address from literal pool with link-time patch. + __ LoadLiteral(T9, base_reg, DeduplicateMethodCodeLiteral(invoke->GetTargetMethod())); + break; + default: + break; + } + + switch (method_load_kind) { case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: // temp = thread->string_init_entrypoint __ LoadFromOffset(kLoadWord, @@ -3821,11 +3982,18 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke __ LoadConst32(temp.AsRegister<Register>(), invoke->GetMethodAddress()); break; case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup: - case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: - // TODO: Implement these types. - // Currently filtered out by GetSupportedInvokeStaticOrDirectDispatch(). - LOG(FATAL) << "Unsupported"; - UNREACHABLE(); + __ LoadLiteral(temp.AsRegister<Register>(), + base_reg, + DeduplicateMethodAddressLiteral(invoke->GetTargetMethod())); + break; + case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: { + HMipsDexCacheArraysBase* base = + invoke->InputAt(invoke->GetSpecialInputIndex())->AsMipsDexCacheArraysBase(); + int32_t offset = + invoke->GetDexCacheArrayOffset() - base->GetElementOffset() - kDexCacheArrayLwOffset; + __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), base_reg, offset); + break; + } case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: { Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); Register reg = temp.AsRegister<Register>(); @@ -3856,20 +4024,19 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke } } - switch (invoke->GetCodePtrLocation()) { + switch (code_ptr_location) { case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf: - __ Jalr(&frame_entry_label_, T9); + __ Bal(&frame_entry_label_); break; case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect: - // LR = invoke->GetDirectCodePtr(); - __ LoadConst32(T9, invoke->GetDirectCodePtr()); - // LR() + case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup: + // T9 prepared above for better instruction scheduling. + // T9() __ Jalr(T9); __ Nop(); break; - case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup: case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative: - // TODO: Implement these types. + // TODO: Implement this type. // Currently filtered out by GetSupportedInvokeStaticOrDirectDispatch(). LOG(FATAL) << "Unsupported"; UNREACHABLE(); @@ -5140,6 +5307,57 @@ void InstructionCodeGeneratorMIPS::VisitPackedSwitch(HPackedSwitch* switch_instr } } +void LocationsBuilderMIPS::VisitMipsComputeBaseMethodAddress( + HMipsComputeBaseMethodAddress* insn) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(insn, LocationSummary::kNoCall); + locations->SetOut(Location::RequiresRegister()); +} + +void InstructionCodeGeneratorMIPS::VisitMipsComputeBaseMethodAddress( + HMipsComputeBaseMethodAddress* insn) { + LocationSummary* locations = insn->GetLocations(); + Register reg = locations->Out().AsRegister<Register>(); + + CHECK(!codegen_->GetInstructionSetFeatures().IsR6()); + + // Generate a dummy PC-relative call to obtain PC. + __ Nal(); + // Grab the return address off RA. + __ Move(reg, RA); + + // Remember this offset (the obtained PC value) for later use with constant area. + __ BindPcRelBaseLabel(); +} + +void LocationsBuilderMIPS::VisitMipsDexCacheArraysBase(HMipsDexCacheArraysBase* base) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(base); + locations->SetOut(Location::RequiresRegister()); +} + +void InstructionCodeGeneratorMIPS::VisitMipsDexCacheArraysBase(HMipsDexCacheArraysBase* base) { + Register reg = base->GetLocations()->Out().AsRegister<Register>(); + CodeGeneratorMIPS::PcRelativePatchInfo* info = + codegen_->NewPcRelativeDexCacheArrayPatch(base->GetDexFile(), base->GetElementOffset()); + + if (codegen_->GetInstructionSetFeatures().IsR6()) { + __ Bind(&info->high_label); + __ Bind(&info->pc_rel_label); + // Add a 32-bit offset to PC. + __ Auipc(reg, /* placeholder */ 0x1234); + __ Addiu(reg, reg, /* placeholder */ 0x5678); + } else { + // Generate a dummy PC-relative call to obtain PC. + __ Nal(); + __ Bind(&info->high_label); + __ Lui(reg, /* placeholder */ 0x1234); + __ Bind(&info->pc_rel_label); + __ Ori(reg, reg, /* placeholder */ 0x5678); + // Add a 32-bit offset to PC. + __ Addu(reg, reg, RA); + } +} + void LocationsBuilderMIPS::VisitInvokeUnresolved(HInvokeUnresolved* invoke) { // The trampoline uses the same calling convention as dex calling conventions, // except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h index 6487f28ad5..08f74c04d1 100644 --- a/compiler/optimizing/code_generator_mips.h +++ b/compiler/optimizing/code_generator_mips.h @@ -285,6 +285,9 @@ class CodeGeneratorMIPS : public CodeGenerator { MipsAssembler* GetAssembler() OVERRIDE { return &assembler_; } const MipsAssembler& GetAssembler() const OVERRIDE { return assembler_; } + // Emit linker patches. + void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE; + void MarkGCCard(Register object, Register value); // Register allocation. @@ -372,7 +375,39 @@ class CodeGeneratorMIPS : public CodeGenerator { void GenerateImplicitNullCheck(HNullCheck* instruction); void GenerateExplicitNullCheck(HNullCheck* instruction); + // The PcRelativePatchInfo is used for PC-relative addressing of dex cache arrays + // and boot image strings. The only difference is the interpretation of the offset_or_index. + struct PcRelativePatchInfo { + PcRelativePatchInfo(const DexFile& dex_file, uint32_t off_or_idx) + : target_dex_file(dex_file), offset_or_index(off_or_idx) { } + PcRelativePatchInfo(PcRelativePatchInfo&& other) = default; + + const DexFile& target_dex_file; + // Either the dex cache array element offset or the string index. + uint32_t offset_or_index; + // Label for the instruction loading the most significant half of the offset that's added to PC + // to form the base address (the least significant half is loaded with the instruction that + // follows). + MipsLabel high_label; + // Label for the instruction corresponding to PC+0. + MipsLabel pc_rel_label; + }; + + PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, + uint32_t element_offset); + private: + Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp); + + using MethodToLiteralMap = ArenaSafeMap<MethodReference, Literal*, MethodReferenceComparator>; + + Literal* DeduplicateMethodLiteral(MethodReference target_method, MethodToLiteralMap* map); + Literal* DeduplicateMethodAddressLiteral(MethodReference target_method); + Literal* DeduplicateMethodCodeLiteral(MethodReference target_method); + PcRelativePatchInfo* NewPcRelativePatch(const DexFile& dex_file, + uint32_t offset_or_index, + ArenaDeque<PcRelativePatchInfo>* patches); + // Labels for each block that will be compiled. MipsLabel* block_labels_; MipsLabel frame_entry_label_; @@ -382,6 +417,12 @@ class CodeGeneratorMIPS : public CodeGenerator { MipsAssembler assembler_; const MipsInstructionSetFeatures& isa_features_; + // Method patch info, map MethodReference to a literal for method address and method code. + MethodToLiteralMap method_patches_; + MethodToLiteralMap call_patches_; + // PC-relative patch info for each HMipsDexCacheArraysBase. + ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_; + DISALLOW_COPY_AND_ASSIGN(CodeGeneratorMIPS); }; diff --git a/compiler/optimizing/dex_cache_array_fixups_mips.cc b/compiler/optimizing/dex_cache_array_fixups_mips.cc new file mode 100644 index 0000000000..0f42d9ce0f --- /dev/null +++ b/compiler/optimizing/dex_cache_array_fixups_mips.cc @@ -0,0 +1,94 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dex_cache_array_fixups_mips.h" + +#include "base/arena_containers.h" +#include "utils/dex_cache_arrays_layout-inl.h" + +namespace art { +namespace mips { + +/** + * Finds instructions that need the dex cache arrays base as an input. + */ +class DexCacheArrayFixupsVisitor : public HGraphVisitor { + public: + explicit DexCacheArrayFixupsVisitor(HGraph* graph) + : HGraphVisitor(graph), + dex_cache_array_bases_(std::less<const DexFile*>(), + // Attribute memory use to code generator. + graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {} + + void MoveBasesIfNeeded() { + for (const auto& entry : dex_cache_array_bases_) { + // Bring the base closer to the first use (previously, it was in the + // entry block) and relieve some pressure on the register allocator + // while avoiding recalculation of the base in a loop. + HMipsDexCacheArraysBase* base = entry.second; + base->MoveBeforeFirstUserAndOutOfLoops(); + } + } + + private: + void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE { + // If this is an invoke with PC-relative access to the dex cache methods array, + // we need to add the dex cache arrays base as the special input. + if (invoke->HasPcRelativeDexCache()) { + // Initialize base for target method dex file if needed. + MethodReference target_method = invoke->GetTargetMethod(); + HMipsDexCacheArraysBase* base = GetOrCreateDexCacheArrayBase(*target_method.dex_file); + // Update the element offset in base. + DexCacheArraysLayout layout(kMipsPointerSize, target_method.dex_file); + base->UpdateElementOffset(layout.MethodOffset(target_method.dex_method_index)); + // Add the special argument base to the method. + DCHECK(!invoke->HasCurrentMethodInput()); + invoke->AddSpecialInput(base); + } + } + + HMipsDexCacheArraysBase* GetOrCreateDexCacheArrayBase(const DexFile& dex_file) { + return dex_cache_array_bases_.GetOrCreate( + &dex_file, + [this, &dex_file]() { + HMipsDexCacheArraysBase* base = + new (GetGraph()->GetArena()) HMipsDexCacheArraysBase(dex_file); + HBasicBlock* entry_block = GetGraph()->GetEntryBlock(); + // Insert the base at the start of the entry block, move it to a better + // position later in MoveBaseIfNeeded(). + entry_block->InsertInstructionBefore(base, entry_block->GetFirstInstruction()); + return base; + }); + } + + using DexCacheArraysBaseMap = + ArenaSafeMap<const DexFile*, HMipsDexCacheArraysBase*, std::less<const DexFile*>>; + DexCacheArraysBaseMap dex_cache_array_bases_; +}; + +void DexCacheArrayFixups::Run() { + if (graph_->HasIrreducibleLoops()) { + // Do not run this optimization, as irreducible loops do not work with an instruction + // that can be live-in at the irreducible loop header. + return; + } + DexCacheArrayFixupsVisitor visitor(graph_); + visitor.VisitInsertionOrder(); + visitor.MoveBasesIfNeeded(); +} + +} // namespace mips +} // namespace art diff --git a/compiler/optimizing/dex_cache_array_fixups_mips.h b/compiler/optimizing/dex_cache_array_fixups_mips.h new file mode 100644 index 0000000000..c8def2842e --- /dev/null +++ b/compiler/optimizing/dex_cache_array_fixups_mips.h @@ -0,0 +1,37 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_MIPS_H_ +#define ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_MIPS_H_ + +#include "nodes.h" +#include "optimization.h" + +namespace art { +namespace mips { + +class DexCacheArrayFixups : public HOptimization { + public: + DexCacheArrayFixups(HGraph* graph, OptimizingCompilerStats* stats) + : HOptimization(graph, "dex_cache_array_fixups_mips", stats) {} + + void Run() OVERRIDE; +}; + +} // namespace mips +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_MIPS_H_ diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index 3041c4d2c7..e0410dcdb2 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -54,6 +54,9 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor { // De Morgan's laws: // ~a & ~b = ~(a | b) and ~a | ~b = ~(a & b) bool TryDeMorganNegationFactoring(HBinaryOperation* op); + bool TryHandleAssociativeAndCommutativeOperation(HBinaryOperation* instruction); + bool TrySubtractionChainSimplification(HBinaryOperation* instruction); + void VisitShift(HBinaryOperation* shift); void VisitEqual(HEqual* equal) OVERRIDE; @@ -963,7 +966,18 @@ void InstructionSimplifierVisitor::VisitAdd(HAdd* instruction) { return; } - TryReplaceWithRotate(instruction); + if (TryReplaceWithRotate(instruction)) { + return; + } + + // TryHandleAssociativeAndCommutativeOperation() does not remove its input, + // so no need to return. + TryHandleAssociativeAndCommutativeOperation(instruction); + + if ((instruction->GetLeft()->IsSub() || instruction->GetRight()->IsSub()) && + TrySubtractionChainSimplification(instruction)) { + return; + } } void InstructionSimplifierVisitor::VisitAnd(HAnd* instruction) { @@ -1025,7 +1039,13 @@ void InstructionSimplifierVisitor::VisitAnd(HAnd* instruction) { return; } - TryDeMorganNegationFactoring(instruction); + if (TryDeMorganNegationFactoring(instruction)) { + return; + } + + // TryHandleAssociativeAndCommutativeOperation() does not remove its input, + // so no need to return. + TryHandleAssociativeAndCommutativeOperation(instruction); } void InstructionSimplifierVisitor::VisitGreaterThan(HGreaterThan* condition) { @@ -1242,6 +1262,7 @@ void InstructionSimplifierVisitor::VisitMul(HMul* instruction) { instruction->ReplaceWith(input_cst); instruction->GetBlock()->RemoveInstruction(instruction); RecordSimplification(); + return; } else if (IsPowerOfTwo(factor)) { // Replace code looking like // MUL dst, src, pow_of_2 @@ -1251,6 +1272,7 @@ void InstructionSimplifierVisitor::VisitMul(HMul* instruction) { HShl* shl = new (allocator) HShl(type, input_other, shift); block->ReplaceAndRemoveInstructionWith(instruction, shl); RecordSimplification(); + return; } else if (IsPowerOfTwo(factor - 1)) { // Transform code looking like // MUL dst, src, (2^n + 1) @@ -1265,6 +1287,7 @@ void InstructionSimplifierVisitor::VisitMul(HMul* instruction) { block->InsertInstructionBefore(shl, instruction); block->ReplaceAndRemoveInstructionWith(instruction, add); RecordSimplification(); + return; } else if (IsPowerOfTwo(factor + 1)) { // Transform code looking like // MUL dst, src, (2^n - 1) @@ -1279,8 +1302,13 @@ void InstructionSimplifierVisitor::VisitMul(HMul* instruction) { block->InsertInstructionBefore(shl, instruction); block->ReplaceAndRemoveInstructionWith(instruction, sub); RecordSimplification(); + return; } } + + // TryHandleAssociativeAndCommutativeOperation() does not remove its input, + // so no need to return. + TryHandleAssociativeAndCommutativeOperation(instruction); } void InstructionSimplifierVisitor::VisitNeg(HNeg* instruction) { @@ -1380,7 +1408,13 @@ void InstructionSimplifierVisitor::VisitOr(HOr* instruction) { if (TryDeMorganNegationFactoring(instruction)) return; - TryReplaceWithRotate(instruction); + if (TryReplaceWithRotate(instruction)) { + return; + } + + // TryHandleAssociativeAndCommutativeOperation() does not remove its input, + // so no need to return. + TryHandleAssociativeAndCommutativeOperation(instruction); } void InstructionSimplifierVisitor::VisitShl(HShl* instruction) { @@ -1471,6 +1505,11 @@ void InstructionSimplifierVisitor::VisitSub(HSub* instruction) { instruction->GetBlock()->RemoveInstruction(instruction); RecordSimplification(); left->GetBlock()->RemoveInstruction(left); + return; + } + + if (TrySubtractionChainSimplification(instruction)) { + return; } } @@ -1524,7 +1563,13 @@ void InstructionSimplifierVisitor::VisitXor(HXor* instruction) { return; } - TryReplaceWithRotate(instruction); + if (TryReplaceWithRotate(instruction)) { + return; + } + + // TryHandleAssociativeAndCommutativeOperation() does not remove its input, + // so no need to return. + TryHandleAssociativeAndCommutativeOperation(instruction); } void InstructionSimplifierVisitor::SimplifyStringEquals(HInvoke* instruction) { @@ -1823,4 +1868,150 @@ void InstructionSimplifierVisitor::VisitDeoptimize(HDeoptimize* deoptimize) { } } +// Replace code looking like +// OP y, x, const1 +// OP z, y, const2 +// with +// OP z, x, const3 +// where OP is both an associative and a commutative operation. +bool InstructionSimplifierVisitor::TryHandleAssociativeAndCommutativeOperation( + HBinaryOperation* instruction) { + DCHECK(instruction->IsCommutative()); + + if (!Primitive::IsIntegralType(instruction->GetType())) { + return false; + } + + HInstruction* left = instruction->GetLeft(); + HInstruction* right = instruction->GetRight(); + // Variable names as described above. + HConstant* const2; + HBinaryOperation* y; + + if (instruction->InstructionTypeEquals(left) && right->IsConstant()) { + const2 = right->AsConstant(); + y = left->AsBinaryOperation(); + } else if (left->IsConstant() && instruction->InstructionTypeEquals(right)) { + const2 = left->AsConstant(); + y = right->AsBinaryOperation(); + } else { + // The node does not match the pattern. + return false; + } + + // If `y` has more than one use, we do not perform the optimization + // because it might increase code size (e.g. if the new constant is + // no longer encodable as an immediate operand in the target ISA). + if (!y->HasOnlyOneNonEnvironmentUse()) { + return false; + } + + // GetConstantRight() can return both left and right constants + // for commutative operations. + HConstant* const1 = y->GetConstantRight(); + if (const1 == nullptr) { + return false; + } + + instruction->ReplaceInput(const1, 0); + instruction->ReplaceInput(const2, 1); + HConstant* const3 = instruction->TryStaticEvaluation(); + DCHECK(const3 != nullptr); + instruction->ReplaceInput(y->GetLeastConstantLeft(), 0); + instruction->ReplaceInput(const3, 1); + RecordSimplification(); + return true; +} + +static HBinaryOperation* AsAddOrSub(HInstruction* binop) { + return (binop->IsAdd() || binop->IsSub()) ? binop->AsBinaryOperation() : nullptr; +} + +// Helper function that performs addition statically, considering the result type. +static int64_t ComputeAddition(Primitive::Type type, int64_t x, int64_t y) { + // Use the Compute() method for consistency with TryStaticEvaluation(). + if (type == Primitive::kPrimInt) { + return HAdd::Compute<int32_t>(x, y); + } else { + DCHECK_EQ(type, Primitive::kPrimLong); + return HAdd::Compute<int64_t>(x, y); + } +} + +// Helper function that handles the child classes of HConstant +// and returns an integer with the appropriate sign. +static int64_t GetValue(HConstant* constant, bool is_negated) { + int64_t ret = Int64FromConstant(constant); + return is_negated ? -ret : ret; +} + +// Replace code looking like +// OP1 y, x, const1 +// OP2 z, y, const2 +// with +// OP3 z, x, const3 +// where OPx is either ADD or SUB, and at least one of OP{1,2} is SUB. +bool InstructionSimplifierVisitor::TrySubtractionChainSimplification( + HBinaryOperation* instruction) { + DCHECK(instruction->IsAdd() || instruction->IsSub()) << instruction->DebugName(); + + Primitive::Type type = instruction->GetType(); + if (!Primitive::IsIntegralType(type)) { + return false; + } + + HInstruction* left = instruction->GetLeft(); + HInstruction* right = instruction->GetRight(); + // Variable names as described above. + HConstant* const2 = right->IsConstant() ? right->AsConstant() : left->AsConstant(); + if (const2 == nullptr) { + return false; + } + + HBinaryOperation* y = (AsAddOrSub(left) != nullptr) + ? left->AsBinaryOperation() + : AsAddOrSub(right); + // If y has more than one use, we do not perform the optimization because + // it might increase code size (e.g. if the new constant is no longer + // encodable as an immediate operand in the target ISA). + if ((y == nullptr) || !y->HasOnlyOneNonEnvironmentUse()) { + return false; + } + + left = y->GetLeft(); + HConstant* const1 = left->IsConstant() ? left->AsConstant() : y->GetRight()->AsConstant(); + if (const1 == nullptr) { + return false; + } + + HInstruction* x = (const1 == left) ? y->GetRight() : left; + // If both inputs are constants, let the constant folding pass deal with it. + if (x->IsConstant()) { + return false; + } + + bool is_const2_negated = (const2 == right) && instruction->IsSub(); + int64_t const2_val = GetValue(const2, is_const2_negated); + bool is_y_negated = (y == right) && instruction->IsSub(); + right = y->GetRight(); + bool is_const1_negated = is_y_negated ^ ((const1 == right) && y->IsSub()); + int64_t const1_val = GetValue(const1, is_const1_negated); + bool is_x_negated = is_y_negated ^ ((x == right) && y->IsSub()); + int64_t const3_val = ComputeAddition(type, const1_val, const2_val); + HBasicBlock* block = instruction->GetBlock(); + HConstant* const3 = block->GetGraph()->GetConstant(type, const3_val); + ArenaAllocator* arena = instruction->GetArena(); + HInstruction* z; + + if (is_x_negated) { + z = new (arena) HSub(type, const3, x, instruction->GetDexPc()); + } else { + z = new (arena) HAdd(type, x, const3, instruction->GetDexPc()); + } + + block->ReplaceAndRemoveInstructionWith(instruction, z); + RecordSimplification(); + return true; +} + } // namespace art diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index abc8d5746a..0f0ef26ea9 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -1307,7 +1307,13 @@ class HLoopInformationOutwardIterator : public ValueObject { M(Arm64IntermediateAddress, Instruction) #endif +#ifndef ART_ENABLE_CODEGEN_mips #define FOR_EACH_CONCRETE_INSTRUCTION_MIPS(M) +#else +#define FOR_EACH_CONCRETE_INSTRUCTION_MIPS(M) \ + M(MipsComputeBaseMethodAddress, Instruction) \ + M(MipsDexCacheArraysBase, Instruction) +#endif #define FOR_EACH_CONCRETE_INSTRUCTION_MIPS64(M) @@ -6551,6 +6557,9 @@ class HParallelMove FINAL : public HTemplateInstruction<0> { #ifdef ART_ENABLE_CODEGEN_arm64 #include "nodes_arm64.h" #endif +#ifdef ART_ENABLE_CODEGEN_mips +#include "nodes_mips.h" +#endif #ifdef ART_ENABLE_CODEGEN_x86 #include "nodes_x86.h" #endif diff --git a/compiler/optimizing/nodes_mips.h b/compiler/optimizing/nodes_mips.h new file mode 100644 index 0000000000..de77245e17 --- /dev/null +++ b/compiler/optimizing/nodes_mips.h @@ -0,0 +1,71 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_NODES_MIPS_H_ +#define ART_COMPILER_OPTIMIZING_NODES_MIPS_H_ + +namespace art { + +// Compute the address of the method for MIPS Constant area support. +class HMipsComputeBaseMethodAddress : public HExpression<0> { + public: + // Treat the value as an int32_t, but it is really a 32 bit native pointer. + HMipsComputeBaseMethodAddress() + : HExpression(Primitive::kPrimInt, SideEffects::None(), kNoDexPc) {} + + bool CanBeMoved() const OVERRIDE { return true; } + + DECLARE_INSTRUCTION(MipsComputeBaseMethodAddress); + + private: + DISALLOW_COPY_AND_ASSIGN(HMipsComputeBaseMethodAddress); +}; + +class HMipsDexCacheArraysBase : public HExpression<0> { + public: + explicit HMipsDexCacheArraysBase(const DexFile& dex_file) + : HExpression(Primitive::kPrimInt, SideEffects::None(), kNoDexPc), + dex_file_(&dex_file), + element_offset_(static_cast<size_t>(-1)) { } + + bool CanBeMoved() const OVERRIDE { return true; } + + void UpdateElementOffset(size_t element_offset) { + // We'll maximize the range of a single load instruction for dex cache array accesses + // by aligning offset -32768 with the offset of the first used element. + element_offset_ = std::min(element_offset_, element_offset); + } + + const DexFile& GetDexFile() const { + return *dex_file_; + } + + size_t GetElementOffset() const { + return element_offset_; + } + + DECLARE_INSTRUCTION(MipsDexCacheArraysBase); + + private: + const DexFile* dex_file_; + size_t element_offset_; + + DISALLOW_COPY_AND_ASSIGN(HMipsDexCacheArraysBase); +}; + +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_NODES_MIPS_H_ diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index c9a4bfe987..d703b0f94f 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -28,6 +28,11 @@ #include "instruction_simplifier_arm64.h" #endif +#ifdef ART_ENABLE_CODEGEN_mips +#include "dex_cache_array_fixups_mips.h" +#include "pc_relative_fixups_mips.h" +#endif + #ifdef ART_ENABLE_CODEGEN_x86 #include "pc_relative_fixups_x86.h" #endif @@ -462,6 +467,20 @@ static void RunArchOptimizations(InstructionSet instruction_set, break; } #endif +#ifdef ART_ENABLE_CODEGEN_mips + case kMips: { + mips::PcRelativeFixups* pc_relative_fixups = + new (arena) mips::PcRelativeFixups(graph, codegen, stats); + mips::DexCacheArrayFixups* dex_cache_array_fixups = + new (arena) mips::DexCacheArrayFixups(graph, stats); + HOptimization* mips_optimizations[] = { + pc_relative_fixups, + dex_cache_array_fixups + }; + RunOptimizations(mips_optimizations, arraysize(mips_optimizations), pass_observer); + break; + } +#endif #ifdef ART_ENABLE_CODEGEN_x86 case kX86: { x86::PcRelativeFixups* pc_relative_fixups = diff --git a/compiler/optimizing/pc_relative_fixups_mips.cc b/compiler/optimizing/pc_relative_fixups_mips.cc new file mode 100644 index 0000000000..ba405cdb69 --- /dev/null +++ b/compiler/optimizing/pc_relative_fixups_mips.cc @@ -0,0 +1,133 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "pc_relative_fixups_mips.h" +#include "code_generator_mips.h" +#include "intrinsics_mips.h" + +namespace art { +namespace mips { + +/** + * Finds instructions that need the constant area base as an input. + */ +class PCRelativeHandlerVisitor : public HGraphVisitor { + public: + PCRelativeHandlerVisitor(HGraph* graph, CodeGenerator* codegen) + : HGraphVisitor(graph), + codegen_(down_cast<CodeGeneratorMIPS*>(codegen)), + base_(nullptr) {} + + void MoveBaseIfNeeded() { + if (base_ != nullptr) { + // Bring the base closer to the first use (previously, it was in the + // entry block) and relieve some pressure on the register allocator + // while avoiding recalculation of the base in a loop. + base_->MoveBeforeFirstUserAndOutOfLoops(); + } + } + + private: + void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE { + HandleInvoke(invoke); + } + + void InitializePCRelativeBasePointer() { + // Ensure we only initialize the pointer once. + if (base_ != nullptr) { + return; + } + // Insert the base at the start of the entry block, move it to a better + // position later in MoveBaseIfNeeded(). + base_ = new (GetGraph()->GetArena()) HMipsComputeBaseMethodAddress(); + HBasicBlock* entry_block = GetGraph()->GetEntryBlock(); + entry_block->InsertInstructionBefore(base_, entry_block->GetFirstInstruction()); + DCHECK(base_ != nullptr); + } + + void HandleInvoke(HInvoke* invoke) { + // If this is an invoke-static/-direct with PC-relative dex cache array + // addressing, we need the PC-relative address base. + HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirect(); + if (invoke_static_or_direct != nullptr) { + HInvokeStaticOrDirect::MethodLoadKind method_load_kind = + invoke_static_or_direct->GetMethodLoadKind(); + HInvokeStaticOrDirect::CodePtrLocation code_ptr_location = + invoke_static_or_direct->GetCodePtrLocation(); + + bool has_extra_input = + (method_load_kind == HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup) || + (code_ptr_location == HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup); + + // We can't add a pointer to the constant area if we already have a current + // method pointer. This may arise when sharpening doesn't remove the current + // method pointer from the invoke. + if (invoke_static_or_direct->HasCurrentMethodInput()) { + DCHECK(!invoke_static_or_direct->HasPcRelativeDexCache()); + CHECK(!has_extra_input); // TODO: review this. + return; + } + + if (has_extra_input && !WillHaveCallFreeIntrinsicsCodeGen(invoke)) { + InitializePCRelativeBasePointer(); + // Add the extra parameter base_. + invoke_static_or_direct->AddSpecialInput(base_); + } + } + } + + bool WillHaveCallFreeIntrinsicsCodeGen(HInvoke* invoke) { + if (invoke->GetIntrinsic() != Intrinsics::kNone) { + // This invoke may have intrinsic code generation defined. However, we must + // now also determine if this code generation is truly there and call-free + // (not unimplemented, no bail on instruction features, or call on slow path). + // This is done by actually calling the locations builder on the instruction + // and clearing out the locations once result is known. We assume this + // call only has creating locations as side effects! + IntrinsicLocationsBuilderMIPS builder(codegen_); + bool success = builder.TryDispatch(invoke) && !invoke->GetLocations()->CanCall(); + invoke->SetLocations(nullptr); + return success; + } + return false; + } + + CodeGeneratorMIPS* codegen_; + + // The generated HMipsComputeBaseMethodAddress in the entry block needed as an + // input to the HMipsLoadFromConstantTable instructions. + HMipsComputeBaseMethodAddress* base_; +}; + +void PcRelativeFixups::Run() { + CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen_); + if (mips_codegen->GetInstructionSetFeatures().IsR6()) { + // Do nothing for R6 because it has PC-relative addressing. + // TODO: review. Move this check into RunArchOptimizations()? + return; + } + if (graph_->HasIrreducibleLoops()) { + // Do not run this optimization, as irreducible loops do not work with an instruction + // that can be live-in at the irreducible loop header. + return; + } + PCRelativeHandlerVisitor visitor(graph_, codegen_); + visitor.VisitInsertionOrder(); + visitor.MoveBaseIfNeeded(); +} + +} // namespace mips +} // namespace art diff --git a/compiler/optimizing/pc_relative_fixups_mips.h b/compiler/optimizing/pc_relative_fixups_mips.h new file mode 100644 index 0000000000..1e8b071bb3 --- /dev/null +++ b/compiler/optimizing/pc_relative_fixups_mips.h @@ -0,0 +1,44 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_PC_RELATIVE_FIXUPS_MIPS_H_ +#define ART_COMPILER_OPTIMIZING_PC_RELATIVE_FIXUPS_MIPS_H_ + +#include "nodes.h" +#include "optimization.h" + +namespace art { + +class CodeGenerator; + +namespace mips { + +class PcRelativeFixups : public HOptimization { + public: + PcRelativeFixups(HGraph* graph, CodeGenerator* codegen, OptimizingCompilerStats* stats) + : HOptimization(graph, "pc_relative_fixups_mips", stats), + codegen_(codegen) {} + + void Run() OVERRIDE; + + private: + CodeGenerator* codegen_; +}; + +} // namespace mips +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_PC_RELATIVE_FIXUPS_MIPS_H_ diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc index ac930833f2..ebaf1c0cab 100644 --- a/compiler/utils/mips/assembler_mips.cc +++ b/compiler/utils/mips/assembler_mips.cc @@ -39,6 +39,7 @@ void MipsAssembler::FinalizeCode() { for (auto& exception_block : exception_blocks_) { EmitExceptionPoll(&exception_block); } + EmitLiterals(); PromoteBranches(); } @@ -444,6 +445,12 @@ void MipsAssembler::Lhu(Register rt, Register rs, uint16_t imm16) { EmitI(0x25, rs, rt, imm16); } +void MipsAssembler::Lwpc(Register rs, uint32_t imm19) { + CHECK(IsR6()); + CHECK(IsUint<19>(imm19)) << imm19; + EmitI21(0x3B, rs, (0x01 << 19) | imm19); +} + void MipsAssembler::Lui(Register rt, uint16_t imm16) { EmitI(0xf, static_cast<Register>(0), rt, imm16); } @@ -532,6 +539,10 @@ void MipsAssembler::B(uint16_t imm16) { EmitI(0x4, static_cast<Register>(0), static_cast<Register>(0), imm16); } +void MipsAssembler::Bal(uint16_t imm16) { + EmitI(0x1, static_cast<Register>(0), static_cast<Register>(0x11), imm16); +} + void MipsAssembler::Beq(Register rs, Register rt, uint16_t imm16) { EmitI(0x4, rs, rt, imm16); } @@ -624,6 +635,11 @@ void MipsAssembler::Bc(uint32_t imm26) { EmitI26(0x32, imm26); } +void MipsAssembler::Balc(uint32_t imm26) { + CHECK(IsR6()); + EmitI26(0x3A, imm26); +} + void MipsAssembler::Jic(Register rt, uint16_t imm16) { CHECK(IsR6()); EmitI(0x36, static_cast<Register>(0), rt, imm16); @@ -1489,30 +1505,47 @@ void MipsAssembler::Branch::InitShortOrLong(MipsAssembler::Branch::OffsetBits of type_ = (offset_size <= branch_info_[short_type].offset_size) ? short_type : long_type; } -void MipsAssembler::Branch::InitializeType(bool is_call, bool is_r6) { +void MipsAssembler::Branch::InitializeType(bool is_call, bool is_literal, bool is_r6) { + CHECK_EQ(is_call && is_literal, false); OffsetBits offset_size = GetOffsetSizeNeeded(location_, target_); if (is_r6) { // R6 - if (is_call) { + if (is_literal) { + CHECK(!IsResolved()); + type_ = kR6Literal; + } else if (is_call) { InitShortOrLong(offset_size, kR6Call, kR6LongCall); - } else if (condition_ == kUncond) { - InitShortOrLong(offset_size, kR6UncondBranch, kR6LongUncondBranch); } else { - if (condition_ == kCondEQZ || condition_ == kCondNEZ) { - // Special case for beqzc/bnezc with longer offset than in other b<cond>c instructions. - type_ = (offset_size <= kOffset23) ? kR6CondBranch : kR6LongCondBranch; - } else { - InitShortOrLong(offset_size, kR6CondBranch, kR6LongCondBranch); + switch (condition_) { + case kUncond: + InitShortOrLong(offset_size, kR6UncondBranch, kR6LongUncondBranch); + break; + case kCondEQZ: + case kCondNEZ: + // Special case for beqzc/bnezc with longer offset than in other b<cond>c instructions. + type_ = (offset_size <= kOffset23) ? kR6CondBranch : kR6LongCondBranch; + break; + default: + InitShortOrLong(offset_size, kR6CondBranch, kR6LongCondBranch); + break; } } } else { // R2 - if (is_call) { + if (is_literal) { + CHECK(!IsResolved()); + type_ = kLiteral; + } else if (is_call) { InitShortOrLong(offset_size, kCall, kLongCall); - } else if (condition_ == kUncond) { - InitShortOrLong(offset_size, kUncondBranch, kLongUncondBranch); } else { - InitShortOrLong(offset_size, kCondBranch, kLongCondBranch); + switch (condition_) { + case kUncond: + InitShortOrLong(offset_size, kUncondBranch, kLongUncondBranch); + break; + default: + InitShortOrLong(offset_size, kCondBranch, kLongCondBranch); + break; + } } } old_type_ = type_; @@ -1544,14 +1577,14 @@ bool MipsAssembler::Branch::IsUncond(BranchCondition condition, Register lhs, Re } } -MipsAssembler::Branch::Branch(bool is_r6, uint32_t location, uint32_t target) +MipsAssembler::Branch::Branch(bool is_r6, uint32_t location, uint32_t target, bool is_call) : old_location_(location), location_(location), target_(target), lhs_reg_(0), rhs_reg_(0), condition_(kUncond) { - InitializeType(false, is_r6); + InitializeType(is_call, /* is_literal */ false, is_r6); } MipsAssembler::Branch::Branch(bool is_r6, @@ -1608,19 +1641,23 @@ MipsAssembler::Branch::Branch(bool is_r6, // Branch condition is always true, make the branch unconditional. condition_ = kUncond; } - InitializeType(false, is_r6); + InitializeType(/* is_call */ false, /* is_literal */ false, is_r6); } -MipsAssembler::Branch::Branch(bool is_r6, uint32_t location, uint32_t target, Register indirect_reg) +MipsAssembler::Branch::Branch(bool is_r6, uint32_t location, Register dest_reg, Register base_reg) : old_location_(location), location_(location), - target_(target), - lhs_reg_(indirect_reg), - rhs_reg_(0), + target_(kUnresolved), + lhs_reg_(dest_reg), + rhs_reg_(base_reg), condition_(kUncond) { - CHECK_NE(indirect_reg, ZERO); - CHECK_NE(indirect_reg, AT); - InitializeType(true, is_r6); + CHECK_NE(dest_reg, ZERO); + if (is_r6) { + CHECK_EQ(base_reg, ZERO); + } else { + CHECK_NE(base_reg, ZERO); + } + InitializeType(/* is_call */ false, /* is_literal */ true, is_r6); } MipsAssembler::BranchCondition MipsAssembler::Branch::OppositeCondition( @@ -1722,19 +1759,27 @@ bool MipsAssembler::Branch::IsLong() const { case kUncondBranch: case kCondBranch: case kCall: + // R2 near literal. + case kLiteral: // R6 short branches. case kR6UncondBranch: case kR6CondBranch: case kR6Call: + // R6 near literal. + case kR6Literal: return false; // R2 long branches. case kLongUncondBranch: case kLongCondBranch: case kLongCall: + // R2 far literal. + case kFarLiteral: // R6 long branches. case kR6LongUncondBranch: case kR6LongCondBranch: case kR6LongCall: + // R6 far literal. + case kR6FarLiteral: return true; } UNREACHABLE(); @@ -1803,6 +1848,10 @@ void MipsAssembler::Branch::PromoteToLong() { case kCall: type_ = kLongCall; break; + // R2 near literal. + case kLiteral: + type_ = kFarLiteral; + break; // R6 short branches. case kR6UncondBranch: type_ = kR6LongUncondBranch; @@ -1813,6 +1862,10 @@ void MipsAssembler::Branch::PromoteToLong() { case kR6Call: type_ = kR6LongCall; break; + // R6 near literal. + case kR6Literal: + type_ = kR6FarLiteral; + break; default: // Note: 'type_' is already long. break; @@ -1820,14 +1873,26 @@ void MipsAssembler::Branch::PromoteToLong() { CHECK(IsLong()); } -uint32_t MipsAssembler::Branch::PromoteIfNeeded(uint32_t max_short_distance) { +uint32_t MipsAssembler::GetBranchLocationOrPcRelBase(const MipsAssembler::Branch* branch) const { + switch (branch->GetType()) { + case Branch::kLiteral: + case Branch::kFarLiteral: + return GetLabelLocation(&pc_rel_base_label_); + default: + return branch->GetLocation(); + } +} + +uint32_t MipsAssembler::Branch::PromoteIfNeeded(uint32_t location, uint32_t max_short_distance) { + // `location` is either `GetLabelLocation(&pc_rel_base_label_)` for R2 literals or + // `this->GetLocation()` for everything else. // If the branch is still unresolved or already long, nothing to do. if (IsLong() || !IsResolved()) { return 0; } // Promote the short branch to long if the offset size is too small - // to hold the distance between location_ and target_. - if (GetOffsetSizeNeeded(location_, target_) > GetOffsetSize()) { + // to hold the distance between location and target_. + if (GetOffsetSizeNeeded(location, target_) > GetOffsetSize()) { PromoteToLong(); uint32_t old_size = GetOldSize(); uint32_t new_size = GetSize(); @@ -1837,7 +1902,7 @@ uint32_t MipsAssembler::Branch::PromoteIfNeeded(uint32_t max_short_distance) { // The following logic is for debugging/testing purposes. // Promote some short branches to long when it's not really required. if (UNLIKELY(max_short_distance != std::numeric_limits<uint32_t>::max())) { - int64_t distance = static_cast<int64_t>(target_) - location_; + int64_t distance = static_cast<int64_t>(target_) - location; distance = (distance >= 0) ? distance : -distance; if (distance >= max_short_distance) { PromoteToLong(); @@ -1854,12 +1919,26 @@ uint32_t MipsAssembler::Branch::GetOffsetLocation() const { return location_ + branch_info_[type_].instr_offset * sizeof(uint32_t); } -uint32_t MipsAssembler::Branch::GetOffset() const { +uint32_t MipsAssembler::GetBranchOrPcRelBaseForEncoding(const MipsAssembler::Branch* branch) const { + switch (branch->GetType()) { + case Branch::kLiteral: + case Branch::kFarLiteral: + return GetLabelLocation(&pc_rel_base_label_); + default: + return branch->GetOffsetLocation() + + Branch::branch_info_[branch->GetType()].pc_org * sizeof(uint32_t); + } +} + +uint32_t MipsAssembler::Branch::GetOffset(uint32_t location) const { + // `location` is either `GetLabelLocation(&pc_rel_base_label_)` for R2 literals or + // `this->GetOffsetLocation() + branch_info_[this->GetType()].pc_org * sizeof(uint32_t)` + // for everything else. CHECK(IsResolved()); uint32_t ofs_mask = 0xFFFFFFFF >> (32 - GetOffsetSize()); // Calculate the byte distance between instructions and also account for // different PC-relative origins. - uint32_t offset = target_ - GetOffsetLocation() - branch_info_[type_].pc_org * sizeof(uint32_t); + uint32_t offset = target_ - location; // Prepare the offset for encoding into the instruction(s). offset = (offset & ofs_mask) >> branch_info_[type_].offset_shift; return offset; @@ -1906,7 +1985,7 @@ void MipsAssembler::Bind(MipsLabel* label) { label->BindTo(bound_pc); } -uint32_t MipsAssembler::GetLabelLocation(MipsLabel* label) const { +uint32_t MipsAssembler::GetLabelLocation(const MipsLabel* label) const { CHECK(label->IsBound()); uint32_t target = label->Position(); if (label->prev_branch_id_plus_one_) { @@ -1941,6 +2020,10 @@ uint32_t MipsAssembler::GetAdjustedPosition(uint32_t old_position) { return old_position + last_position_adjustment_; } +void MipsAssembler::BindPcRelBaseLabel() { + Bind(&pc_rel_base_label_); +} + void MipsAssembler::FinalizeLabeledBranch(MipsLabel* label) { uint32_t length = branches_.back().GetLength(); if (!label->IsBound()) { @@ -1962,7 +2045,7 @@ void MipsAssembler::FinalizeLabeledBranch(MipsLabel* label) { void MipsAssembler::Buncond(MipsLabel* label) { uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved; - branches_.emplace_back(IsR6(), buffer_.Size(), target); + branches_.emplace_back(IsR6(), buffer_.Size(), target, /* is_call */ false); FinalizeLabeledBranch(label); } @@ -1976,12 +2059,46 @@ void MipsAssembler::Bcond(MipsLabel* label, BranchCondition condition, Register FinalizeLabeledBranch(label); } -void MipsAssembler::Call(MipsLabel* label, Register indirect_reg) { +void MipsAssembler::Call(MipsLabel* label) { uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved; - branches_.emplace_back(IsR6(), buffer_.Size(), target, indirect_reg); + branches_.emplace_back(IsR6(), buffer_.Size(), target, /* is_call */ true); FinalizeLabeledBranch(label); } +Literal* MipsAssembler::NewLiteral(size_t size, const uint8_t* data) { + DCHECK(size == 4u || size == 8u) << size; + literals_.emplace_back(size, data); + return &literals_.back(); +} + +void MipsAssembler::LoadLiteral(Register dest_reg, Register base_reg, Literal* literal) { + // Literal loads are treated as pseudo branches since they require very similar handling. + DCHECK_EQ(literal->GetSize(), 4u); + MipsLabel* label = literal->GetLabel(); + DCHECK(!label->IsBound()); + branches_.emplace_back(IsR6(), + buffer_.Size(), + dest_reg, + base_reg); + FinalizeLabeledBranch(label); +} + +void MipsAssembler::EmitLiterals() { + if (!literals_.empty()) { + // We don't support byte and half-word literals. + // TODO: proper alignment for 64-bit literals when they're implemented. + for (Literal& literal : literals_) { + MipsLabel* label = literal.GetLabel(); + Bind(label); + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + DCHECK(literal.GetSize() == 4u || literal.GetSize() == 8u); + for (size_t i = 0, size = literal.GetSize(); i != size; ++i) { + buffer_.Emit<uint8_t>(literal.GetData()[i]); + } + } + } +} + void MipsAssembler::PromoteBranches() { // Promote short branches to long as necessary. bool changed; @@ -1989,7 +2106,8 @@ void MipsAssembler::PromoteBranches() { changed = false; for (auto& branch : branches_) { CHECK(branch.IsResolved()); - uint32_t delta = branch.PromoteIfNeeded(); + uint32_t base = GetBranchLocationOrPcRelBase(&branch); + uint32_t delta = branch.PromoteIfNeeded(base); // If this branch has been promoted and needs to expand in size, // relocate all branches by the expansion size. if (delta) { @@ -2027,27 +2145,35 @@ const MipsAssembler::Branch::BranchInfo MipsAssembler::Branch::branch_info_[] = // R2 short branches. { 2, 0, 1, MipsAssembler::Branch::kOffset18, 2 }, // kUncondBranch { 2, 0, 1, MipsAssembler::Branch::kOffset18, 2 }, // kCondBranch - { 5, 2, 0, MipsAssembler::Branch::kOffset16, 0 }, // kCall + { 2, 0, 1, MipsAssembler::Branch::kOffset18, 2 }, // kCall + // R2 near literal. + { 1, 0, 0, MipsAssembler::Branch::kOffset16, 0 }, // kLiteral // R2 long branches. { 9, 3, 1, MipsAssembler::Branch::kOffset32, 0 }, // kLongUncondBranch { 10, 4, 1, MipsAssembler::Branch::kOffset32, 0 }, // kLongCondBranch { 6, 1, 1, MipsAssembler::Branch::kOffset32, 0 }, // kLongCall + // R2 far literal. + { 3, 0, 0, MipsAssembler::Branch::kOffset32, 0 }, // kFarLiteral // R6 short branches. { 1, 0, 1, MipsAssembler::Branch::kOffset28, 2 }, // kR6UncondBranch { 2, 0, 1, MipsAssembler::Branch::kOffset18, 2 }, // kR6CondBranch // Exception: kOffset23 for beqzc/bnezc. - { 2, 0, 0, MipsAssembler::Branch::kOffset21, 2 }, // kR6Call + { 1, 0, 1, MipsAssembler::Branch::kOffset28, 2 }, // kR6Call + // R6 near literal. + { 1, 0, 0, MipsAssembler::Branch::kOffset21, 2 }, // kR6Literal // R6 long branches. { 2, 0, 0, MipsAssembler::Branch::kOffset32, 0 }, // kR6LongUncondBranch { 3, 1, 0, MipsAssembler::Branch::kOffset32, 0 }, // kR6LongCondBranch - { 3, 0, 0, MipsAssembler::Branch::kOffset32, 0 }, // kR6LongCall + { 2, 0, 0, MipsAssembler::Branch::kOffset32, 0 }, // kR6LongCall + // R6 far literal. + { 2, 0, 0, MipsAssembler::Branch::kOffset32, 0 }, // kR6FarLiteral }; -// Note: make sure branch_info_[] and mitBranch() are kept synchronized. +// Note: make sure branch_info_[] and EmitBranch() are kept synchronized. void MipsAssembler::EmitBranch(MipsAssembler::Branch* branch) { CHECK_EQ(overwriting_, true); overwrite_location_ = branch->GetLocation(); - uint32_t offset = branch->GetOffset(); + uint32_t offset = branch->GetOffset(GetBranchOrPcRelBaseForEncoding(branch)); BranchCondition condition = branch->GetCondition(); Register lhs = branch->GetLeftRegister(); Register rhs = branch->GetRightRegister(); @@ -2064,12 +2190,15 @@ void MipsAssembler::EmitBranch(MipsAssembler::Branch* branch) { Nop(); // TODO: improve by filling the delay slot. break; case Branch::kCall: - Nal(); - Nop(); // TODO: is this NOP really needed here? CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); - Addiu(lhs, RA, offset); - Jalr(lhs); - Nop(); + Bal(offset); + Nop(); // TODO: improve by filling the delay slot. + break; + + // R2 near literal. + case Branch::kLiteral: + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Lw(lhs, rhs, offset); break; // R2 long branches. @@ -2123,11 +2252,20 @@ void MipsAssembler::EmitBranch(MipsAssembler::Branch* branch) { CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); Lui(AT, High16Bits(offset)); Ori(AT, AT, Low16Bits(offset)); - Addu(lhs, AT, RA); - Jalr(lhs); + Addu(AT, AT, RA); + Jalr(AT); Nop(); break; + // R2 far literal. + case Branch::kFarLiteral: + offset += (offset & 0x8000) << 1; // Account for sign extension in lw. + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Lui(AT, High16Bits(offset)); + Addu(AT, AT, rhs); + Lw(lhs, AT, Low16Bits(offset)); + break; + // R6 short branches. case Branch::kR6UncondBranch: CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); @@ -2140,8 +2278,13 @@ void MipsAssembler::EmitBranch(MipsAssembler::Branch* branch) { break; case Branch::kR6Call: CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); - Addiupc(lhs, offset); - Jialc(lhs, 0); + Balc(offset); + break; + + // R6 near literal. + case Branch::kR6Literal: + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Lwpc(lhs, offset); break; // R6 long branches. @@ -2159,11 +2302,18 @@ void MipsAssembler::EmitBranch(MipsAssembler::Branch* branch) { Jic(AT, Low16Bits(offset)); break; case Branch::kR6LongCall: - offset += (offset & 0x8000) << 1; // Account for sign extension in addiu. + offset += (offset & 0x8000) << 1; // Account for sign extension in jialc. CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); - Auipc(lhs, High16Bits(offset)); - Addiu(lhs, lhs, Low16Bits(offset)); - Jialc(lhs, 0); + Auipc(AT, High16Bits(offset)); + Jialc(AT, Low16Bits(offset)); + break; + + // R6 far literal. + case Branch::kR6FarLiteral: + offset += (offset & 0x8000) << 1; // Account for sign extension in lw. + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Auipc(AT, High16Bits(offset)); + Lw(lhs, AT, Low16Bits(offset)); break; } CHECK_EQ(overwrite_location_, branch->GetEndLocation()); @@ -2174,8 +2324,8 @@ void MipsAssembler::B(MipsLabel* label) { Buncond(label); } -void MipsAssembler::Jalr(MipsLabel* label, Register indirect_reg) { - Call(label, indirect_reg); +void MipsAssembler::Bal(MipsLabel* label) { + Call(label); } void MipsAssembler::Beq(Register rs, Register rt, MipsLabel* label) { diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h index 31b3b311eb..1f7781fef9 100644 --- a/compiler/utils/mips/assembler_mips.h +++ b/compiler/utils/mips/assembler_mips.h @@ -17,10 +17,12 @@ #ifndef ART_COMPILER_UTILS_MIPS_ASSEMBLER_MIPS_H_ #define ART_COMPILER_UTILS_MIPS_ASSEMBLER_MIPS_H_ +#include <deque> #include <utility> #include <vector> #include "arch/mips/instruction_set_features_mips.h" +#include "base/arena_containers.h" #include "base/macros.h" #include "constants_mips.h" #include "globals.h" @@ -79,6 +81,49 @@ class MipsLabel : public Label { DISALLOW_COPY_AND_ASSIGN(MipsLabel); }; +// Assembler literal is a value embedded in code, retrieved using a PC-relative load. +class Literal { + public: + static constexpr size_t kMaxSize = 8; + + Literal(uint32_t size, const uint8_t* data) + : label_(), size_(size) { + DCHECK_LE(size, Literal::kMaxSize); + memcpy(data_, data, size); + } + + template <typename T> + T GetValue() const { + DCHECK_EQ(size_, sizeof(T)); + T value; + memcpy(&value, data_, sizeof(T)); + return value; + } + + uint32_t GetSize() const { + return size_; + } + + const uint8_t* GetData() const { + return data_; + } + + MipsLabel* GetLabel() { + return &label_; + } + + const MipsLabel* GetLabel() const { + return &label_; + } + + private: + MipsLabel label_; + const uint32_t size_; + uint8_t data_[kMaxSize]; + + DISALLOW_COPY_AND_ASSIGN(Literal); +}; + // Slowpath entered when Thread::Current()->_exception is non-null. class MipsExceptionSlowPath { public: @@ -107,6 +152,7 @@ class MipsAssembler FINAL : public Assembler { : Assembler(arena), overwriting_(false), overwrite_location_(0), + literals_(arena->Adapter(kArenaAllocAssembler)), last_position_adjustment_(0), last_old_position_(0), last_branch_id_(0), @@ -182,6 +228,7 @@ class MipsAssembler FINAL : public Assembler { void Lwr(Register rt, Register rs, uint16_t imm16); void Lbu(Register rt, Register rs, uint16_t imm16); void Lhu(Register rt, Register rs, uint16_t imm16); + void Lwpc(Register rs, uint32_t imm19); // R6 void Lui(Register rt, uint16_t imm16); void Aui(Register rt, Register rs, uint16_t imm16); // R6 void Sync(uint32_t stype); @@ -205,6 +252,7 @@ class MipsAssembler FINAL : public Assembler { void Sltiu(Register rt, Register rs, uint16_t imm16); void B(uint16_t imm16); + void Bal(uint16_t imm16); void Beq(Register rs, Register rt, uint16_t imm16); void Bne(Register rs, Register rt, uint16_t imm16); void Beqz(Register rt, uint16_t imm16); @@ -226,6 +274,7 @@ class MipsAssembler FINAL : public Assembler { void Auipc(Register rs, uint16_t imm16); // R6 void Addiupc(Register rs, uint32_t imm19); // R6 void Bc(uint32_t imm26); // R6 + void Balc(uint32_t imm26); // R6 void Jic(Register rt, uint16_t imm16); // R6 void Jialc(Register rt, uint16_t imm16); // R6 void Bltc(Register rs, Register rt, uint16_t imm16); // R6 @@ -365,7 +414,7 @@ class MipsAssembler FINAL : public Assembler { // These will generate R2 branches or R6 branches as appropriate. void Bind(MipsLabel* label); void B(MipsLabel* label); - void Jalr(MipsLabel* label, Register indirect_reg); + void Bal(MipsLabel* label); void Beq(Register rs, Register rt, MipsLabel* label); void Bne(Register rs, Register rt, MipsLabel* label); void Beqz(Register rt, MipsLabel* label); @@ -412,6 +461,21 @@ class MipsAssembler FINAL : public Assembler { UNIMPLEMENTED(FATAL) << "Do not use Jump for MIPS"; } + // Create a new literal with a given value. + // NOTE: Force the template parameter to be explicitly specified. + template <typename T> + Literal* NewLiteral(typename Identity<T>::type value) { + static_assert(std::is_integral<T>::value, "T must be an integral type."); + return NewLiteral(sizeof(value), reinterpret_cast<const uint8_t*>(&value)); + } + + // Create a new literal with the given data. + Literal* NewLiteral(size_t size, const uint8_t* data); + + // Load literal using the base register (for R2 only) or using PC-relative loads + // (for R6 only; base_reg must be ZERO). + void LoadLiteral(Register dest_reg, Register base_reg, Literal* literal); + // // Overridden common assembler high-level functionality. // @@ -569,12 +633,22 @@ class MipsAssembler FINAL : public Assembler { // Returns the (always-)current location of a label (can be used in class CodeGeneratorMIPS, // must be used instead of MipsLabel::GetPosition()). - uint32_t GetLabelLocation(MipsLabel* label) const; + uint32_t GetLabelLocation(const MipsLabel* label) const; // Get the final position of a label after local fixup based on the old position // recorded before FinalizeCode(). uint32_t GetAdjustedPosition(uint32_t old_position); + // R2 doesn't have PC-relative addressing, which we need to access literals. We simulate it by + // reading the PC value into a general-purpose register with the NAL instruction and then loading + // literals through this base register. The code generator calls this method (at most once per + // method being compiled) to bind a label to the location for which the PC value is acquired. + // The assembler then computes literal offsets relative to this label. + void BindPcRelBaseLabel(); + + // Note that PC-relative literal loads are handled as pseudo branches because they need very + // similar relocation and may similarly expand in size to accomodate for larger offsets relative + // to PC. enum BranchCondition { kCondLT, kCondGE, @@ -604,18 +678,26 @@ class MipsAssembler FINAL : public Assembler { kUncondBranch, kCondBranch, kCall, + // R2 near literal. + kLiteral, // R2 long branches. kLongUncondBranch, kLongCondBranch, kLongCall, + // R2 far literal. + kFarLiteral, // R6 short branches. kR6UncondBranch, kR6CondBranch, kR6Call, + // R6 near literal. + kR6Literal, // R6 long branches. kR6LongUncondBranch, kR6LongCondBranch, kR6LongCall, + // R6 far literal. + kR6FarLiteral, }; // Bit sizes of offsets defined as enums to minimize chance of typos. enum OffsetBits { @@ -650,17 +732,17 @@ class MipsAssembler FINAL : public Assembler { }; static const BranchInfo branch_info_[/* Type */]; - // Unconditional branch. - Branch(bool is_r6, uint32_t location, uint32_t target); + // Unconditional branch or call. + Branch(bool is_r6, uint32_t location, uint32_t target, bool is_call); // Conditional branch. Branch(bool is_r6, uint32_t location, uint32_t target, BranchCondition condition, Register lhs_reg, - Register rhs_reg = ZERO); - // Call (branch and link) that stores the target address in a given register (i.e. T9). - Branch(bool is_r6, uint32_t location, uint32_t target, Register indirect_reg); + Register rhs_reg); + // Literal. + Branch(bool is_r6, uint32_t location, Register dest_reg, Register base_reg); // Some conditional branches with lhs = rhs are effectively NOPs, while some // others are effectively unconditional. MIPSR6 conditional branches require lhs != rhs. @@ -736,17 +818,18 @@ class MipsAssembler FINAL : public Assembler { // that is allowed for short branches. This is for debugging/testing purposes. // max_short_distance = 0 forces all short branches to become long. // Use the implicit default argument when not debugging/testing. - uint32_t PromoteIfNeeded(uint32_t max_short_distance = std::numeric_limits<uint32_t>::max()); + uint32_t PromoteIfNeeded(uint32_t location, + uint32_t max_short_distance = std::numeric_limits<uint32_t>::max()); // Returns the location of the instruction(s) containing the offset. uint32_t GetOffsetLocation() const; // Calculates and returns the offset ready for encoding in the branch instruction(s). - uint32_t GetOffset() const; + uint32_t GetOffset(uint32_t location) const; private: // Completes branch construction by determining and recording its type. - void InitializeType(bool is_call, bool is_r6); + void InitializeType(bool is_call, bool is_literal, bool is_r6); // Helper for the above. void InitShortOrLong(OffsetBits ofs_size, Type short_type, Type long_type); @@ -776,12 +859,15 @@ class MipsAssembler FINAL : public Assembler { void Buncond(MipsLabel* label); void Bcond(MipsLabel* label, BranchCondition condition, Register lhs, Register rhs = ZERO); - void Call(MipsLabel* label, Register indirect_reg); + void Call(MipsLabel* label); void FinalizeLabeledBranch(MipsLabel* label); Branch* GetBranch(uint32_t branch_id); const Branch* GetBranch(uint32_t branch_id) const; + uint32_t GetBranchLocationOrPcRelBase(const MipsAssembler::Branch* branch) const; + uint32_t GetBranchOrPcRelBaseForEncoding(const MipsAssembler::Branch* branch) const; + void EmitLiterals(); void PromoteBranches(); void EmitBranch(Branch* branch); void EmitBranches(); @@ -816,6 +902,15 @@ class MipsAssembler FINAL : public Assembler { // The current overwrite location. uint32_t overwrite_location_; + // Use std::deque<> for literal labels to allow insertions at the end + // without invalidating pointers and references to existing elements. + ArenaDeque<Literal> literals_; + + // There's no PC-relative addressing on MIPS32R2. So, in order to access literals relative to PC + // we get PC using the NAL instruction. This label marks the position within the assembler buffer + // that PC (from NAL) points to. + MipsLabel pc_rel_base_label_; + // Data for AdjustedPosition(), see the description there. uint32_t last_position_adjustment_; uint32_t last_old_position_; diff --git a/compiler/utils/mips/assembler_mips32r6_test.cc b/compiler/utils/mips/assembler_mips32r6_test.cc index ce92d602d0..49ef272fb0 100644 --- a/compiler/utils/mips/assembler_mips32r6_test.cc +++ b/compiler/utils/mips/assembler_mips32r6_test.cc @@ -48,8 +48,30 @@ class AssemblerMIPS32r6Test : public AssemblerTest<mips::MipsAssembler, return "mips"; } + std::string GetAssemblerCmdName() OVERRIDE { + // We assemble and link for MIPS32R6. See GetAssemblerParameters() for details. + return "gcc"; + } + std::string GetAssemblerParameters() OVERRIDE { - return " --no-warn -32 -march=mips32r6"; + // We assemble and link for MIPS32R6. The reason is that object files produced for MIPS32R6 + // (and MIPS64R6) with the GNU assembler don't have correct final offsets in PC-relative + // branches in the .text section and so they require a relocation pass (there's a relocation + // section, .rela.text, that has the needed info to fix up the branches). + // We use "-modd-spreg" so we can use odd-numbered single precision FPU registers. + // We put the code at address 0x1000000 (instead of 0) to avoid overlapping with the + // .MIPS.abiflags section (there doesn't seem to be a way to suppress its generation easily). + return " -march=mips32r6 -modd-spreg -Wa,--no-warn" + " -Wl,-Ttext=0x1000000 -Wl,-e0x1000000 -nostdlib"; + } + + void Pad(std::vector<uint8_t>& data) OVERRIDE { + // The GNU linker unconditionally pads the code segment with NOPs to a size that is a multiple + // of 16 and there doesn't appear to be a way to suppress this padding. Our assembler doesn't + // pad, so, in order for two assembler outputs to match, we need to match the padding as well. + // NOP is encoded as four zero bytes on MIPS. + size_t pad_size = RoundUp(data.size(), 16u) - data.size(); + data.insert(data.end(), pad_size, 0); } std::string GetDisassembleParameters() OVERRIDE { @@ -272,6 +294,21 @@ TEST_F(AssemblerMIPS32r6Test, Aui) { DriverStr(RepeatRRIb(&mips::MipsAssembler::Aui, 16, "aui ${reg1}, ${reg2}, {imm}"), "Aui"); } +TEST_F(AssemblerMIPS32r6Test, Auipc) { + DriverStr(RepeatRIb(&mips::MipsAssembler::Auipc, 16, "auipc ${reg}, {imm}"), "Auipc"); +} + +TEST_F(AssemblerMIPS32r6Test, Lwpc) { + // Lwpc() takes an unsigned 19-bit immediate, while the GNU assembler needs a signed offset, + // hence the sign extension from bit 18 with `imm - ((imm & 0x40000) << 1)`. + // The GNU assembler also wants the offset to be a multiple of 4, which it will shift right + // by 2 positions when encoding, hence `<< 2` to compensate for that shift. + // We capture the value of the immediate with `.set imm, {imm}` because the value is needed + // twice for the sign extension, but `{imm}` is substituted only once. + const char* code = ".set imm, {imm}\nlw ${reg}, ((imm - ((imm & 0x40000) << 1)) << 2)($pc)"; + DriverStr(RepeatRIb(&mips::MipsAssembler::Lwpc, 19, code), "Lwpc"); +} + TEST_F(AssemblerMIPS32r6Test, Bitswap) { DriverStr(RepeatRR(&mips::MipsAssembler::Bitswap, "bitswap ${reg1}, ${reg2}"), "bitswap"); } @@ -598,12 +635,45 @@ TEST_F(AssemblerMIPS32r6Test, StoreDToOffset) { DriverStr(expected, "StoreDToOffset"); } +TEST_F(AssemblerMIPS32r6Test, LoadFarthestNearLiteral) { + mips::Literal* literal = __ NewLiteral<uint32_t>(0x12345678); + __ LoadLiteral(mips::V0, mips::ZERO, literal); + constexpr size_t kAdduCount = 0x3FFDE; + for (size_t i = 0; i != kAdduCount; ++i) { + __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); + } + + std::string expected = + "lwpc $v0, 1f\n" + + RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + + "1:\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadFarthestNearLiteral"); +} + +TEST_F(AssemblerMIPS32r6Test, LoadNearestFarLiteral) { + mips::Literal* literal = __ NewLiteral<uint32_t>(0x12345678); + __ LoadLiteral(mips::V0, mips::ZERO, literal); + constexpr size_t kAdduCount = 0x3FFDF; + for (size_t i = 0; i != kAdduCount; ++i) { + __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); + } + + std::string expected = + "1:\n" + "auipc $at, %hi(2f - 1b)\n" + "lw $v0, %lo(2f - 1b)($at)\n" + + RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + + "2:\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadNearestFarLiteral"); +} + ////////////// // BRANCHES // ////////////// -// TODO: MipsAssembler::Auipc -// MipsAssembler::Addiupc +// TODO: MipsAssembler::Addiupc // MipsAssembler::Bc // MipsAssembler::Jic // MipsAssembler::Jialc diff --git a/compiler/utils/mips/assembler_mips_test.cc b/compiler/utils/mips/assembler_mips_test.cc index a1d6ad6a2f..50a8dc202a 100644 --- a/compiler/utils/mips/assembler_mips_test.cc +++ b/compiler/utils/mips/assembler_mips_test.cc @@ -2293,6 +2293,44 @@ TEST_F(AssemblerMIPSTest, LoadConst32) { DriverStr(expected, "LoadConst32"); } +TEST_F(AssemblerMIPSTest, LoadFarthestNearLiteral) { + mips::Literal* literal = __ NewLiteral<uint32_t>(0x12345678); + __ BindPcRelBaseLabel(); + __ LoadLiteral(mips::V0, mips::V1, literal); + constexpr size_t kAddiuCount = 0x1FDE; + for (size_t i = 0; i != kAddiuCount; ++i) { + __ Addiu(mips::A0, mips::A1, 0); + } + + std::string expected = + "1:\n" + "lw $v0, %lo(2f - 1b)($v1)\n" + + RepeatInsn(kAddiuCount, "addiu $a0, $a1, %hi(2f - 1b)\n") + + "2:\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadFarthestNearLiteral"); +} + +TEST_F(AssemblerMIPSTest, LoadNearestFarLiteral) { + mips::Literal* literal = __ NewLiteral<uint32_t>(0x12345678); + __ BindPcRelBaseLabel(); + __ LoadLiteral(mips::V0, mips::V1, literal); + constexpr size_t kAdduCount = 0x1FDF; + for (size_t i = 0; i != kAdduCount; ++i) { + __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); + } + + std::string expected = + "1:\n" + "lui $at, %hi(2f - 1b)\n" + "addu $at, $at, $v1\n" + "lw $v0, %lo(2f - 1b)($at)\n" + + RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + + "2:\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadNearestFarLiteral"); +} + #undef __ } // namespace art diff --git a/disassembler/disassembler_mips.cc b/disassembler/disassembler_mips.cc index 1f513113ec..769263ec5b 100644 --- a/disassembler/disassembler_mips.cc +++ b/disassembler/disassembler_mips.cc @@ -330,8 +330,10 @@ static const MipsInstruction gMipsInstructions[] = { { kITypeMask, 55u << kOpcodeShift, "ld", "TO", }, { kITypeMask, 56u << kOpcodeShift, "sc", "TO", }, { kITypeMask, 57u << kOpcodeShift, "swc1", "tO", }, + { kJTypeMask, 58u << kOpcodeShift, "balc", "P" }, { kITypeMask | (0x1f << 16), (59u << kOpcodeShift) | (30 << 16), "auipc", "Si" }, { kITypeMask | (0x3 << 19), (59u << kOpcodeShift) | (0 << 19), "addiupc", "Sp" }, + { kITypeMask | (0x3 << 19), (59u << kOpcodeShift) | (1 << 19), "lwpc", "So" }, { kITypeMask, 61u << kOpcodeShift, "sdc1", "tO", }, { kITypeMask | (0x1f << 21), 62u << kOpcodeShift, "jialc", "Ti" }, { kITypeMask | (1 << 21), (62u << kOpcodeShift) | (1 << 21), "bnezc", "Sb" }, // TODO: de-dup? @@ -509,7 +511,15 @@ size_t DisassemblerMips::Dump(std::ostream& os, const uint8_t* instr_ptr) { } } break; - case 'P': // 26-bit offset in bc. + case 'o': // 19-bit offset in lwpc. + { + int32_t offset = (instruction & 0x7ffff) - ((instruction & 0x40000) << 1); + offset <<= 2; + args << FormatInstructionPointer(instr_ptr + offset); + args << StringPrintf(" ; %+d", offset); + } + break; + case 'P': // 26-bit offset in bc and balc. { int32_t offset = (instruction & 0x3ffffff) - ((instruction & 0x2000000) << 1); offset <<= 2; @@ -540,6 +550,7 @@ size_t DisassemblerMips::Dump(std::ostream& os, const uint8_t* instr_ptr) { } } + // TODO: Simplify this once these sequences are simplified in the compiler. // Special cases for sequences of: // pc-relative +/- 2GB branch: // auipc reg, imm diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc index bb35f8dc30..3f031a3318 100644 --- a/oatdump/oatdump.cc +++ b/oatdump/oatdump.cc @@ -1751,7 +1751,7 @@ class ImageDumper { const auto& method_section = state->image_header_.GetMethodsSection(); size_t num_methods = dex_cache->NumResolvedMethods(); if (num_methods != 0u) { - os << "Methods (size=" << num_methods << "):"; + os << "Methods (size=" << num_methods << "):\n"; ScopedIndentation indent2(&state->vios_); auto* resolved_methods = dex_cache->GetResolvedMethods(); for (size_t i = 0, length = dex_cache->NumResolvedMethods(); i < length; ++i) { @@ -1760,10 +1760,12 @@ class ImageDumper { image_pointer_size); size_t run = 0; for (size_t j = i + 1; - j != length && elem == mirror::DexCache::GetElementPtrSize(resolved_methods, - j, - image_pointer_size); - ++j, ++run) {} + j != length && elem == mirror::DexCache::GetElementPtrSize(resolved_methods, + j, + image_pointer_size); + ++j) { + ++run; + } if (run == 0) { os << StringPrintf("%zd: ", i); } else { @@ -1784,17 +1786,20 @@ class ImageDumper { } size_t num_fields = dex_cache->NumResolvedFields(); if (num_fields != 0u) { - os << "Fields (size=" << num_fields << "):"; + os << "Fields (size=" << num_fields << "):\n"; ScopedIndentation indent2(&state->vios_); auto* resolved_fields = dex_cache->GetResolvedFields(); for (size_t i = 0, length = dex_cache->NumResolvedFields(); i < length; ++i) { - auto* elem = mirror::DexCache::GetElementPtrSize(resolved_fields, i, image_pointer_size); + auto* elem = mirror::DexCache::GetElementPtrSize( + resolved_fields, i, image_pointer_size); size_t run = 0; for (size_t j = i + 1; - j != length && elem == mirror::DexCache::GetElementPtrSize(resolved_fields, - j, - image_pointer_size); - ++j, ++run) {} + j != length && elem == mirror::DexCache::GetElementPtrSize(resolved_fields, + j, + image_pointer_size); + ++j) { + ++run; + } if (run == 0) { os << StringPrintf("%zd: ", i); } else { @@ -1813,6 +1818,32 @@ class ImageDumper { os << StringPrintf("%p %s\n", elem, msg.c_str()); } } + size_t num_types = dex_cache->NumResolvedTypes(); + if (num_types != 0u) { + os << "Types (size=" << num_types << "):\n"; + ScopedIndentation indent2(&state->vios_); + auto* resolved_types = dex_cache->GetResolvedTypes(); + for (size_t i = 0; i < num_types; ++i) { + auto* elem = resolved_types[i].Read(); + size_t run = 0; + for (size_t j = i + 1; j != num_types && elem == resolved_types[j].Read(); ++j) { + ++run; + } + if (run == 0) { + os << StringPrintf("%zd: ", i); + } else { + os << StringPrintf("%zd to %zd: ", i, i + run); + i = i + run; + } + std::string msg; + if (elem == nullptr) { + msg = "null"; + } else { + msg = PrettyClass(elem); + } + os << StringPrintf("%p %s\n", elem, msg.c_str()); + } + } } } std::string temp; diff --git a/runtime/arch/mips/instruction_set_features_mips.cc b/runtime/arch/mips/instruction_set_features_mips.cc index 93d79b7763..b3a98667d0 100644 --- a/runtime/arch/mips/instruction_set_features_mips.cc +++ b/runtime/arch/mips/instruction_set_features_mips.cc @@ -76,21 +76,22 @@ const MipsInstructionSetFeatures* MipsInstructionSetFeatures::FromVariant( GetFlagsFromCppDefined(&mips_isa_gte2, &r6, &fpu_32bit); // Override defaults based on variant string. - // Only care if it is R1, R2 or R6 and we assume all CPUs will have a FP unit. + // Only care if it is R1, R2, R5 or R6 and we assume all CPUs will have a FP unit. constexpr const char* kMips32Prefix = "mips32r"; const size_t kPrefixLength = strlen(kMips32Prefix); if (variant.compare(0, kPrefixLength, kMips32Prefix, kPrefixLength) == 0 && variant.size() > kPrefixLength) { - if (variant[kPrefixLength] >= '6') { - fpu_32bit = false; - r6 = true; - } - if (variant[kPrefixLength] >= '2') { - mips_isa_gte2 = true; - } + r6 = (variant[kPrefixLength] >= '6'); + fpu_32bit = (variant[kPrefixLength] < '5'); + mips_isa_gte2 = (variant[kPrefixLength] >= '2'); } else if (variant == "default") { - // Default variant is: smp = true, has fpu, is gte2, is not r6. This is the traditional - // setting. + // Default variant is: smp = true, has FPU, is gte2. This is the traditional setting. + // + // Note, we get FPU bitness and R6-ness from the build (using cpp defines, see above) + // and don't override them because many things depend on the "default" variant being + // sufficient for most purposes. That is, "default" should work for both R2 and R6. + // Use "mips32r#" to get a specific configuration, possibly not matching the runtime + // ISA (e.g. for ISA-specific testing of dex2oat internals). mips_isa_gte2 = true; } else { LOG(WARNING) << "Unexpected CPU variant for Mips32 using defaults: " << variant; diff --git a/runtime/arch/mips/instruction_set_features_mips.h b/runtime/arch/mips/instruction_set_features_mips.h index aac436e468..120dc1c0a5 100644 --- a/runtime/arch/mips/instruction_set_features_mips.h +++ b/runtime/arch/mips/instruction_set_features_mips.h @@ -81,8 +81,19 @@ class MipsInstructionSetFeatures FINAL : public InstructionSetFeatures { private: MipsInstructionSetFeatures(bool smp, bool fpu_32bit, bool mips_isa_gte2, bool r6) - : InstructionSetFeatures(smp), fpu_32bit_(fpu_32bit), mips_isa_gte2_(mips_isa_gte2), r6_(r6) - {} + : InstructionSetFeatures(smp), + fpu_32bit_(fpu_32bit), + mips_isa_gte2_(mips_isa_gte2), + r6_(r6) { + // Sanity checks. + if (r6) { + CHECK(mips_isa_gte2); + CHECK(!fpu_32bit); + } + if (!mips_isa_gte2) { + CHECK(fpu_32bit); + } + } // Bitmap positions for encoding features as a bitmap. enum { diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc index fe7448fa25..3ec8f21ce0 100644 --- a/runtime/class_linker.cc +++ b/runtime/class_linker.cc @@ -2536,6 +2536,10 @@ mirror::Class* ClassLinker::DefineClass(Thread* self, CHECK(h_new_class.Get() != nullptr) << descriptor; CHECK(h_new_class->IsResolved()) << descriptor; + // Update the dex cache of where the class is defined. Inlining depends on having + // this filled. + h_new_class->GetDexCache()->SetResolvedType(h_new_class->GetDexTypeIndex(), h_new_class.Get()); + // Instrumentation may have updated entrypoints for all methods of all // classes. However it could not update methods of this class while we // were loading it. Now the class is resolved, we can update entrypoints diff --git a/runtime/entrypoints/entrypoint_utils-inl.h b/runtime/entrypoints/entrypoint_utils-inl.h index 1b55d2f331..d61d0aa55b 100644 --- a/runtime/entrypoints/entrypoint_utils-inl.h +++ b/runtime/entrypoints/entrypoint_utils-inl.h @@ -39,45 +39,83 @@ namespace art { -template <bool kResolve = true> inline ArtMethod* GetResolvedMethod(ArtMethod* outer_method, const InlineInfo& inline_info, const InlineInfoEncoding& encoding, uint8_t inlining_depth) - SHARED_REQUIRES(Locks::mutator_lock_) { + SHARED_REQUIRES(Locks::mutator_lock_) { + // This method is being used by artQuickResolutionTrampoline, before it sets up + // the passed parameters in a GC friendly way. Therefore we must never be + // suspended while executing it. + ScopedAssertNoThreadSuspension sants(Thread::Current(), __FUNCTION__); + uint32_t method_index = inline_info.GetMethodIndexAtDepth(encoding, inlining_depth); InvokeType invoke_type = static_cast<InvokeType>( inline_info.GetInvokeTypeAtDepth(encoding, inlining_depth)); - ArtMethod* caller = outer_method->GetDexCacheResolvedMethod(method_index, sizeof(void*)); - if (!caller->IsRuntimeMethod()) { - return caller; - } - if (!kResolve) { - return nullptr; + ArtMethod* inlined_method = outer_method->GetDexCacheResolvedMethod(method_index, sizeof(void*)); + if (!inlined_method->IsRuntimeMethod()) { + return inlined_method; } - // The method in the dex cache can be the runtime method responsible for invoking + // The method in the dex cache is the runtime method responsible for invoking // the stub that will then update the dex cache. Therefore, we need to do the // resolution ourselves. - // We first find the class loader of our caller. If it is the outer method, we can directly - // use its class loader. Otherwise, we also need to resolve our caller. - StackHandleScope<2> hs(Thread::Current()); - ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); - MutableHandle<mirror::ClassLoader> class_loader(hs.NewHandle<mirror::Class>(nullptr)); - Handle<mirror::DexCache> dex_cache(hs.NewHandle(outer_method->GetDexCache())); - if (inlining_depth == 0) { - class_loader.Assign(outer_method->GetClassLoader()); + // We first find the dex cache of our caller. If it is the outer method, we can directly + // use its dex cache. Otherwise, we also need to resolve our caller. + ArtMethod* caller = outer_method; + if (inlining_depth != 0) { + caller = GetResolvedMethod(outer_method, + inline_info, + encoding, + inlining_depth - 1); + } + DCHECK_EQ(caller->GetDexCache(), outer_method->GetDexCache()) + << "Compiler only supports inlining calls within the same dex cache"; + const DexFile* dex_file = outer_method->GetDexFile(); + const DexFile::MethodId& method_id = dex_file->GetMethodId(method_index); + + if (inline_info.GetDexPcAtDepth(encoding, inlining_depth) == static_cast<uint32_t>(-1)) { + // "charAt" special case. It is the only non-leaf method we inline across dex files. + if (kIsDebugBuild) { + const char* name = dex_file->StringDataByIdx(method_id.name_idx_); + DCHECK_EQ(std::string(name), "charAt"); + DCHECK_EQ(std::string(dex_file->GetMethodShorty(method_id)), "CI") + << std::string(dex_file->GetMethodShorty(method_id)); + DCHECK_EQ(std::string(dex_file->StringByTypeIdx(method_id.class_idx_)), "Ljava/lang/String;") + << std::string(dex_file->StringByTypeIdx(method_id.class_idx_)); + } + mirror::Class* cls = + Runtime::Current()->GetClassLinker()->GetClassRoot(ClassLinker::kJavaLangString); + // Update the dex cache for future lookups. + caller->GetDexCache()->SetResolvedType(method_id.class_idx_, cls); + inlined_method = cls->FindVirtualMethod("charAt", "(I)C", sizeof(void*)); } else { - caller = GetResolvedMethod<kResolve>(outer_method, - inline_info, - encoding, - inlining_depth - 1); - class_loader.Assign(caller->GetClassLoader()); + mirror::Class* klass = caller->GetDexCache()->GetResolvedType(method_id.class_idx_); + DCHECK_EQ(klass->GetDexCache(), caller->GetDexCache()) + << "Compiler only supports inlining calls within the same dex cache"; + switch (invoke_type) { + case kDirect: + case kStatic: + inlined_method = + klass->FindDirectMethod(klass->GetDexCache(), method_index, sizeof(void*)); + break; + case kSuper: + case kVirtual: + inlined_method = + klass->FindVirtualMethod(klass->GetDexCache(), method_index, sizeof(void*)); + break; + default: + LOG(FATAL) << "Unimplemented inlined invocation type: " << invoke_type; + UNREACHABLE(); + } } - return class_linker->ResolveMethod<ClassLinker::kNoICCECheckForCache>( - *outer_method->GetDexFile(), method_index, dex_cache, class_loader, nullptr, invoke_type); + // Update the dex cache for future lookups. Note that for static methods, this is safe + // when the class is being initialized, as the entrypoint for the ArtMethod is at + // this point still the resolution trampoline. + outer_method->SetDexCacheResolvedMethod(method_index, inlined_method, sizeof(void*)); + return inlined_method; } inline ArtMethod* GetCalleeSaveMethodCaller(Thread* self, Runtime::CalleeSaveType type) diff --git a/runtime/gc/allocation_record.cc b/runtime/gc/allocation_record.cc index d9f1507afe..6489a396d6 100644 --- a/runtime/gc/allocation_record.cc +++ b/runtime/gc/allocation_record.cc @@ -187,7 +187,7 @@ class AllocRecordStackVisitor : public StackVisitor { public: AllocRecordStackVisitor(Thread* thread, size_t max_depth, AllocRecordStackTrace* trace_out) SHARED_REQUIRES(Locks::mutator_lock_) - : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFramesNoResolve), + : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames), max_depth_(max_depth), trace_(trace_out) {} diff --git a/runtime/monitor.cc b/runtime/monitor.cc index 71c866f3d6..396c946014 100644 --- a/runtime/monitor.cc +++ b/runtime/monitor.cc @@ -220,7 +220,7 @@ void Monitor::SetObject(mirror::Object* object) { struct NthCallerWithDexPcVisitor FINAL : public StackVisitor { explicit NthCallerWithDexPcVisitor(Thread* thread, size_t frame) SHARED_REQUIRES(Locks::mutator_lock_) - : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFramesNoResolve), + : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames), method_(nullptr), dex_pc_(0), current_frame_number_(0), diff --git a/runtime/stack.cc b/runtime/stack.cc index a5ca527aa2..1d913f2222 100644 --- a/runtime/stack.cc +++ b/runtime/stack.cc @@ -131,16 +131,10 @@ ArtMethod* StackVisitor::GetMethod() const { const OatQuickMethodHeader* method_header = GetCurrentOatQuickMethodHeader(); CodeInfoEncoding encoding = method_header->GetOptimizedCodeInfo().ExtractEncoding(); DCHECK(walk_kind_ != StackWalkKind::kSkipInlinedFrames); - bool allow_resolve = walk_kind_ != StackWalkKind::kIncludeInlinedFramesNoResolve; - return allow_resolve - ? GetResolvedMethod<true>(*GetCurrentQuickFrame(), - inline_info, - encoding.inline_info_encoding, - depth_in_stack_map) - : GetResolvedMethod<false>(*GetCurrentQuickFrame(), - inline_info, - encoding.inline_info_encoding, - depth_in_stack_map); + return GetResolvedMethod(*GetCurrentQuickFrame(), + inline_info, + encoding.inline_info_encoding, + depth_in_stack_map); } else { return *cur_quick_frame_; } @@ -791,8 +785,7 @@ void StackVisitor::WalkStack(bool include_transitions) { cur_oat_quick_method_header_ = method->GetOatQuickMethodHeader(cur_quick_frame_pc_); SanityCheckFrame(); - if ((walk_kind_ == StackWalkKind::kIncludeInlinedFrames || - walk_kind_ == StackWalkKind::kIncludeInlinedFramesNoResolve) + if ((walk_kind_ == StackWalkKind::kIncludeInlinedFrames) && (cur_oat_quick_method_header_ != nullptr) && cur_oat_quick_method_header_->IsOptimized()) { CodeInfo code_info = cur_oat_quick_method_header_->GetOptimizedCodeInfo(); diff --git a/runtime/stack.h b/runtime/stack.h index e77ab4647e..84b665effa 100644 --- a/runtime/stack.h +++ b/runtime/stack.h @@ -568,7 +568,6 @@ class StackVisitor { // when walking the stack. enum class StackWalkKind { kIncludeInlinedFrames, - kIncludeInlinedFramesNoResolve, kSkipInlinedFrames, }; diff --git a/test/458-checker-instruction-simplification/smali/SmaliTests.smali b/test/458-checker-instruction-simplification/smali/SmaliTests.smali index ede599b213..6845961f39 100644 --- a/test/458-checker-instruction-simplification/smali/SmaliTests.smali +++ b/test/458-checker-instruction-simplification/smali/SmaliTests.smali @@ -191,3 +191,139 @@ .end method +## CHECK-START: int SmaliTests.AddSubConst(int) instruction_simplifier (before) +## CHECK-DAG: <<ArgValue:i\d+>> ParameterValue +## CHECK-DAG: <<Const7:i\d+>> IntConstant 7 +## CHECK-DAG: <<Const8:i\d+>> IntConstant 8 +## CHECK-DAG: <<Add:i\d+>> Add [<<ArgValue>>,<<Const7>>] +## CHECK-DAG: <<Sub:i\d+>> Sub [<<Add>>,<<Const8>>] +## CHECK-DAG: Return [<<Sub>>] + +## CHECK-START: int SmaliTests.AddSubConst(int) instruction_simplifier (after) +## CHECK-DAG: <<ArgValue:i\d+>> ParameterValue +## CHECK-DAG: <<ConstM1:i\d+>> IntConstant -1 +## CHECK-DAG: <<Add:i\d+>> Add [<<ArgValue>>,<<ConstM1>>] +## CHECK-DAG: Return [<<Add>>] + +.method public static AddSubConst(I)I + .registers 3 + + .prologue + add-int/lit8 v0, p0, 7 + + const/16 v1, 8 + + sub-int v0, v0, v1 + + return v0 +.end method + +## CHECK-START: int SmaliTests.SubAddConst(int) instruction_simplifier (before) +## CHECK-DAG: <<ArgValue:i\d+>> ParameterValue +## CHECK-DAG: <<Const3:i\d+>> IntConstant 3 +## CHECK-DAG: <<Const4:i\d+>> IntConstant 4 +## CHECK-DAG: <<Sub:i\d+>> Sub [<<ArgValue>>,<<Const3>>] +## CHECK-DAG: <<Add:i\d+>> Add [<<Sub>>,<<Const4>>] +## CHECK-DAG: Return [<<Add>>] + +## CHECK-START: int SmaliTests.SubAddConst(int) instruction_simplifier (after) +## CHECK-DAG: <<ArgValue:i\d+>> ParameterValue +## CHECK-DAG: <<Const1:i\d+>> IntConstant 1 +## CHECK-DAG: <<Add:i\d+>> Add [<<ArgValue>>,<<Const1>>] +## CHECK-DAG: Return [<<Add>>] + +.method public static SubAddConst(I)I + .registers 2 + + .prologue + const/4 v0, 3 + + sub-int v0, p0, v0 + + add-int/lit8 v0, v0, 4 + + return v0 +.end method + +## CHECK-START: int SmaliTests.SubSubConst1(int) instruction_simplifier (before) +## CHECK-DAG: <<ArgValue:i\d+>> ParameterValue +## CHECK-DAG: <<Const9:i\d+>> IntConstant 9 +## CHECK-DAG: <<Const10:i\d+>> IntConstant 10 +## CHECK-DAG: <<Sub1:i\d+>> Sub [<<ArgValue>>,<<Const9>>] +## CHECK-DAG: <<Sub2:i\d+>> Sub [<<Sub1>>,<<Const10>>] +## CHECK-DAG: Return [<<Sub2>>] + +## CHECK-START: int SmaliTests.SubSubConst1(int) instruction_simplifier (after) +## CHECK-DAG: <<ArgValue:i\d+>> ParameterValue +## CHECK-DAG: <<ConstM19:i\d+>> IntConstant -19 +## CHECK-DAG: <<Add:i\d+>> Add [<<ArgValue>>,<<ConstM19>>] +## CHECK-DAG: Return [<<Add>>] + +.method public static SubSubConst1(I)I + .registers 3 + + .prologue + const/16 v1, 9 + + sub-int v0, p0, v1 + + const/16 v1, 10 + + sub-int v0, v0, v1 + + return v0 +.end method + +## CHECK-START: int SmaliTests.SubSubConst2(int) instruction_simplifier (before) +## CHECK-DAG: <<ArgValue:i\d+>> ParameterValue +## CHECK-DAG: <<Const11:i\d+>> IntConstant 11 +## CHECK-DAG: <<Const12:i\d+>> IntConstant 12 +## CHECK-DAG: <<Sub1:i\d+>> Sub [<<Const11>>,<<ArgValue>>] +## CHECK-DAG: <<Sub2:i\d+>> Sub [<<Sub1>>,<<Const12>>] +## CHECK-DAG: Return [<<Sub2>>] + +## CHECK-START: int SmaliTests.SubSubConst2(int) instruction_simplifier (after) +## CHECK-DAG: <<ArgValue:i\d+>> ParameterValue +## CHECK-DAG: <<ConstM1:i\d+>> IntConstant -1 +## CHECK-DAG: <<Sub:i\d+>> Sub [<<ConstM1>>,<<ArgValue>>] +## CHECK-DAG: Return [<<Sub>>] + +.method public static SubSubConst2(I)I + .registers 3 + + .prologue + rsub-int/lit8 v0, p0, 11 + + const/16 v1, 12 + + sub-int v0, v0, v1 + + return v0 +.end method + +## CHECK-START: int SmaliTests.SubSubConst3(int) instruction_simplifier (before) +## CHECK-DAG: <<ArgValue:i\d+>> ParameterValue +## CHECK-DAG: <<Const15:i\d+>> IntConstant 15 +## CHECK-DAG: <<Const16:i\d+>> IntConstant 16 +## CHECK-DAG: <<Sub1:i\d+>> Sub [<<ArgValue>>,<<Const16>>] +## CHECK-DAG: <<Sub2:i\d+>> Sub [<<Const15>>,<<Sub1>>] +## CHECK-DAG: Return [<<Sub2>>] + +## CHECK-START: int SmaliTests.SubSubConst3(int) instruction_simplifier (after) +## CHECK-DAG: <<ArgValue:i\d+>> ParameterValue +## CHECK-DAG: <<Const31:i\d+>> IntConstant 31 +## CHECK-DAG: <<Sub:i\d+>> Sub [<<Const31>>,<<ArgValue>>] +## CHECK-DAG: Return [<<Sub>>] + +.method public static SubSubConst3(I)I + .registers 2 + + .prologue + const/16 v0, 16 + + sub-int v0, p0, v0 + + rsub-int/lit8 v0, v0, 15 + + return v0 +.end method diff --git a/test/458-checker-instruction-simplification/src/Main.java b/test/458-checker-instruction-simplification/src/Main.java index ffce49d2e1..c717eaa85e 100644 --- a/test/458-checker-instruction-simplification/src/Main.java +++ b/test/458-checker-instruction-simplification/src/Main.java @@ -78,6 +78,29 @@ public class Main { return 0 + arg; } + /// CHECK-START: int Main.$noinline$AddAddSubAddConst(int) instruction_simplifier (before) + /// CHECK-DAG: <<ArgValue:i\d+>> ParameterValue + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 + /// CHECK-DAG: <<Const2:i\d+>> IntConstant 2 + /// CHECK-DAG: <<ConstM3:i\d+>> IntConstant -3 + /// CHECK-DAG: <<Const4:i\d+>> IntConstant 4 + /// CHECK-DAG: <<Add1:i\d+>> Add [<<ArgValue>>,<<Const1>>] + /// CHECK-DAG: <<Add2:i\d+>> Add [<<Add1>>,<<Const2>>] + /// CHECK-DAG: <<Add3:i\d+>> Add [<<Add2>>,<<ConstM3>>] + /// CHECK-DAG: <<Add4:i\d+>> Add [<<Add3>>,<<Const4>>] + /// CHECK-DAG: Return [<<Add4>>] + + /// CHECK-START: int Main.$noinline$AddAddSubAddConst(int) instruction_simplifier (after) + /// CHECK-DAG: <<ArgValue:i\d+>> ParameterValue + /// CHECK-DAG: <<Const4:i\d+>> IntConstant 4 + /// CHECK-DAG: <<Add:i\d+>> Add [<<ArgValue>>,<<Const4>>] + /// CHECK-DAG: Return [<<Add>>] + + public static int $noinline$AddAddSubAddConst(int arg) { + if (doThrow) { throw new Error(); } + return arg + 1 + 2 - 3 + 4; + } + /// CHECK-START: int Main.$noinline$AndAllOnes(int) instruction_simplifier (before) /// CHECK-DAG: <<Arg:i\d+>> ParameterValue /// CHECK-DAG: <<ConstF:i\d+>> IntConstant -1 @@ -364,6 +387,27 @@ public class Main { return arg * 128; } + /// CHECK-START: long Main.$noinline$MulMulMulConst(long) instruction_simplifier (before) + /// CHECK-DAG: <<ArgValue:j\d+>> ParameterValue + /// CHECK-DAG: <<Const10:j\d+>> LongConstant 10 + /// CHECK-DAG: <<Const11:j\d+>> LongConstant 11 + /// CHECK-DAG: <<Const12:j\d+>> LongConstant 12 + /// CHECK-DAG: <<Mul1:j\d+>> Mul [<<Const10>>,<<ArgValue>>] + /// CHECK-DAG: <<Mul2:j\d+>> Mul [<<Mul1>>,<<Const11>>] + /// CHECK-DAG: <<Mul3:j\d+>> Mul [<<Mul2>>,<<Const12>>] + /// CHECK-DAG: Return [<<Mul3>>] + + /// CHECK-START: long Main.$noinline$MulMulMulConst(long) instruction_simplifier (after) + /// CHECK-DAG: <<ArgValue:j\d+>> ParameterValue + /// CHECK-DAG: <<Const1320:j\d+>> LongConstant 1320 + /// CHECK-DAG: <<Mul:j\d+>> Mul [<<ArgValue>>,<<Const1320>>] + /// CHECK-DAG: Return [<<Mul>>] + + public static long $noinline$MulMulMulConst(long arg) { + if (doThrow) { throw new Error(); } + return 10 * arg * 11 * 12; + } + /// CHECK-START: int Main.$noinline$Or0(int) instruction_simplifier (before) /// CHECK-DAG: <<Arg:i\d+>> ParameterValue /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 @@ -490,6 +534,63 @@ public class Main { return 0 - arg; } + /// CHECK-START: int Main.$noinline$SubAddConst1(int) instruction_simplifier (before) + /// CHECK-DAG: <<ArgValue:i\d+>> ParameterValue + /// CHECK-DAG: <<Const5:i\d+>> IntConstant 5 + /// CHECK-DAG: <<Const6:i\d+>> IntConstant 6 + /// CHECK-DAG: <<Sub:i\d+>> Sub [<<Const5>>,<<ArgValue>>] + /// CHECK-DAG: <<Add:i\d+>> Add [<<Sub>>,<<Const6>>] + /// CHECK-DAG: Return [<<Add>>] + + /// CHECK-START: int Main.$noinline$SubAddConst1(int) instruction_simplifier (after) + /// CHECK-DAG: <<ArgValue:i\d+>> ParameterValue + /// CHECK-DAG: <<Const11:i\d+>> IntConstant 11 + /// CHECK-DAG: <<Sub:i\d+>> Sub [<<Const11>>,<<ArgValue>>] + /// CHECK-DAG: Return [<<Sub>>] + + public static int $noinline$SubAddConst1(int arg) { + if (doThrow) { throw new Error(); } + return 5 - arg + 6; + } + + /// CHECK-START: int Main.$noinline$SubAddConst2(int) instruction_simplifier (before) + /// CHECK-DAG: <<ArgValue:i\d+>> ParameterValue + /// CHECK-DAG: <<Const14:i\d+>> IntConstant 14 + /// CHECK-DAG: <<Const13:i\d+>> IntConstant 13 + /// CHECK-DAG: <<Add:i\d+>> Add [<<ArgValue>>,<<Const13>>] + /// CHECK-DAG: <<Sub:i\d+>> Sub [<<Const14>>,<<Add>>] + /// CHECK-DAG: Return [<<Sub>>] + + /// CHECK-START: int Main.$noinline$SubAddConst2(int) instruction_simplifier (after) + /// CHECK-DAG: <<ArgValue:i\d+>> ParameterValue + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 + /// CHECK-DAG: <<Sub:i\d+>> Sub [<<Const1>>,<<ArgValue>>] + /// CHECK-DAG: Return [<<Sub>>] + + public static int $noinline$SubAddConst2(int arg) { + if (doThrow) { throw new Error(); } + return 14 - (arg + 13); + } + + /// CHECK-START: long Main.$noinline$SubSubConst(long) instruction_simplifier (before) + /// CHECK-DAG: <<ArgValue:j\d+>> ParameterValue + /// CHECK-DAG: <<Const17:j\d+>> LongConstant 17 + /// CHECK-DAG: <<Const18:j\d+>> LongConstant 18 + /// CHECK-DAG: <<Sub1:j\d+>> Sub [<<Const18>>,<<ArgValue>>] + /// CHECK-DAG: <<Sub2:j\d+>> Sub [<<Const17>>,<<Sub1>>] + /// CHECK-DAG: Return [<<Sub2>>] + + /// CHECK-START: long Main.$noinline$SubSubConst(long) instruction_simplifier (after) + /// CHECK-DAG: <<ArgValue:j\d+>> ParameterValue + /// CHECK-DAG: <<ConstM1:j\d+>> LongConstant -1 + /// CHECK-DAG: <<Add:j\d+>> Add [<<ArgValue>>,<<ConstM1>>] + /// CHECK-DAG: Return [<<Add>>] + + public static long $noinline$SubSubConst(long arg) { + if (doThrow) { throw new Error(); } + return 17 - (18 - arg); + } + /// CHECK-START: long Main.$noinline$UShr0(long) instruction_simplifier (before) /// CHECK-DAG: <<Arg:j\d+>> ParameterValue /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 @@ -1757,6 +1858,17 @@ public class Main { } } + public static int $noinline$runSmaliTestConst(String name, int arg) { + if (doThrow) { throw new Error(); } + try { + Class<?> c = Class.forName("SmaliTests"); + Method m = c.getMethod(name, int.class); + return (Integer) m.invoke(null, arg); + } catch (Exception ex) { + throw new Error(ex); + } + } + /// CHECK-START: int Main.$noinline$intUnnecessaryShiftMasking(int, int) instruction_simplifier (before) /// CHECK: <<Value:i\d+>> ParameterValue /// CHECK: <<Shift:i\d+>> ParameterValue @@ -1863,12 +1975,14 @@ public static void main(String[] args) { int arg = 123456; assertLongEquals(arg, $noinline$Add0(arg)); + assertIntEquals(5, $noinline$AddAddSubAddConst(1)); assertIntEquals(arg, $noinline$AndAllOnes(arg)); assertLongEquals(arg, $noinline$Div1(arg)); assertIntEquals(-arg, $noinline$DivN1(arg)); assertLongEquals(arg, $noinline$Mul1(arg)); assertIntEquals(-arg, $noinline$MulN1(arg)); assertLongEquals((128 * arg), $noinline$MulPowerOfTwo128(arg)); + assertLongEquals(2640, $noinline$MulMulMulConst(2)); assertIntEquals(arg, $noinline$Or0(arg)); assertLongEquals(arg, $noinline$OrSame(arg)); assertIntEquals(arg, $noinline$Shl0(arg)); @@ -1876,6 +1990,9 @@ public static void main(String[] args) { assertLongEquals(arg, $noinline$Shr64(arg)); assertLongEquals(arg, $noinline$Sub0(arg)); assertIntEquals(-arg, $noinline$SubAliasNeg(arg)); + assertIntEquals(9, $noinline$SubAddConst1(2)); + assertIntEquals(-2, $noinline$SubAddConst2(3)); + assertLongEquals(3, $noinline$SubSubConst(4)); assertLongEquals(arg, $noinline$UShr0(arg)); assertIntEquals(arg, $noinline$Xor0(arg)); assertIntEquals(~arg, $noinline$XorAllOnes(arg)); @@ -2011,6 +2128,11 @@ public static void main(String[] args) { } } + assertIntEquals(0, $noinline$runSmaliTestConst("AddSubConst", 1)); + assertIntEquals(3, $noinline$runSmaliTestConst("SubAddConst", 2)); + assertIntEquals(-16, $noinline$runSmaliTestConst("SubSubConst1", 3)); + assertIntEquals(-5, $noinline$runSmaliTestConst("SubSubConst2", 4)); + assertIntEquals(26, $noinline$runSmaliTestConst("SubSubConst3", 5)); assertIntEquals(0x5e6f7808, $noinline$intUnnecessaryShiftMasking(0xabcdef01, 3)); assertIntEquals(0x5e6f7808, $noinline$intUnnecessaryShiftMasking(0xabcdef01, 3 + 32)); assertLongEquals(0xffffffffffffeaf3L, $noinline$longUnnecessaryShiftMasking(0xabcdef0123456789L, 50)); |