author:    2016-05-10 16:08:05 -0700
committer: 2016-07-01 14:10:14 -0700
commit:    e3fb245fbdb5e91cf8a9750504df40bd629e0080
tree:      a3882db92b7942b2edd6add3090b5c875fef2d09
parent:    1fdb340de4e608a88e8683c857cad5d0da2c16de
MIPS32: Improve method invocation
Improvements include:
- CodeGeneratorMIPS::GenerateStaticOrDirectCall() supports:
- MethodLoadKind::kDirectAddressWithFixup (via literals)
- CodePtrLocation::kCallDirectWithFixup (via literals)
- MethodLoadKind::kDexCachePcRelative
- 32-bit literals to support the above (not yet ready for
  general-purpose use: RA is not saved in leaf methods, yet it is
  clobbered on MIPS32R2 when simulating PC-relative addressing;
  MIPS32R6 is OK because it has true PC-relative addressing, such
  as the lwpc instruction; see the sketch after this list)
- shorter instruction sequences for recursive static/direct
calls
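
Illustration only (not part of the original commit message): a minimal C++ model of what the two base-materialization sequences leave in the base register. The instruction sequences themselves come from VisitMipsDexCacheArraysBase() in the diff below; the standalone function and parameter names here are hypothetical.

```cpp
#include <cstdint>
#include <cstdio>

// Hypothetical model (names are illustrative, not ART code) of the value left in
// the base register by the two PC-relative sequences emitted in
// InstructionCodeGeneratorMIPS::VisitMipsDexCacheArraysBase().
//
// MIPS32R2 (simulated PC-relative addressing; clobbers RA, hence the caveat above):
//   nal                      // RA <- address of the instruction after the delay slot
//   lui  reg, high(diff)     //   (executed in nal's delay slot)
//   ori  reg, reg, low(diff)
//   addu reg, reg, ra        // reg <- anchor + diff, anchor being the ori's address
//
// MIPS32R6 (true PC-relative addressing; RA untouched):
//   auipc reg, high(diff)      // reg <- pc + (high(diff) << 16)
//   addiu reg, reg, low(diff)  // low half is sign-extended, compensated at link time

uint32_t BaseAfterR2Sequence(uint32_t anchor_pc, uint32_t diff) {
  uint32_t ra = anchor_pc;  // nal deposits the anchor address (the ori) into RA.
  return ra + diff;         // lui/ori materialize diff (zero-extended); addu adds RA.
}

uint32_t BaseAfterR6Sequence(uint32_t auipc_pc, uint32_t biased_diff) {
  // biased_diff already includes the linker's "+ ((diff & 0x8000) << 1)" adjustment.
  uint32_t high = biased_diff & 0xFFFF0000u;
  int32_t low = static_cast<int16_t>(biased_diff & 0xFFFFu);  // addiu sign-extends.
  return auipc_pc + high + static_cast<uint32_t>(low);
}

int main() {
  // Example addresses only.
  std::printf("R2: 0x%08x, R6: 0x%08x\n",
              static_cast<unsigned>(BaseAfterR2Sequence(0x00401008u, 0x00018000u)),
              static_cast<unsigned>(BaseAfterR6Sequence(0x00401000u, 0x00028000u)));
  return 0;
}
```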
Tested:
- test-art-host-gtest
- test-art-target-gtest and test-art-target-run-test-optimizing on:
- MIPS32R2 QEMU
- CI20 board
- MIPS32R6 (2nd arch) QEMU
Change-Id: Id5b137ad32d5590487fd154c9a01d3b3e7e044ff
22 files changed, 1450 insertions, 102 deletions
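Before the diff, a minimal self-contained sketch of the offset arithmetic applied by the new MipsRelativePatcher::PatchPcRelativeReference() below. The kDexCacheArrayLwOffset constant and the sign-extension compensation match the patch; the helper function, example addresses, and main() are illustrative assumptions only.

```cpp
#include <cstdint>
#include <cstdio>

// From relative_patcher_mips.h below: bias that centers the lw range on the
// offset of the first used dex cache array element.
static constexpr uint32_t kDexCacheArrayLwOffset = 0x8000;

// Illustrative restatement (not ART code) of the 32-bit value whose halves are
// written into the immediate fields of lui/ori (R2) or auipc/addiu (R6).
uint32_t ComputePatchDiff(uint32_t target_offset, uint32_t anchor_offset, bool is_r6) {
  uint32_t diff = target_offset - anchor_offset + kDexCacheArrayLwOffset;
  if (is_r6) {
    diff += (diff & 0x8000) << 1;  // Compensate for sign extension in addiu.
  }
  return diff;
}

int main() {
  // Example numbers, loosely modeled on the relative patcher tests below.
  uint32_t dex_cache_arrays_begin = 0x12345678u;
  uint32_t element_offset = 0x1234u;
  uint32_t anchor_offset = 0x1000u;  // Address of the anchor instruction (auipc/ori).
  uint32_t diff = ComputePatchDiff(dex_cache_arrays_begin + element_offset,
                                   anchor_offset,
                                   /* is_r6 */ true);
  std::printf("high imm: 0x%04x, low imm: 0x%04x\n",
              static_cast<unsigned>(diff >> 16),
              static_cast<unsigned>(diff & 0xFFFFu));
  return 0;
}
```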
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk index 424aa7a7eb..009933d2b7 100644 --- a/build/Android.gtest.mk +++ b/build/Android.gtest.mk @@ -318,6 +318,8 @@ COMPILER_GTEST_COMMON_SRC_FILES_arm64 := \ compiler/utils/arm64/managed_register_arm64_test.cc \ COMPILER_GTEST_COMMON_SRC_FILES_mips := \ + compiler/linker/mips/relative_patcher_mips_test.cc \ + compiler/linker/mips/relative_patcher_mips32r6_test.cc \ COMPILER_GTEST_COMMON_SRC_FILES_mips64 := \ diff --git a/compiler/Android.mk b/compiler/Android.mk index e9c22d2b0f..4ec7d721f3 100644 --- a/compiler/Android.mk +++ b/compiler/Android.mk @@ -113,8 +113,11 @@ LIBART_COMPILER_SRC_FILES_arm64 := \ LIBART_COMPILER_SRC_FILES_mips := \ jni/quick/mips/calling_convention_mips.cc \ + linker/mips/relative_patcher_mips.cc \ optimizing/code_generator_mips.cc \ + optimizing/dex_cache_array_fixups_mips.cc \ optimizing/intrinsics_mips.cc \ + optimizing/pc_relative_fixups_mips.cc \ utils/mips/assembler_mips.cc \ utils/mips/managed_register_mips.cc \ diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc index e52dda35bb..474530a033 100644 --- a/compiler/driver/compiler_driver.cc +++ b/compiler/driver/compiler_driver.cc @@ -394,7 +394,7 @@ CompilerDriver::CompilerDriver( dump_passes_(dump_passes), timings_logger_(timer), compiler_context_(nullptr), - support_boot_image_fixup_(instruction_set != kMips && instruction_set != kMips64), + support_boot_image_fixup_(instruction_set != kMips64), dex_files_for_oat_file_(nullptr), compiled_method_storage_(swap_fd), profile_compilation_info_(profile_compilation_info), diff --git a/compiler/linker/mips/relative_patcher_mips.cc b/compiler/linker/mips/relative_patcher_mips.cc new file mode 100644 index 0000000000..7c0423b635 --- /dev/null +++ b/compiler/linker/mips/relative_patcher_mips.cc @@ -0,0 +1,109 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "linker/mips/relative_patcher_mips.h" + +#include "compiled_method.h" + +namespace art { +namespace linker { + +uint32_t MipsRelativePatcher::ReserveSpace( + uint32_t offset, + const CompiledMethod* compiled_method ATTRIBUTE_UNUSED, + MethodReference method_ref ATTRIBUTE_UNUSED) { + return offset; // No space reserved; no limit on relative call distance. +} + +uint32_t MipsRelativePatcher::ReserveSpaceEnd(uint32_t offset) { + return offset; // No space reserved; no limit on relative call distance. +} + +uint32_t MipsRelativePatcher::WriteThunks(OutputStream* out ATTRIBUTE_UNUSED, uint32_t offset) { + return offset; // No thunks added; no limit on relative call distance. 
+} + +void MipsRelativePatcher::PatchCall(std::vector<uint8_t>* code ATTRIBUTE_UNUSED, + uint32_t literal_offset ATTRIBUTE_UNUSED, + uint32_t patch_offset ATTRIBUTE_UNUSED, + uint32_t target_offset ATTRIBUTE_UNUSED) { + UNIMPLEMENTED(FATAL) << "PatchCall unimplemented on MIPS"; +} + +void MipsRelativePatcher::PatchPcRelativeReference(std::vector<uint8_t>* code, + const LinkerPatch& patch, + uint32_t patch_offset, + uint32_t target_offset) { + uint32_t anchor_literal_offset = patch.PcInsnOffset(); + uint32_t literal_offset = patch.LiteralOffset(); + + // Basic sanity checks. + if (is_r6) { + DCHECK_GE(code->size(), 8u); + DCHECK_LE(literal_offset, code->size() - 8u); + DCHECK_EQ(literal_offset, anchor_literal_offset); + // AUIPC reg, offset_high + DCHECK_EQ((*code)[literal_offset + 0], 0x34); + DCHECK_EQ((*code)[literal_offset + 1], 0x12); + DCHECK_EQ(((*code)[literal_offset + 2] & 0x1F), 0x1E); + DCHECK_EQ(((*code)[literal_offset + 3] & 0xFC), 0xEC); + // ADDIU reg, reg, offset_low + DCHECK_EQ((*code)[literal_offset + 4], 0x78); + DCHECK_EQ((*code)[literal_offset + 5], 0x56); + DCHECK_EQ(((*code)[literal_offset + 7] & 0xFC), 0x24); + } else { + DCHECK_GE(code->size(), 16u); + DCHECK_LE(literal_offset, code->size() - 12u); + DCHECK_GE(literal_offset, 4u); + DCHECK_EQ(literal_offset + 4u, anchor_literal_offset); + // NAL + DCHECK_EQ((*code)[literal_offset - 4], 0x00); + DCHECK_EQ((*code)[literal_offset - 3], 0x00); + DCHECK_EQ((*code)[literal_offset - 2], 0x10); + DCHECK_EQ((*code)[literal_offset - 1], 0x04); + // LUI reg, offset_high + DCHECK_EQ((*code)[literal_offset + 0], 0x34); + DCHECK_EQ((*code)[literal_offset + 1], 0x12); + DCHECK_EQ(((*code)[literal_offset + 2] & 0xE0), 0x00); + DCHECK_EQ((*code)[literal_offset + 3], 0x3C); + // ORI reg, reg, offset_low + DCHECK_EQ((*code)[literal_offset + 4], 0x78); + DCHECK_EQ((*code)[literal_offset + 5], 0x56); + DCHECK_EQ(((*code)[literal_offset + 7] & 0xFC), 0x34); + // ADDU reg, reg, RA + DCHECK_EQ((*code)[literal_offset + 8], 0x21); + DCHECK_EQ(((*code)[literal_offset + 9] & 0x07), 0x00); + DCHECK_EQ(((*code)[literal_offset + 10] & 0x1F), 0x1F); + DCHECK_EQ(((*code)[literal_offset + 11] & 0xFC), 0x00); + } + + // Apply patch. + uint32_t anchor_offset = patch_offset - literal_offset + anchor_literal_offset; + uint32_t diff = target_offset - anchor_offset + kDexCacheArrayLwOffset; + if (is_r6) { + diff += (diff & 0x8000) << 1; // Account for sign extension in ADDIU. + } + + // LUI reg, offset_high / AUIPC reg, offset_high + (*code)[literal_offset + 0] = static_cast<uint8_t>(diff >> 16); + (*code)[literal_offset + 1] = static_cast<uint8_t>(diff >> 24); + // ORI reg, reg, offset_low / ADDIU reg, reg, offset_low + (*code)[literal_offset + 4] = static_cast<uint8_t>(diff >> 0); + (*code)[literal_offset + 5] = static_cast<uint8_t>(diff >> 8); +} + +} // namespace linker +} // namespace art diff --git a/compiler/linker/mips/relative_patcher_mips.h b/compiler/linker/mips/relative_patcher_mips.h new file mode 100644 index 0000000000..4ff2f2f24f --- /dev/null +++ b/compiler/linker/mips/relative_patcher_mips.h @@ -0,0 +1,57 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_LINKER_MIPS_RELATIVE_PATCHER_MIPS_H_ +#define ART_COMPILER_LINKER_MIPS_RELATIVE_PATCHER_MIPS_H_ + +#include "linker/relative_patcher.h" +#include "arch/mips/instruction_set_features_mips.h" + +namespace art { +namespace linker { + +class MipsRelativePatcher FINAL : public RelativePatcher { + public: + explicit MipsRelativePatcher(const MipsInstructionSetFeatures* features) + : is_r6(features->IsR6()) {} + + uint32_t ReserveSpace(uint32_t offset, + const CompiledMethod* compiled_method, + MethodReference method_ref) OVERRIDE; + uint32_t ReserveSpaceEnd(uint32_t offset) OVERRIDE; + uint32_t WriteThunks(OutputStream* out, uint32_t offset) OVERRIDE; + void PatchCall(std::vector<uint8_t>* code, + uint32_t literal_offset, + uint32_t patch_offset, + uint32_t target_offset) OVERRIDE; + void PatchPcRelativeReference(std::vector<uint8_t>* code, + const LinkerPatch& patch, + uint32_t patch_offset, + uint32_t target_offset) OVERRIDE; + + private: + // We'll maximize the range of a single load instruction for dex cache array accesses + // by aligning offset -32768 with the offset of the first used element. + static constexpr uint32_t kDexCacheArrayLwOffset = 0x8000; + bool is_r6; + + DISALLOW_COPY_AND_ASSIGN(MipsRelativePatcher); +}; + +} // namespace linker +} // namespace art + +#endif // ART_COMPILER_LINKER_MIPS_RELATIVE_PATCHER_MIPS_H_ diff --git a/compiler/linker/mips/relative_patcher_mips32r6_test.cc b/compiler/linker/mips/relative_patcher_mips32r6_test.cc new file mode 100644 index 0000000000..0f1dcbcbf1 --- /dev/null +++ b/compiler/linker/mips/relative_patcher_mips32r6_test.cc @@ -0,0 +1,68 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "linker/relative_patcher_test.h" +#include "linker/mips/relative_patcher_mips.h" + +namespace art { +namespace linker { + +// We'll maximize the range of a single load instruction for dex cache array accesses +// by aligning offset -32768 with the offset of the first used element. 
+static constexpr uint32_t kDexCacheArrayLwOffset = 0x8000; + +class Mips32r6RelativePatcherTest : public RelativePatcherTest { + public: + Mips32r6RelativePatcherTest() : RelativePatcherTest(kMips, "mips32r6") {} + + protected: + uint32_t GetMethodOffset(uint32_t method_idx) { + auto result = method_offset_map_.FindMethodOffset(MethodRef(method_idx)); + CHECK(result.first); + return result.second; + } +}; + +TEST_F(Mips32r6RelativePatcherTest, DexCacheReference) { + dex_cache_arrays_begin_ = 0x12345678; + constexpr size_t kElementOffset = 0x1234; + static const uint8_t raw_code[] = { + 0x34, 0x12, 0x5E, 0xEE, // auipc s2, high(diff); placeholder = 0x1234 + 0x78, 0x56, 0x52, 0x26, // addiu s2, s2, low(diff); placeholder = 0x5678 + }; + constexpr uint32_t literal_offset = 0; // At auipc (where patching starts). + constexpr uint32_t anchor_offset = literal_offset; // At auipc (where PC+0 points). + ArrayRef<const uint8_t> code(raw_code); + LinkerPatch patches[] = { + LinkerPatch::DexCacheArrayPatch(literal_offset, nullptr, anchor_offset, kElementOffset), + }; + AddCompiledMethod(MethodRef(1u), code, ArrayRef<const LinkerPatch>(patches)); + Link(); + + auto result = method_offset_map_.FindMethodOffset(MethodRef(1u)); + ASSERT_TRUE(result.first); + uint32_t diff = dex_cache_arrays_begin_ + kElementOffset - (result.second + anchor_offset) + + kDexCacheArrayLwOffset; + diff += (diff & 0x8000) << 1; // Account for sign extension in addiu. + static const uint8_t expected_code[] = { + static_cast<uint8_t>(diff >> 16), static_cast<uint8_t>(diff >> 24), 0x5E, 0xEE, + static_cast<uint8_t>(diff), static_cast<uint8_t>(diff >> 8), 0x52, 0x26, + }; + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); +} + +} // namespace linker +} // namespace art diff --git a/compiler/linker/mips/relative_patcher_mips_test.cc b/compiler/linker/mips/relative_patcher_mips_test.cc new file mode 100644 index 0000000000..8391b5352a --- /dev/null +++ b/compiler/linker/mips/relative_patcher_mips_test.cc @@ -0,0 +1,71 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "linker/relative_patcher_test.h" +#include "linker/mips/relative_patcher_mips.h" + +namespace art { +namespace linker { + +// We'll maximize the range of a single load instruction for dex cache array accesses +// by aligning offset -32768 with the offset of the first used element. 
+static constexpr uint32_t kDexCacheArrayLwOffset = 0x8000; + +class MipsRelativePatcherTest : public RelativePatcherTest { + public: + MipsRelativePatcherTest() : RelativePatcherTest(kMips, "mips32r2") {} + + protected: + uint32_t GetMethodOffset(uint32_t method_idx) { + auto result = method_offset_map_.FindMethodOffset(MethodRef(method_idx)); + CHECK(result.first); + return result.second; + } +}; + +TEST_F(MipsRelativePatcherTest, DexCacheReference) { + dex_cache_arrays_begin_ = 0x12345678; + constexpr size_t kElementOffset = 0x1234; + static const uint8_t raw_code[] = { + 0x00, 0x00, 0x10, 0x04, // nal + 0x34, 0x12, 0x12, 0x3C, // lui s2, high(diff); placeholder = 0x1234 + 0x78, 0x56, 0x52, 0x36, // ori s2, s2, low(diff); placeholder = 0x5678 + 0x21, 0x90, 0x5F, 0x02, // addu s2, s2, ra + }; + constexpr uint32_t literal_offset = 4; // At lui (where patching starts). + constexpr uint32_t anchor_offset = 8; // At ori (where PC+0 points). + ArrayRef<const uint8_t> code(raw_code); + LinkerPatch patches[] = { + LinkerPatch::DexCacheArrayPatch(literal_offset, nullptr, anchor_offset, kElementOffset), + }; + AddCompiledMethod(MethodRef(1u), code, ArrayRef<const LinkerPatch>(patches)); + Link(); + + auto result = method_offset_map_.FindMethodOffset(MethodRef(1u)); + ASSERT_TRUE(result.first); + uint32_t diff = dex_cache_arrays_begin_ + kElementOffset - (result.second + anchor_offset) + + kDexCacheArrayLwOffset; + static const uint8_t expected_code[] = { + 0x00, 0x00, 0x10, 0x04, + static_cast<uint8_t>(diff >> 16), static_cast<uint8_t>(diff >> 24), 0x12, 0x3C, + static_cast<uint8_t>(diff), static_cast<uint8_t>(diff >> 8), 0x52, 0x36, + 0x21, 0x90, 0x5F, 0x02, + }; + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); +} + +} // namespace linker +} // namespace art diff --git a/compiler/linker/relative_patcher.cc b/compiler/linker/relative_patcher.cc index 3a229831d0..77655947fd 100644 --- a/compiler/linker/relative_patcher.cc +++ b/compiler/linker/relative_patcher.cc @@ -22,6 +22,9 @@ #ifdef ART_ENABLE_CODEGEN_arm64 #include "linker/arm64/relative_patcher_arm64.h" #endif +#ifdef ART_ENABLE_CODEGEN_mips +#include "linker/mips/relative_patcher_mips.h" +#endif #ifdef ART_ENABLE_CODEGEN_x86 #include "linker/x86/relative_patcher_x86.h" #endif @@ -95,6 +98,11 @@ std::unique_ptr<RelativePatcher> RelativePatcher::Create( return std::unique_ptr<RelativePatcher>( new Arm64RelativePatcher(provider, features->AsArm64InstructionSetFeatures())); #endif +#ifdef ART_ENABLE_CODEGEN_mips + case kMips: + return std::unique_ptr<RelativePatcher>( + new MipsRelativePatcher(features->AsMipsInstructionSetFeatures())); +#endif default: return std::unique_ptr<RelativePatcher>(new RelativePatcherNone); } diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index 4d44c18dcf..37f1c35c50 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -39,6 +39,10 @@ namespace mips { static constexpr int kCurrentMethodStackOffset = 0; static constexpr Register kMethodRegisterArgument = A0; +// We'll maximize the range of a single load instruction for dex cache array accesses +// by aligning offset -32768 with the offset of the first used element. 
+static constexpr uint32_t kDexCacheArrayLwOffset = 0x8000; + Location MipsReturnLocation(Primitive::Type return_type) { switch (return_type) { case Primitive::kPrimBoolean: @@ -477,7 +481,12 @@ CodeGeneratorMIPS::CodeGeneratorMIPS(HGraph* graph, instruction_visitor_(graph, this), move_resolver_(graph->GetArena(), this), assembler_(graph->GetArena(), &isa_features), - isa_features_(isa_features) { + isa_features_(isa_features), + method_patches_(MethodReferenceComparator(), + graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + call_patches_(MethodReferenceComparator(), + graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) { // Save RA (containing the return address) to mimic Quick. AddAllocatedRegister(Location::RegisterLocation(RA)); } @@ -948,6 +957,71 @@ void CodeGeneratorMIPS::AddLocationAsTemp(Location location, LocationSummary* lo } } +void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) { + DCHECK(linker_patches->empty()); + size_t size = + method_patches_.size() + + call_patches_.size() + + pc_relative_dex_cache_patches_.size(); + linker_patches->reserve(size); + for (const auto& entry : method_patches_) { + const MethodReference& target_method = entry.first; + Literal* literal = entry.second; + DCHECK(literal->GetLabel()->IsBound()); + uint32_t literal_offset = __ GetLabelLocation(literal->GetLabel()); + linker_patches->push_back(LinkerPatch::MethodPatch(literal_offset, + target_method.dex_file, + target_method.dex_method_index)); + } + for (const auto& entry : call_patches_) { + const MethodReference& target_method = entry.first; + Literal* literal = entry.second; + DCHECK(literal->GetLabel()->IsBound()); + uint32_t literal_offset = __ GetLabelLocation(literal->GetLabel()); + linker_patches->push_back(LinkerPatch::CodePatch(literal_offset, + target_method.dex_file, + target_method.dex_method_index)); + } + for (const PcRelativePatchInfo& info : pc_relative_dex_cache_patches_) { + const DexFile& dex_file = info.target_dex_file; + size_t base_element_offset = info.offset_or_index; + DCHECK(info.high_label.IsBound()); + uint32_t high_offset = __ GetLabelLocation(&info.high_label); + DCHECK(info.pc_rel_label.IsBound()); + uint32_t pc_rel_offset = __ GetLabelLocation(&info.pc_rel_label); + linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(high_offset, + &dex_file, + pc_rel_offset, + base_element_offset)); + } +} + +CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeDexCacheArrayPatch( + const DexFile& dex_file, uint32_t element_offset) { + return NewPcRelativePatch(dex_file, element_offset, &pc_relative_dex_cache_patches_); +} + +CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativePatch( + const DexFile& dex_file, uint32_t offset_or_index, ArenaDeque<PcRelativePatchInfo>* patches) { + patches->emplace_back(dex_file, offset_or_index); + return &patches->back(); +} + +Literal* CodeGeneratorMIPS::DeduplicateMethodLiteral(MethodReference target_method, + MethodToLiteralMap* map) { + return map->GetOrCreate( + target_method, + [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); }); +} + +Literal* CodeGeneratorMIPS::DeduplicateMethodAddressLiteral(MethodReference target_method) { + return DeduplicateMethodLiteral(target_method, &method_patches_); +} + +Literal* CodeGeneratorMIPS::DeduplicateMethodCodeLiteral(MethodReference target_method) { + return DeduplicateMethodLiteral(target_method, &call_patches_); 
+} + void CodeGeneratorMIPS::MarkGCCard(Register object, Register value) { MipsLabel done; Register card = AT; @@ -3741,12 +3815,38 @@ void LocationsBuilderMIPS::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invo // art::PrepareForRegisterAllocation. DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); + HInvokeStaticOrDirect::MethodLoadKind method_load_kind = invoke->GetMethodLoadKind(); + HInvokeStaticOrDirect::CodePtrLocation code_ptr_location = invoke->GetCodePtrLocation(); + bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); + + // kDirectAddressWithFixup and kCallDirectWithFixup need no extra input on R6 because + // R6 has PC-relative addressing. + bool has_extra_input = !isR6 && + ((method_load_kind == HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup) || + (code_ptr_location == HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup)); + + if (invoke->HasPcRelativeDexCache()) { + // kDexCachePcRelative is mutually exclusive with + // kDirectAddressWithFixup/kCallDirectWithFixup. + CHECK(!has_extra_input); + has_extra_input = true; + } + IntrinsicLocationsBuilderMIPS intrinsic(codegen_); if (intrinsic.TryDispatch(invoke)) { + if (invoke->GetLocations()->CanCall() && has_extra_input) { + invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any()); + } return; } HandleInvoke(invoke); + + // Add the extra input register if either the dex cache array base register + // or the PC-relative base register for accessing literals is needed. + if (has_extra_input) { + invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister()); + } } static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorMIPS* codegen) { @@ -3771,42 +3871,103 @@ HLoadClass::LoadKind CodeGeneratorMIPS::GetSupportedLoadClassKind( return HLoadClass::LoadKind::kDexCacheViaMethod; } +Register CodeGeneratorMIPS::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, + Register temp) { + CHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u); + Location location = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); + if (!invoke->GetLocations()->Intrinsified()) { + return location.AsRegister<Register>(); + } + // For intrinsics we allow any location, so it may be on the stack. + if (!location.IsRegister()) { + __ LoadFromOffset(kLoadWord, temp, SP, location.GetStackIndex()); + return temp; + } + // For register locations, check if the register was saved. If so, get it from the stack. + // Note: There is a chance that the register was saved but not overwritten, so we could + // save one load. However, since this is just an intrinsic slow path we prefer this + // simple and more robust approach rather that trying to determine if that's the case. + SlowPathCode* slow_path = GetCurrentSlowPath(); + DCHECK(slow_path != nullptr); // For intrinsified invokes the call is emitted on the slow path. 
+ if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) { + int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>()); + __ LoadFromOffset(kLoadWord, temp, SP, stack_offset); + return temp; + } + return location.AsRegister<Register>(); +} + HInvokeStaticOrDirect::DispatchInfo CodeGeneratorMIPS::GetSupportedInvokeStaticOrDirectDispatch( const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, MethodReference target_method ATTRIBUTE_UNUSED) { - switch (desired_dispatch_info.method_load_kind) { + HInvokeStaticOrDirect::DispatchInfo dispatch_info = desired_dispatch_info; + // We disable PC-relative load when there is an irreducible loop, as the optimization + // is incompatible with it. + bool has_irreducible_loops = GetGraph()->HasIrreducibleLoops(); + bool fallback_load = true; + bool fallback_call = true; + switch (dispatch_info.method_load_kind) { case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup: case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: - // TODO: Implement these types. For the moment, we fall back to kDexCacheViaMethod. - return HInvokeStaticOrDirect::DispatchInfo { - HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod, - HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod, - 0u, - 0u - }; + fallback_load = has_irreducible_loops; + break; default: + fallback_load = false; break; } - switch (desired_dispatch_info.code_ptr_location) { + switch (dispatch_info.code_ptr_location) { case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup: + fallback_call = has_irreducible_loops; + break; case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative: - // TODO: Implement these types. For the moment, we fall back to kCallArtMethod. - return HInvokeStaticOrDirect::DispatchInfo { - desired_dispatch_info.method_load_kind, - HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod, - desired_dispatch_info.method_load_data, - 0u - }; + // TODO: Implement this type. + break; default: - return desired_dispatch_info; + fallback_call = false; + break; } + if (fallback_load) { + dispatch_info.method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod; + dispatch_info.method_load_data = 0; + } + if (fallback_call) { + dispatch_info.code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod; + dispatch_info.direct_code_ptr = 0; + } + return dispatch_info; } void CodeGeneratorMIPS::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) { // All registers are assumed to be correctly set up per the calling convention. - Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp. - switch (invoke->GetMethodLoadKind()) { + HInvokeStaticOrDirect::MethodLoadKind method_load_kind = invoke->GetMethodLoadKind(); + HInvokeStaticOrDirect::CodePtrLocation code_ptr_location = invoke->GetCodePtrLocation(); + bool isR6 = isa_features_.IsR6(); + // kDirectAddressWithFixup and kCallDirectWithFixup have no extra input on R6 because + // R6 has PC-relative addressing. + bool has_extra_input = invoke->HasPcRelativeDexCache() || + (!isR6 && + ((method_load_kind == HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup) || + (code_ptr_location == HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup))); + Register base_reg = has_extra_input + ? 
GetInvokeStaticOrDirectExtraParameter(invoke, temp.AsRegister<Register>()) + : ZERO; + + // For better instruction scheduling we load the direct code pointer before the method pointer. + switch (code_ptr_location) { + case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect: + // T9 = invoke->GetDirectCodePtr(); + __ LoadConst32(T9, invoke->GetDirectCodePtr()); + break; + case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup: + // T9 = code address from literal pool with link-time patch. + __ LoadLiteral(T9, base_reg, DeduplicateMethodCodeLiteral(invoke->GetTargetMethod())); + break; + default: + break; + } + + switch (method_load_kind) { case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: // temp = thread->string_init_entrypoint __ LoadFromOffset(kLoadWord, @@ -3821,11 +3982,18 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke __ LoadConst32(temp.AsRegister<Register>(), invoke->GetMethodAddress()); break; case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup: - case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: - // TODO: Implement these types. - // Currently filtered out by GetSupportedInvokeStaticOrDirectDispatch(). - LOG(FATAL) << "Unsupported"; - UNREACHABLE(); + __ LoadLiteral(temp.AsRegister<Register>(), + base_reg, + DeduplicateMethodAddressLiteral(invoke->GetTargetMethod())); + break; + case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: { + HMipsDexCacheArraysBase* base = + invoke->InputAt(invoke->GetSpecialInputIndex())->AsMipsDexCacheArraysBase(); + int32_t offset = + invoke->GetDexCacheArrayOffset() - base->GetElementOffset() - kDexCacheArrayLwOffset; + __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), base_reg, offset); + break; + } case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: { Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); Register reg = temp.AsRegister<Register>(); @@ -3856,20 +4024,19 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke } } - switch (invoke->GetCodePtrLocation()) { + switch (code_ptr_location) { case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf: - __ Jalr(&frame_entry_label_, T9); + __ Bal(&frame_entry_label_); break; case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect: - // LR = invoke->GetDirectCodePtr(); - __ LoadConst32(T9, invoke->GetDirectCodePtr()); - // LR() + case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup: + // T9 prepared above for better instruction scheduling. + // T9() __ Jalr(T9); __ Nop(); break; - case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup: case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative: - // TODO: Implement these types. + // TODO: Implement this type. // Currently filtered out by GetSupportedInvokeStaticOrDirectDispatch(). 
LOG(FATAL) << "Unsupported"; UNREACHABLE(); @@ -5140,6 +5307,57 @@ void InstructionCodeGeneratorMIPS::VisitPackedSwitch(HPackedSwitch* switch_instr } } +void LocationsBuilderMIPS::VisitMipsComputeBaseMethodAddress( + HMipsComputeBaseMethodAddress* insn) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(insn, LocationSummary::kNoCall); + locations->SetOut(Location::RequiresRegister()); +} + +void InstructionCodeGeneratorMIPS::VisitMipsComputeBaseMethodAddress( + HMipsComputeBaseMethodAddress* insn) { + LocationSummary* locations = insn->GetLocations(); + Register reg = locations->Out().AsRegister<Register>(); + + CHECK(!codegen_->GetInstructionSetFeatures().IsR6()); + + // Generate a dummy PC-relative call to obtain PC. + __ Nal(); + // Grab the return address off RA. + __ Move(reg, RA); + + // Remember this offset (the obtained PC value) for later use with constant area. + __ BindPcRelBaseLabel(); +} + +void LocationsBuilderMIPS::VisitMipsDexCacheArraysBase(HMipsDexCacheArraysBase* base) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(base); + locations->SetOut(Location::RequiresRegister()); +} + +void InstructionCodeGeneratorMIPS::VisitMipsDexCacheArraysBase(HMipsDexCacheArraysBase* base) { + Register reg = base->GetLocations()->Out().AsRegister<Register>(); + CodeGeneratorMIPS::PcRelativePatchInfo* info = + codegen_->NewPcRelativeDexCacheArrayPatch(base->GetDexFile(), base->GetElementOffset()); + + if (codegen_->GetInstructionSetFeatures().IsR6()) { + __ Bind(&info->high_label); + __ Bind(&info->pc_rel_label); + // Add a 32-bit offset to PC. + __ Auipc(reg, /* placeholder */ 0x1234); + __ Addiu(reg, reg, /* placeholder */ 0x5678); + } else { + // Generate a dummy PC-relative call to obtain PC. + __ Nal(); + __ Bind(&info->high_label); + __ Lui(reg, /* placeholder */ 0x1234); + __ Bind(&info->pc_rel_label); + __ Ori(reg, reg, /* placeholder */ 0x5678); + // Add a 32-bit offset to PC. + __ Addu(reg, reg, RA); + } +} + void LocationsBuilderMIPS::VisitInvokeUnresolved(HInvokeUnresolved* invoke) { // The trampoline uses the same calling convention as dex calling conventions, // except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h index 6487f28ad5..08f74c04d1 100644 --- a/compiler/optimizing/code_generator_mips.h +++ b/compiler/optimizing/code_generator_mips.h @@ -285,6 +285,9 @@ class CodeGeneratorMIPS : public CodeGenerator { MipsAssembler* GetAssembler() OVERRIDE { return &assembler_; } const MipsAssembler& GetAssembler() const OVERRIDE { return assembler_; } + // Emit linker patches. + void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE; + void MarkGCCard(Register object, Register value); // Register allocation. @@ -372,7 +375,39 @@ class CodeGeneratorMIPS : public CodeGenerator { void GenerateImplicitNullCheck(HNullCheck* instruction); void GenerateExplicitNullCheck(HNullCheck* instruction); + // The PcRelativePatchInfo is used for PC-relative addressing of dex cache arrays + // and boot image strings. The only difference is the interpretation of the offset_or_index. 
+ struct PcRelativePatchInfo { + PcRelativePatchInfo(const DexFile& dex_file, uint32_t off_or_idx) + : target_dex_file(dex_file), offset_or_index(off_or_idx) { } + PcRelativePatchInfo(PcRelativePatchInfo&& other) = default; + + const DexFile& target_dex_file; + // Either the dex cache array element offset or the string index. + uint32_t offset_or_index; + // Label for the instruction loading the most significant half of the offset that's added to PC + // to form the base address (the least significant half is loaded with the instruction that + // follows). + MipsLabel high_label; + // Label for the instruction corresponding to PC+0. + MipsLabel pc_rel_label; + }; + + PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, + uint32_t element_offset); + private: + Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp); + + using MethodToLiteralMap = ArenaSafeMap<MethodReference, Literal*, MethodReferenceComparator>; + + Literal* DeduplicateMethodLiteral(MethodReference target_method, MethodToLiteralMap* map); + Literal* DeduplicateMethodAddressLiteral(MethodReference target_method); + Literal* DeduplicateMethodCodeLiteral(MethodReference target_method); + PcRelativePatchInfo* NewPcRelativePatch(const DexFile& dex_file, + uint32_t offset_or_index, + ArenaDeque<PcRelativePatchInfo>* patches); + // Labels for each block that will be compiled. MipsLabel* block_labels_; MipsLabel frame_entry_label_; @@ -382,6 +417,12 @@ class CodeGeneratorMIPS : public CodeGenerator { MipsAssembler assembler_; const MipsInstructionSetFeatures& isa_features_; + // Method patch info, map MethodReference to a literal for method address and method code. + MethodToLiteralMap method_patches_; + MethodToLiteralMap call_patches_; + // PC-relative patch info for each HMipsDexCacheArraysBase. + ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_; + DISALLOW_COPY_AND_ASSIGN(CodeGeneratorMIPS); }; diff --git a/compiler/optimizing/dex_cache_array_fixups_mips.cc b/compiler/optimizing/dex_cache_array_fixups_mips.cc new file mode 100644 index 0000000000..0f42d9ce0f --- /dev/null +++ b/compiler/optimizing/dex_cache_array_fixups_mips.cc @@ -0,0 +1,94 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dex_cache_array_fixups_mips.h" + +#include "base/arena_containers.h" +#include "utils/dex_cache_arrays_layout-inl.h" + +namespace art { +namespace mips { + +/** + * Finds instructions that need the dex cache arrays base as an input. + */ +class DexCacheArrayFixupsVisitor : public HGraphVisitor { + public: + explicit DexCacheArrayFixupsVisitor(HGraph* graph) + : HGraphVisitor(graph), + dex_cache_array_bases_(std::less<const DexFile*>(), + // Attribute memory use to code generator. 
+ graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {} + + void MoveBasesIfNeeded() { + for (const auto& entry : dex_cache_array_bases_) { + // Bring the base closer to the first use (previously, it was in the + // entry block) and relieve some pressure on the register allocator + // while avoiding recalculation of the base in a loop. + HMipsDexCacheArraysBase* base = entry.second; + base->MoveBeforeFirstUserAndOutOfLoops(); + } + } + + private: + void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE { + // If this is an invoke with PC-relative access to the dex cache methods array, + // we need to add the dex cache arrays base as the special input. + if (invoke->HasPcRelativeDexCache()) { + // Initialize base for target method dex file if needed. + MethodReference target_method = invoke->GetTargetMethod(); + HMipsDexCacheArraysBase* base = GetOrCreateDexCacheArrayBase(*target_method.dex_file); + // Update the element offset in base. + DexCacheArraysLayout layout(kMipsPointerSize, target_method.dex_file); + base->UpdateElementOffset(layout.MethodOffset(target_method.dex_method_index)); + // Add the special argument base to the method. + DCHECK(!invoke->HasCurrentMethodInput()); + invoke->AddSpecialInput(base); + } + } + + HMipsDexCacheArraysBase* GetOrCreateDexCacheArrayBase(const DexFile& dex_file) { + return dex_cache_array_bases_.GetOrCreate( + &dex_file, + [this, &dex_file]() { + HMipsDexCacheArraysBase* base = + new (GetGraph()->GetArena()) HMipsDexCacheArraysBase(dex_file); + HBasicBlock* entry_block = GetGraph()->GetEntryBlock(); + // Insert the base at the start of the entry block, move it to a better + // position later in MoveBaseIfNeeded(). + entry_block->InsertInstructionBefore(base, entry_block->GetFirstInstruction()); + return base; + }); + } + + using DexCacheArraysBaseMap = + ArenaSafeMap<const DexFile*, HMipsDexCacheArraysBase*, std::less<const DexFile*>>; + DexCacheArraysBaseMap dex_cache_array_bases_; +}; + +void DexCacheArrayFixups::Run() { + if (graph_->HasIrreducibleLoops()) { + // Do not run this optimization, as irreducible loops do not work with an instruction + // that can be live-in at the irreducible loop header. + return; + } + DexCacheArrayFixupsVisitor visitor(graph_); + visitor.VisitInsertionOrder(); + visitor.MoveBasesIfNeeded(); +} + +} // namespace mips +} // namespace art diff --git a/compiler/optimizing/dex_cache_array_fixups_mips.h b/compiler/optimizing/dex_cache_array_fixups_mips.h new file mode 100644 index 0000000000..c8def2842e --- /dev/null +++ b/compiler/optimizing/dex_cache_array_fixups_mips.h @@ -0,0 +1,37 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_MIPS_H_ +#define ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_MIPS_H_ + +#include "nodes.h" +#include "optimization.h" + +namespace art { +namespace mips { + +class DexCacheArrayFixups : public HOptimization { + public: + DexCacheArrayFixups(HGraph* graph, OptimizingCompilerStats* stats) + : HOptimization(graph, "dex_cache_array_fixups_mips", stats) {} + + void Run() OVERRIDE; +}; + +} // namespace mips +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_MIPS_H_ diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index abc8d5746a..0f0ef26ea9 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -1307,7 +1307,13 @@ class HLoopInformationOutwardIterator : public ValueObject { M(Arm64IntermediateAddress, Instruction) #endif +#ifndef ART_ENABLE_CODEGEN_mips #define FOR_EACH_CONCRETE_INSTRUCTION_MIPS(M) +#else +#define FOR_EACH_CONCRETE_INSTRUCTION_MIPS(M) \ + M(MipsComputeBaseMethodAddress, Instruction) \ + M(MipsDexCacheArraysBase, Instruction) +#endif #define FOR_EACH_CONCRETE_INSTRUCTION_MIPS64(M) @@ -6551,6 +6557,9 @@ class HParallelMove FINAL : public HTemplateInstruction<0> { #ifdef ART_ENABLE_CODEGEN_arm64 #include "nodes_arm64.h" #endif +#ifdef ART_ENABLE_CODEGEN_mips +#include "nodes_mips.h" +#endif #ifdef ART_ENABLE_CODEGEN_x86 #include "nodes_x86.h" #endif diff --git a/compiler/optimizing/nodes_mips.h b/compiler/optimizing/nodes_mips.h new file mode 100644 index 0000000000..de77245e17 --- /dev/null +++ b/compiler/optimizing/nodes_mips.h @@ -0,0 +1,71 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_NODES_MIPS_H_ +#define ART_COMPILER_OPTIMIZING_NODES_MIPS_H_ + +namespace art { + +// Compute the address of the method for MIPS Constant area support. +class HMipsComputeBaseMethodAddress : public HExpression<0> { + public: + // Treat the value as an int32_t, but it is really a 32 bit native pointer. + HMipsComputeBaseMethodAddress() + : HExpression(Primitive::kPrimInt, SideEffects::None(), kNoDexPc) {} + + bool CanBeMoved() const OVERRIDE { return true; } + + DECLARE_INSTRUCTION(MipsComputeBaseMethodAddress); + + private: + DISALLOW_COPY_AND_ASSIGN(HMipsComputeBaseMethodAddress); +}; + +class HMipsDexCacheArraysBase : public HExpression<0> { + public: + explicit HMipsDexCacheArraysBase(const DexFile& dex_file) + : HExpression(Primitive::kPrimInt, SideEffects::None(), kNoDexPc), + dex_file_(&dex_file), + element_offset_(static_cast<size_t>(-1)) { } + + bool CanBeMoved() const OVERRIDE { return true; } + + void UpdateElementOffset(size_t element_offset) { + // We'll maximize the range of a single load instruction for dex cache array accesses + // by aligning offset -32768 with the offset of the first used element. 
+ element_offset_ = std::min(element_offset_, element_offset); + } + + const DexFile& GetDexFile() const { + return *dex_file_; + } + + size_t GetElementOffset() const { + return element_offset_; + } + + DECLARE_INSTRUCTION(MipsDexCacheArraysBase); + + private: + const DexFile* dex_file_; + size_t element_offset_; + + DISALLOW_COPY_AND_ASSIGN(HMipsDexCacheArraysBase); +}; + +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_NODES_MIPS_H_ diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index c9a4bfe987..d703b0f94f 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -28,6 +28,11 @@ #include "instruction_simplifier_arm64.h" #endif +#ifdef ART_ENABLE_CODEGEN_mips +#include "dex_cache_array_fixups_mips.h" +#include "pc_relative_fixups_mips.h" +#endif + #ifdef ART_ENABLE_CODEGEN_x86 #include "pc_relative_fixups_x86.h" #endif @@ -462,6 +467,20 @@ static void RunArchOptimizations(InstructionSet instruction_set, break; } #endif +#ifdef ART_ENABLE_CODEGEN_mips + case kMips: { + mips::PcRelativeFixups* pc_relative_fixups = + new (arena) mips::PcRelativeFixups(graph, codegen, stats); + mips::DexCacheArrayFixups* dex_cache_array_fixups = + new (arena) mips::DexCacheArrayFixups(graph, stats); + HOptimization* mips_optimizations[] = { + pc_relative_fixups, + dex_cache_array_fixups + }; + RunOptimizations(mips_optimizations, arraysize(mips_optimizations), pass_observer); + break; + } +#endif #ifdef ART_ENABLE_CODEGEN_x86 case kX86: { x86::PcRelativeFixups* pc_relative_fixups = diff --git a/compiler/optimizing/pc_relative_fixups_mips.cc b/compiler/optimizing/pc_relative_fixups_mips.cc new file mode 100644 index 0000000000..ba405cdb69 --- /dev/null +++ b/compiler/optimizing/pc_relative_fixups_mips.cc @@ -0,0 +1,133 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "pc_relative_fixups_mips.h" +#include "code_generator_mips.h" +#include "intrinsics_mips.h" + +namespace art { +namespace mips { + +/** + * Finds instructions that need the constant area base as an input. + */ +class PCRelativeHandlerVisitor : public HGraphVisitor { + public: + PCRelativeHandlerVisitor(HGraph* graph, CodeGenerator* codegen) + : HGraphVisitor(graph), + codegen_(down_cast<CodeGeneratorMIPS*>(codegen)), + base_(nullptr) {} + + void MoveBaseIfNeeded() { + if (base_ != nullptr) { + // Bring the base closer to the first use (previously, it was in the + // entry block) and relieve some pressure on the register allocator + // while avoiding recalculation of the base in a loop. + base_->MoveBeforeFirstUserAndOutOfLoops(); + } + } + + private: + void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE { + HandleInvoke(invoke); + } + + void InitializePCRelativeBasePointer() { + // Ensure we only initialize the pointer once. 
+ if (base_ != nullptr) { + return; + } + // Insert the base at the start of the entry block, move it to a better + // position later in MoveBaseIfNeeded(). + base_ = new (GetGraph()->GetArena()) HMipsComputeBaseMethodAddress(); + HBasicBlock* entry_block = GetGraph()->GetEntryBlock(); + entry_block->InsertInstructionBefore(base_, entry_block->GetFirstInstruction()); + DCHECK(base_ != nullptr); + } + + void HandleInvoke(HInvoke* invoke) { + // If this is an invoke-static/-direct with PC-relative dex cache array + // addressing, we need the PC-relative address base. + HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirect(); + if (invoke_static_or_direct != nullptr) { + HInvokeStaticOrDirect::MethodLoadKind method_load_kind = + invoke_static_or_direct->GetMethodLoadKind(); + HInvokeStaticOrDirect::CodePtrLocation code_ptr_location = + invoke_static_or_direct->GetCodePtrLocation(); + + bool has_extra_input = + (method_load_kind == HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup) || + (code_ptr_location == HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup); + + // We can't add a pointer to the constant area if we already have a current + // method pointer. This may arise when sharpening doesn't remove the current + // method pointer from the invoke. + if (invoke_static_or_direct->HasCurrentMethodInput()) { + DCHECK(!invoke_static_or_direct->HasPcRelativeDexCache()); + CHECK(!has_extra_input); // TODO: review this. + return; + } + + if (has_extra_input && !WillHaveCallFreeIntrinsicsCodeGen(invoke)) { + InitializePCRelativeBasePointer(); + // Add the extra parameter base_. + invoke_static_or_direct->AddSpecialInput(base_); + } + } + } + + bool WillHaveCallFreeIntrinsicsCodeGen(HInvoke* invoke) { + if (invoke->GetIntrinsic() != Intrinsics::kNone) { + // This invoke may have intrinsic code generation defined. However, we must + // now also determine if this code generation is truly there and call-free + // (not unimplemented, no bail on instruction features, or call on slow path). + // This is done by actually calling the locations builder on the instruction + // and clearing out the locations once result is known. We assume this + // call only has creating locations as side effects! + IntrinsicLocationsBuilderMIPS builder(codegen_); + bool success = builder.TryDispatch(invoke) && !invoke->GetLocations()->CanCall(); + invoke->SetLocations(nullptr); + return success; + } + return false; + } + + CodeGeneratorMIPS* codegen_; + + // The generated HMipsComputeBaseMethodAddress in the entry block needed as an + // input to the HMipsLoadFromConstantTable instructions. + HMipsComputeBaseMethodAddress* base_; +}; + +void PcRelativeFixups::Run() { + CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen_); + if (mips_codegen->GetInstructionSetFeatures().IsR6()) { + // Do nothing for R6 because it has PC-relative addressing. + // TODO: review. Move this check into RunArchOptimizations()? + return; + } + if (graph_->HasIrreducibleLoops()) { + // Do not run this optimization, as irreducible loops do not work with an instruction + // that can be live-in at the irreducible loop header. 
+ return; + } + PCRelativeHandlerVisitor visitor(graph_, codegen_); + visitor.VisitInsertionOrder(); + visitor.MoveBaseIfNeeded(); +} + +} // namespace mips +} // namespace art diff --git a/compiler/optimizing/pc_relative_fixups_mips.h b/compiler/optimizing/pc_relative_fixups_mips.h new file mode 100644 index 0000000000..1e8b071bb3 --- /dev/null +++ b/compiler/optimizing/pc_relative_fixups_mips.h @@ -0,0 +1,44 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_PC_RELATIVE_FIXUPS_MIPS_H_ +#define ART_COMPILER_OPTIMIZING_PC_RELATIVE_FIXUPS_MIPS_H_ + +#include "nodes.h" +#include "optimization.h" + +namespace art { + +class CodeGenerator; + +namespace mips { + +class PcRelativeFixups : public HOptimization { + public: + PcRelativeFixups(HGraph* graph, CodeGenerator* codegen, OptimizingCompilerStats* stats) + : HOptimization(graph, "pc_relative_fixups_mips", stats), + codegen_(codegen) {} + + void Run() OVERRIDE; + + private: + CodeGenerator* codegen_; +}; + +} // namespace mips +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_PC_RELATIVE_FIXUPS_MIPS_H_ diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc index ac930833f2..ebaf1c0cab 100644 --- a/compiler/utils/mips/assembler_mips.cc +++ b/compiler/utils/mips/assembler_mips.cc @@ -39,6 +39,7 @@ void MipsAssembler::FinalizeCode() { for (auto& exception_block : exception_blocks_) { EmitExceptionPoll(&exception_block); } + EmitLiterals(); PromoteBranches(); } @@ -444,6 +445,12 @@ void MipsAssembler::Lhu(Register rt, Register rs, uint16_t imm16) { EmitI(0x25, rs, rt, imm16); } +void MipsAssembler::Lwpc(Register rs, uint32_t imm19) { + CHECK(IsR6()); + CHECK(IsUint<19>(imm19)) << imm19; + EmitI21(0x3B, rs, (0x01 << 19) | imm19); +} + void MipsAssembler::Lui(Register rt, uint16_t imm16) { EmitI(0xf, static_cast<Register>(0), rt, imm16); } @@ -532,6 +539,10 @@ void MipsAssembler::B(uint16_t imm16) { EmitI(0x4, static_cast<Register>(0), static_cast<Register>(0), imm16); } +void MipsAssembler::Bal(uint16_t imm16) { + EmitI(0x1, static_cast<Register>(0), static_cast<Register>(0x11), imm16); +} + void MipsAssembler::Beq(Register rs, Register rt, uint16_t imm16) { EmitI(0x4, rs, rt, imm16); } @@ -624,6 +635,11 @@ void MipsAssembler::Bc(uint32_t imm26) { EmitI26(0x32, imm26); } +void MipsAssembler::Balc(uint32_t imm26) { + CHECK(IsR6()); + EmitI26(0x3A, imm26); +} + void MipsAssembler::Jic(Register rt, uint16_t imm16) { CHECK(IsR6()); EmitI(0x36, static_cast<Register>(0), rt, imm16); @@ -1489,30 +1505,47 @@ void MipsAssembler::Branch::InitShortOrLong(MipsAssembler::Branch::OffsetBits of type_ = (offset_size <= branch_info_[short_type].offset_size) ? 
short_type : long_type; } -void MipsAssembler::Branch::InitializeType(bool is_call, bool is_r6) { +void MipsAssembler::Branch::InitializeType(bool is_call, bool is_literal, bool is_r6) { + CHECK_EQ(is_call && is_literal, false); OffsetBits offset_size = GetOffsetSizeNeeded(location_, target_); if (is_r6) { // R6 - if (is_call) { + if (is_literal) { + CHECK(!IsResolved()); + type_ = kR6Literal; + } else if (is_call) { InitShortOrLong(offset_size, kR6Call, kR6LongCall); - } else if (condition_ == kUncond) { - InitShortOrLong(offset_size, kR6UncondBranch, kR6LongUncondBranch); } else { - if (condition_ == kCondEQZ || condition_ == kCondNEZ) { - // Special case for beqzc/bnezc with longer offset than in other b<cond>c instructions. - type_ = (offset_size <= kOffset23) ? kR6CondBranch : kR6LongCondBranch; - } else { - InitShortOrLong(offset_size, kR6CondBranch, kR6LongCondBranch); + switch (condition_) { + case kUncond: + InitShortOrLong(offset_size, kR6UncondBranch, kR6LongUncondBranch); + break; + case kCondEQZ: + case kCondNEZ: + // Special case for beqzc/bnezc with longer offset than in other b<cond>c instructions. + type_ = (offset_size <= kOffset23) ? kR6CondBranch : kR6LongCondBranch; + break; + default: + InitShortOrLong(offset_size, kR6CondBranch, kR6LongCondBranch); + break; } } } else { // R2 - if (is_call) { + if (is_literal) { + CHECK(!IsResolved()); + type_ = kLiteral; + } else if (is_call) { InitShortOrLong(offset_size, kCall, kLongCall); - } else if (condition_ == kUncond) { - InitShortOrLong(offset_size, kUncondBranch, kLongUncondBranch); } else { - InitShortOrLong(offset_size, kCondBranch, kLongCondBranch); + switch (condition_) { + case kUncond: + InitShortOrLong(offset_size, kUncondBranch, kLongUncondBranch); + break; + default: + InitShortOrLong(offset_size, kCondBranch, kLongCondBranch); + break; + } } } old_type_ = type_; @@ -1544,14 +1577,14 @@ bool MipsAssembler::Branch::IsUncond(BranchCondition condition, Register lhs, Re } } -MipsAssembler::Branch::Branch(bool is_r6, uint32_t location, uint32_t target) +MipsAssembler::Branch::Branch(bool is_r6, uint32_t location, uint32_t target, bool is_call) : old_location_(location), location_(location), target_(target), lhs_reg_(0), rhs_reg_(0), condition_(kUncond) { - InitializeType(false, is_r6); + InitializeType(is_call, /* is_literal */ false, is_r6); } MipsAssembler::Branch::Branch(bool is_r6, @@ -1608,19 +1641,23 @@ MipsAssembler::Branch::Branch(bool is_r6, // Branch condition is always true, make the branch unconditional. condition_ = kUncond; } - InitializeType(false, is_r6); + InitializeType(/* is_call */ false, /* is_literal */ false, is_r6); } -MipsAssembler::Branch::Branch(bool is_r6, uint32_t location, uint32_t target, Register indirect_reg) +MipsAssembler::Branch::Branch(bool is_r6, uint32_t location, Register dest_reg, Register base_reg) : old_location_(location), location_(location), - target_(target), - lhs_reg_(indirect_reg), - rhs_reg_(0), + target_(kUnresolved), + lhs_reg_(dest_reg), + rhs_reg_(base_reg), condition_(kUncond) { - CHECK_NE(indirect_reg, ZERO); - CHECK_NE(indirect_reg, AT); - InitializeType(true, is_r6); + CHECK_NE(dest_reg, ZERO); + if (is_r6) { + CHECK_EQ(base_reg, ZERO); + } else { + CHECK_NE(base_reg, ZERO); + } + InitializeType(/* is_call */ false, /* is_literal */ true, is_r6); } MipsAssembler::BranchCondition MipsAssembler::Branch::OppositeCondition( @@ -1722,19 +1759,27 @@ bool MipsAssembler::Branch::IsLong() const { case kUncondBranch: case kCondBranch: case kCall: + // R2 near literal. 
+ case kLiteral: // R6 short branches. case kR6UncondBranch: case kR6CondBranch: case kR6Call: + // R6 near literal. + case kR6Literal: return false; // R2 long branches. case kLongUncondBranch: case kLongCondBranch: case kLongCall: + // R2 far literal. + case kFarLiteral: // R6 long branches. case kR6LongUncondBranch: case kR6LongCondBranch: case kR6LongCall: + // R6 far literal. + case kR6FarLiteral: return true; } UNREACHABLE(); @@ -1803,6 +1848,10 @@ void MipsAssembler::Branch::PromoteToLong() { case kCall: type_ = kLongCall; break; + // R2 near literal. + case kLiteral: + type_ = kFarLiteral; + break; // R6 short branches. case kR6UncondBranch: type_ = kR6LongUncondBranch; @@ -1813,6 +1862,10 @@ void MipsAssembler::Branch::PromoteToLong() { case kR6Call: type_ = kR6LongCall; break; + // R6 near literal. + case kR6Literal: + type_ = kR6FarLiteral; + break; default: // Note: 'type_' is already long. break; @@ -1820,14 +1873,26 @@ void MipsAssembler::Branch::PromoteToLong() { CHECK(IsLong()); } -uint32_t MipsAssembler::Branch::PromoteIfNeeded(uint32_t max_short_distance) { +uint32_t MipsAssembler::GetBranchLocationOrPcRelBase(const MipsAssembler::Branch* branch) const { + switch (branch->GetType()) { + case Branch::kLiteral: + case Branch::kFarLiteral: + return GetLabelLocation(&pc_rel_base_label_); + default: + return branch->GetLocation(); + } +} + +uint32_t MipsAssembler::Branch::PromoteIfNeeded(uint32_t location, uint32_t max_short_distance) { + // `location` is either `GetLabelLocation(&pc_rel_base_label_)` for R2 literals or + // `this->GetLocation()` for everything else. // If the branch is still unresolved or already long, nothing to do. if (IsLong() || !IsResolved()) { return 0; } // Promote the short branch to long if the offset size is too small - // to hold the distance between location_ and target_. - if (GetOffsetSizeNeeded(location_, target_) > GetOffsetSize()) { + // to hold the distance between location and target_. + if (GetOffsetSizeNeeded(location, target_) > GetOffsetSize()) { PromoteToLong(); uint32_t old_size = GetOldSize(); uint32_t new_size = GetSize(); @@ -1837,7 +1902,7 @@ uint32_t MipsAssembler::Branch::PromoteIfNeeded(uint32_t max_short_distance) { // The following logic is for debugging/testing purposes. // Promote some short branches to long when it's not really required. if (UNLIKELY(max_short_distance != std::numeric_limits<uint32_t>::max())) { - int64_t distance = static_cast<int64_t>(target_) - location_; + int64_t distance = static_cast<int64_t>(target_) - location; distance = (distance >= 0) ? distance : -distance; if (distance >= max_short_distance) { PromoteToLong(); @@ -1854,12 +1919,26 @@ uint32_t MipsAssembler::Branch::GetOffsetLocation() const { return location_ + branch_info_[type_].instr_offset * sizeof(uint32_t); } -uint32_t MipsAssembler::Branch::GetOffset() const { +uint32_t MipsAssembler::GetBranchOrPcRelBaseForEncoding(const MipsAssembler::Branch* branch) const { + switch (branch->GetType()) { + case Branch::kLiteral: + case Branch::kFarLiteral: + return GetLabelLocation(&pc_rel_base_label_); + default: + return branch->GetOffsetLocation() + + Branch::branch_info_[branch->GetType()].pc_org * sizeof(uint32_t); + } +} + +uint32_t MipsAssembler::Branch::GetOffset(uint32_t location) const { + // `location` is either `GetLabelLocation(&pc_rel_base_label_)` for R2 literals or + // `this->GetOffsetLocation() + branch_info_[this->GetType()].pc_org * sizeof(uint32_t)` + // for everything else. 
CHECK(IsResolved()); uint32_t ofs_mask = 0xFFFFFFFF >> (32 - GetOffsetSize()); // Calculate the byte distance between instructions and also account for // different PC-relative origins. - uint32_t offset = target_ - GetOffsetLocation() - branch_info_[type_].pc_org * sizeof(uint32_t); + uint32_t offset = target_ - location; // Prepare the offset for encoding into the instruction(s). offset = (offset & ofs_mask) >> branch_info_[type_].offset_shift; return offset; @@ -1906,7 +1985,7 @@ void MipsAssembler::Bind(MipsLabel* label) { label->BindTo(bound_pc); } -uint32_t MipsAssembler::GetLabelLocation(MipsLabel* label) const { +uint32_t MipsAssembler::GetLabelLocation(const MipsLabel* label) const { CHECK(label->IsBound()); uint32_t target = label->Position(); if (label->prev_branch_id_plus_one_) { @@ -1941,6 +2020,10 @@ uint32_t MipsAssembler::GetAdjustedPosition(uint32_t old_position) { return old_position + last_position_adjustment_; } +void MipsAssembler::BindPcRelBaseLabel() { + Bind(&pc_rel_base_label_); +} + void MipsAssembler::FinalizeLabeledBranch(MipsLabel* label) { uint32_t length = branches_.back().GetLength(); if (!label->IsBound()) { @@ -1962,7 +2045,7 @@ void MipsAssembler::FinalizeLabeledBranch(MipsLabel* label) { void MipsAssembler::Buncond(MipsLabel* label) { uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved; - branches_.emplace_back(IsR6(), buffer_.Size(), target); + branches_.emplace_back(IsR6(), buffer_.Size(), target, /* is_call */ false); FinalizeLabeledBranch(label); } @@ -1976,12 +2059,46 @@ void MipsAssembler::Bcond(MipsLabel* label, BranchCondition condition, Register FinalizeLabeledBranch(label); } -void MipsAssembler::Call(MipsLabel* label, Register indirect_reg) { +void MipsAssembler::Call(MipsLabel* label) { uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved; - branches_.emplace_back(IsR6(), buffer_.Size(), target, indirect_reg); + branches_.emplace_back(IsR6(), buffer_.Size(), target, /* is_call */ true); FinalizeLabeledBranch(label); } +Literal* MipsAssembler::NewLiteral(size_t size, const uint8_t* data) { + DCHECK(size == 4u || size == 8u) << size; + literals_.emplace_back(size, data); + return &literals_.back(); +} + +void MipsAssembler::LoadLiteral(Register dest_reg, Register base_reg, Literal* literal) { + // Literal loads are treated as pseudo branches since they require very similar handling. + DCHECK_EQ(literal->GetSize(), 4u); + MipsLabel* label = literal->GetLabel(); + DCHECK(!label->IsBound()); + branches_.emplace_back(IsR6(), + buffer_.Size(), + dest_reg, + base_reg); + FinalizeLabeledBranch(label); +} + +void MipsAssembler::EmitLiterals() { + if (!literals_.empty()) { + // We don't support byte and half-word literals. + // TODO: proper alignment for 64-bit literals when they're implemented. + for (Literal& literal : literals_) { + MipsLabel* label = literal.GetLabel(); + Bind(label); + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + DCHECK(literal.GetSize() == 4u || literal.GetSize() == 8u); + for (size_t i = 0, size = literal.GetSize(); i != size; ++i) { + buffer_.Emit<uint8_t>(literal.GetData()[i]); + } + } + } +} + void MipsAssembler::PromoteBranches() { // Promote short branches to long as necessary. 
bool changed; @@ -1989,7 +2106,8 @@ void MipsAssembler::PromoteBranches() { changed = false; for (auto& branch : branches_) { CHECK(branch.IsResolved()); - uint32_t delta = branch.PromoteIfNeeded(); + uint32_t base = GetBranchLocationOrPcRelBase(&branch); + uint32_t delta = branch.PromoteIfNeeded(base); // If this branch has been promoted and needs to expand in size, // relocate all branches by the expansion size. if (delta) { @@ -2027,27 +2145,35 @@ const MipsAssembler::Branch::BranchInfo MipsAssembler::Branch::branch_info_[] = // R2 short branches. { 2, 0, 1, MipsAssembler::Branch::kOffset18, 2 }, // kUncondBranch { 2, 0, 1, MipsAssembler::Branch::kOffset18, 2 }, // kCondBranch - { 5, 2, 0, MipsAssembler::Branch::kOffset16, 0 }, // kCall + { 2, 0, 1, MipsAssembler::Branch::kOffset18, 2 }, // kCall + // R2 near literal. + { 1, 0, 0, MipsAssembler::Branch::kOffset16, 0 }, // kLiteral // R2 long branches. { 9, 3, 1, MipsAssembler::Branch::kOffset32, 0 }, // kLongUncondBranch { 10, 4, 1, MipsAssembler::Branch::kOffset32, 0 }, // kLongCondBranch { 6, 1, 1, MipsAssembler::Branch::kOffset32, 0 }, // kLongCall + // R2 far literal. + { 3, 0, 0, MipsAssembler::Branch::kOffset32, 0 }, // kFarLiteral // R6 short branches. { 1, 0, 1, MipsAssembler::Branch::kOffset28, 2 }, // kR6UncondBranch { 2, 0, 1, MipsAssembler::Branch::kOffset18, 2 }, // kR6CondBranch // Exception: kOffset23 for beqzc/bnezc. - { 2, 0, 0, MipsAssembler::Branch::kOffset21, 2 }, // kR6Call + { 1, 0, 1, MipsAssembler::Branch::kOffset28, 2 }, // kR6Call + // R6 near literal. + { 1, 0, 0, MipsAssembler::Branch::kOffset21, 2 }, // kR6Literal // R6 long branches. { 2, 0, 0, MipsAssembler::Branch::kOffset32, 0 }, // kR6LongUncondBranch { 3, 1, 0, MipsAssembler::Branch::kOffset32, 0 }, // kR6LongCondBranch - { 3, 0, 0, MipsAssembler::Branch::kOffset32, 0 }, // kR6LongCall + { 2, 0, 0, MipsAssembler::Branch::kOffset32, 0 }, // kR6LongCall + // R6 far literal. + { 2, 0, 0, MipsAssembler::Branch::kOffset32, 0 }, // kR6FarLiteral }; -// Note: make sure branch_info_[] and mitBranch() are kept synchronized. +// Note: make sure branch_info_[] and EmitBranch() are kept synchronized. void MipsAssembler::EmitBranch(MipsAssembler::Branch* branch) { CHECK_EQ(overwriting_, true); overwrite_location_ = branch->GetLocation(); - uint32_t offset = branch->GetOffset(); + uint32_t offset = branch->GetOffset(GetBranchOrPcRelBaseForEncoding(branch)); BranchCondition condition = branch->GetCondition(); Register lhs = branch->GetLeftRegister(); Register rhs = branch->GetRightRegister(); @@ -2064,12 +2190,15 @@ void MipsAssembler::EmitBranch(MipsAssembler::Branch* branch) { Nop(); // TODO: improve by filling the delay slot. break; case Branch::kCall: - Nal(); - Nop(); // TODO: is this NOP really needed here? CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); - Addiu(lhs, RA, offset); - Jalr(lhs); - Nop(); + Bal(offset); + Nop(); // TODO: improve by filling the delay slot. + break; + + // R2 near literal. + case Branch::kLiteral: + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Lw(lhs, rhs, offset); break; // R2 long branches. @@ -2123,11 +2252,20 @@ void MipsAssembler::EmitBranch(MipsAssembler::Branch* branch) { CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); Lui(AT, High16Bits(offset)); Ori(AT, AT, Low16Bits(offset)); - Addu(lhs, AT, RA); - Jalr(lhs); + Addu(AT, AT, RA); + Jalr(AT); Nop(); break; + // R2 far literal. + case Branch::kFarLiteral: + offset += (offset & 0x8000) << 1; // Account for sign extension in lw. 
+ CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Lui(AT, High16Bits(offset)); + Addu(AT, AT, rhs); + Lw(lhs, AT, Low16Bits(offset)); + break; + // R6 short branches. case Branch::kR6UncondBranch: CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); @@ -2140,8 +2278,13 @@ void MipsAssembler::EmitBranch(MipsAssembler::Branch* branch) { break; case Branch::kR6Call: CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); - Addiupc(lhs, offset); - Jialc(lhs, 0); + Balc(offset); + break; + + // R6 near literal. + case Branch::kR6Literal: + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Lwpc(lhs, offset); break; // R6 long branches. @@ -2159,11 +2302,18 @@ void MipsAssembler::EmitBranch(MipsAssembler::Branch* branch) { Jic(AT, Low16Bits(offset)); break; case Branch::kR6LongCall: - offset += (offset & 0x8000) << 1; // Account for sign extension in addiu. + offset += (offset & 0x8000) << 1; // Account for sign extension in jialc. CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); - Auipc(lhs, High16Bits(offset)); - Addiu(lhs, lhs, Low16Bits(offset)); - Jialc(lhs, 0); + Auipc(AT, High16Bits(offset)); + Jialc(AT, Low16Bits(offset)); + break; + + // R6 far literal. + case Branch::kR6FarLiteral: + offset += (offset & 0x8000) << 1; // Account for sign extension in lw. + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Auipc(AT, High16Bits(offset)); + Lw(lhs, AT, Low16Bits(offset)); break; } CHECK_EQ(overwrite_location_, branch->GetEndLocation()); @@ -2174,8 +2324,8 @@ void MipsAssembler::B(MipsLabel* label) { Buncond(label); } -void MipsAssembler::Jalr(MipsLabel* label, Register indirect_reg) { - Call(label, indirect_reg); +void MipsAssembler::Bal(MipsLabel* label) { + Call(label); } void MipsAssembler::Beq(Register rs, Register rt, MipsLabel* label) { diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h index 31b3b311eb..1f7781fef9 100644 --- a/compiler/utils/mips/assembler_mips.h +++ b/compiler/utils/mips/assembler_mips.h @@ -17,10 +17,12 @@ #ifndef ART_COMPILER_UTILS_MIPS_ASSEMBLER_MIPS_H_ #define ART_COMPILER_UTILS_MIPS_ASSEMBLER_MIPS_H_ +#include <deque> #include <utility> #include <vector> #include "arch/mips/instruction_set_features_mips.h" +#include "base/arena_containers.h" #include "base/macros.h" #include "constants_mips.h" #include "globals.h" @@ -79,6 +81,49 @@ class MipsLabel : public Label { DISALLOW_COPY_AND_ASSIGN(MipsLabel); }; +// Assembler literal is a value embedded in code, retrieved using a PC-relative load. +class Literal { + public: + static constexpr size_t kMaxSize = 8; + + Literal(uint32_t size, const uint8_t* data) + : label_(), size_(size) { + DCHECK_LE(size, Literal::kMaxSize); + memcpy(data_, data, size); + } + + template <typename T> + T GetValue() const { + DCHECK_EQ(size_, sizeof(T)); + T value; + memcpy(&value, data_, sizeof(T)); + return value; + } + + uint32_t GetSize() const { + return size_; + } + + const uint8_t* GetData() const { + return data_; + } + + MipsLabel* GetLabel() { + return &label_; + } + + const MipsLabel* GetLabel() const { + return &label_; + } + + private: + MipsLabel label_; + const uint32_t size_; + uint8_t data_[kMaxSize]; + + DISALLOW_COPY_AND_ASSIGN(Literal); +}; + // Slowpath entered when Thread::Current()->_exception is non-null. 
class MipsExceptionSlowPath { public: @@ -107,6 +152,7 @@ class MipsAssembler FINAL : public Assembler { : Assembler(arena), overwriting_(false), overwrite_location_(0), + literals_(arena->Adapter(kArenaAllocAssembler)), last_position_adjustment_(0), last_old_position_(0), last_branch_id_(0), @@ -182,6 +228,7 @@ class MipsAssembler FINAL : public Assembler { void Lwr(Register rt, Register rs, uint16_t imm16); void Lbu(Register rt, Register rs, uint16_t imm16); void Lhu(Register rt, Register rs, uint16_t imm16); + void Lwpc(Register rs, uint32_t imm19); // R6 void Lui(Register rt, uint16_t imm16); void Aui(Register rt, Register rs, uint16_t imm16); // R6 void Sync(uint32_t stype); @@ -205,6 +252,7 @@ class MipsAssembler FINAL : public Assembler { void Sltiu(Register rt, Register rs, uint16_t imm16); void B(uint16_t imm16); + void Bal(uint16_t imm16); void Beq(Register rs, Register rt, uint16_t imm16); void Bne(Register rs, Register rt, uint16_t imm16); void Beqz(Register rt, uint16_t imm16); @@ -226,6 +274,7 @@ class MipsAssembler FINAL : public Assembler { void Auipc(Register rs, uint16_t imm16); // R6 void Addiupc(Register rs, uint32_t imm19); // R6 void Bc(uint32_t imm26); // R6 + void Balc(uint32_t imm26); // R6 void Jic(Register rt, uint16_t imm16); // R6 void Jialc(Register rt, uint16_t imm16); // R6 void Bltc(Register rs, Register rt, uint16_t imm16); // R6 @@ -365,7 +414,7 @@ class MipsAssembler FINAL : public Assembler { // These will generate R2 branches or R6 branches as appropriate. void Bind(MipsLabel* label); void B(MipsLabel* label); - void Jalr(MipsLabel* label, Register indirect_reg); + void Bal(MipsLabel* label); void Beq(Register rs, Register rt, MipsLabel* label); void Bne(Register rs, Register rt, MipsLabel* label); void Beqz(Register rt, MipsLabel* label); @@ -412,6 +461,21 @@ class MipsAssembler FINAL : public Assembler { UNIMPLEMENTED(FATAL) << "Do not use Jump for MIPS"; } + // Create a new literal with a given value. + // NOTE: Force the template parameter to be explicitly specified. + template <typename T> + Literal* NewLiteral(typename Identity<T>::type value) { + static_assert(std::is_integral<T>::value, "T must be an integral type."); + return NewLiteral(sizeof(value), reinterpret_cast<const uint8_t*>(&value)); + } + + // Create a new literal with the given data. + Literal* NewLiteral(size_t size, const uint8_t* data); + + // Load literal using the base register (for R2 only) or using PC-relative loads + // (for R6 only; base_reg must be ZERO). + void LoadLiteral(Register dest_reg, Register base_reg, Literal* literal); + // // Overridden common assembler high-level functionality. // @@ -569,12 +633,22 @@ class MipsAssembler FINAL : public Assembler { // Returns the (always-)current location of a label (can be used in class CodeGeneratorMIPS, // must be used instead of MipsLabel::GetPosition()). - uint32_t GetLabelLocation(MipsLabel* label) const; + uint32_t GetLabelLocation(const MipsLabel* label) const; // Get the final position of a label after local fixup based on the old position // recorded before FinalizeCode(). uint32_t GetAdjustedPosition(uint32_t old_position); + // R2 doesn't have PC-relative addressing, which we need to access literals. We simulate it by + // reading the PC value into a general-purpose register with the NAL instruction and then loading + // literals through this base register. The code generator calls this method (at most once per + // method being compiled) to bind a label to the location for which the PC value is acquired. 
+ // The assembler then computes literal offsets relative to this label. + void BindPcRelBaseLabel(); + + // Note that PC-relative literal loads are handled as pseudo branches because they need very + // similar relocation and may similarly expand in size to accommodate larger offsets relative + // to PC. enum BranchCondition { kCondLT, kCondGE, @@ -604,18 +678,26 @@ class MipsAssembler FINAL : public Assembler { kUncondBranch, kCondBranch, kCall, + // R2 near literal. + kLiteral, // R2 long branches. kLongUncondBranch, kLongCondBranch, kLongCall, + // R2 far literal. + kFarLiteral, // R6 short branches. kR6UncondBranch, kR6CondBranch, kR6Call, + // R6 near literal. + kR6Literal, // R6 long branches. kR6LongUncondBranch, kR6LongCondBranch, kR6LongCall, + // R6 far literal. + kR6FarLiteral, }; // Bit sizes of offsets defined as enums to minimize chance of typos. enum OffsetBits { @@ -650,17 +732,17 @@ class MipsAssembler FINAL : public Assembler { }; static const BranchInfo branch_info_[/* Type */]; - // Unconditional branch. - Branch(bool is_r6, uint32_t location, uint32_t target); + // Unconditional branch or call. + Branch(bool is_r6, uint32_t location, uint32_t target, bool is_call); // Conditional branch. Branch(bool is_r6, uint32_t location, uint32_t target, BranchCondition condition, Register lhs_reg, - Register rhs_reg = ZERO); - // Call (branch and link) that stores the target address in a given register (i.e. T9). - Branch(bool is_r6, uint32_t location, uint32_t target, Register indirect_reg); + Register rhs_reg); + // Literal. + Branch(bool is_r6, uint32_t location, Register dest_reg, Register base_reg); // Some conditional branches with lhs = rhs are effectively NOPs, while some // others are effectively unconditional. MIPSR6 conditional branches require lhs != rhs. @@ -736,17 +818,18 @@ class MipsAssembler FINAL : public Assembler { // that is allowed for short branches. This is for debugging/testing purposes. // max_short_distance = 0 forces all short branches to become long. // Use the implicit default argument when not debugging/testing. - uint32_t PromoteIfNeeded(uint32_t max_short_distance = std::numeric_limits<uint32_t>::max()); + uint32_t PromoteIfNeeded(uint32_t location, + uint32_t max_short_distance = std::numeric_limits<uint32_t>::max()); // Returns the location of the instruction(s) containing the offset. uint32_t GetOffsetLocation() const; // Calculates and returns the offset ready for encoding in the branch instruction(s). - uint32_t GetOffset() const; + uint32_t GetOffset(uint32_t location) const; private: // Completes branch construction by determining and recording its type. - void InitializeType(bool is_call, bool is_r6); + void InitializeType(bool is_call, bool is_literal, bool is_r6); // Helper for the above.
void InitShortOrLong(OffsetBits ofs_size, Type short_type, Type long_type); @@ -776,12 +859,15 @@ class MipsAssembler FINAL : public Assembler { void Buncond(MipsLabel* label); void Bcond(MipsLabel* label, BranchCondition condition, Register lhs, Register rhs = ZERO); - void Call(MipsLabel* label, Register indirect_reg); + void Call(MipsLabel* label); void FinalizeLabeledBranch(MipsLabel* label); Branch* GetBranch(uint32_t branch_id); const Branch* GetBranch(uint32_t branch_id) const; + uint32_t GetBranchLocationOrPcRelBase(const MipsAssembler::Branch* branch) const; + uint32_t GetBranchOrPcRelBaseForEncoding(const MipsAssembler::Branch* branch) const; + void EmitLiterals(); void PromoteBranches(); void EmitBranch(Branch* branch); void EmitBranches(); @@ -816,6 +902,15 @@ class MipsAssembler FINAL : public Assembler { // The current overwrite location. uint32_t overwrite_location_; + // Use std::deque<> for literal labels to allow insertions at the end + // without invalidating pointers and references to existing elements. + ArenaDeque<Literal> literals_; + + // There's no PC-relative addressing on MIPS32R2. So, in order to access literals relative to PC + // we get PC using the NAL instruction. This label marks the position within the assembler buffer + // that PC (from NAL) points to. + MipsLabel pc_rel_base_label_; + // Data for AdjustedPosition(), see the description there. uint32_t last_position_adjustment_; uint32_t last_old_position_; diff --git a/compiler/utils/mips/assembler_mips32r6_test.cc b/compiler/utils/mips/assembler_mips32r6_test.cc index ce92d602d0..49ef272fb0 100644 --- a/compiler/utils/mips/assembler_mips32r6_test.cc +++ b/compiler/utils/mips/assembler_mips32r6_test.cc @@ -48,8 +48,30 @@ class AssemblerMIPS32r6Test : public AssemblerTest<mips::MipsAssembler, return "mips"; } + std::string GetAssemblerCmdName() OVERRIDE { + // We assemble and link for MIPS32R6. See GetAssemblerParameters() for details. + return "gcc"; + } + std::string GetAssemblerParameters() OVERRIDE { - return " --no-warn -32 -march=mips32r6"; + // We assemble and link for MIPS32R6. The reason is that object files produced for MIPS32R6 + // (and MIPS64R6) with the GNU assembler don't have correct final offsets in PC-relative + // branches in the .text section and so they require a relocation pass (there's a relocation + // section, .rela.text, that has the needed info to fix up the branches). + // We use "-modd-spreg" so we can use odd-numbered single precision FPU registers. + // We put the code at address 0x1000000 (instead of 0) to avoid overlapping with the + // .MIPS.abiflags section (there doesn't seem to be a way to suppress its generation easily). + return " -march=mips32r6 -modd-spreg -Wa,--no-warn" + " -Wl,-Ttext=0x1000000 -Wl,-e0x1000000 -nostdlib"; + } + + void Pad(std::vector<uint8_t>& data) OVERRIDE { + // The GNU linker unconditionally pads the code segment with NOPs to a size that is a multiple + // of 16 and there doesn't appear to be a way to suppress this padding. Our assembler doesn't + // pad, so, in order for two assembler outputs to match, we need to match the padding as well. + // NOP is encoded as four zero bytes on MIPS. 
+ size_t pad_size = RoundUp(data.size(), 16u) - data.size(); + data.insert(data.end(), pad_size, 0); } std::string GetDisassembleParameters() OVERRIDE { @@ -272,6 +294,21 @@ TEST_F(AssemblerMIPS32r6Test, Aui) { DriverStr(RepeatRRIb(&mips::MipsAssembler::Aui, 16, "aui ${reg1}, ${reg2}, {imm}"), "Aui"); } +TEST_F(AssemblerMIPS32r6Test, Auipc) { + DriverStr(RepeatRIb(&mips::MipsAssembler::Auipc, 16, "auipc ${reg}, {imm}"), "Auipc"); +} + +TEST_F(AssemblerMIPS32r6Test, Lwpc) { + // Lwpc() takes an unsigned 19-bit immediate, while the GNU assembler needs a signed offset, + // hence the sign extension from bit 18 with `imm - ((imm & 0x40000) << 1)`. + // The GNU assembler also wants the offset to be a multiple of 4, which it will shift right + // by 2 positions when encoding, hence `<< 2` to compensate for that shift. + // We capture the value of the immediate with `.set imm, {imm}` because the value is needed + // twice for the sign extension, but `{imm}` is substituted only once. + const char* code = ".set imm, {imm}\nlw ${reg}, ((imm - ((imm & 0x40000) << 1)) << 2)($pc)"; + DriverStr(RepeatRIb(&mips::MipsAssembler::Lwpc, 19, code), "Lwpc"); +} + TEST_F(AssemblerMIPS32r6Test, Bitswap) { DriverStr(RepeatRR(&mips::MipsAssembler::Bitswap, "bitswap ${reg1}, ${reg2}"), "bitswap"); } @@ -598,12 +635,45 @@ TEST_F(AssemblerMIPS32r6Test, StoreDToOffset) { DriverStr(expected, "StoreDToOffset"); } +TEST_F(AssemblerMIPS32r6Test, LoadFarthestNearLiteral) { + mips::Literal* literal = __ NewLiteral<uint32_t>(0x12345678); + __ LoadLiteral(mips::V0, mips::ZERO, literal); + constexpr size_t kAdduCount = 0x3FFDE; + for (size_t i = 0; i != kAdduCount; ++i) { + __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); + } + + std::string expected = + "lwpc $v0, 1f\n" + + RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + + "1:\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadFarthestNearLiteral"); +} + +TEST_F(AssemblerMIPS32r6Test, LoadNearestFarLiteral) { + mips::Literal* literal = __ NewLiteral<uint32_t>(0x12345678); + __ LoadLiteral(mips::V0, mips::ZERO, literal); + constexpr size_t kAdduCount = 0x3FFDF; + for (size_t i = 0; i != kAdduCount; ++i) { + __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); + } + + std::string expected = + "1:\n" + "auipc $at, %hi(2f - 1b)\n" + "lw $v0, %lo(2f - 1b)($at)\n" + + RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + + "2:\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadNearestFarLiteral"); +} + ////////////// // BRANCHES // ////////////// -// TODO: MipsAssembler::Auipc -// MipsAssembler::Addiupc +// TODO: MipsAssembler::Addiupc // MipsAssembler::Bc // MipsAssembler::Jic // MipsAssembler::Jialc diff --git a/compiler/utils/mips/assembler_mips_test.cc b/compiler/utils/mips/assembler_mips_test.cc index a1d6ad6a2f..50a8dc202a 100644 --- a/compiler/utils/mips/assembler_mips_test.cc +++ b/compiler/utils/mips/assembler_mips_test.cc @@ -2293,6 +2293,44 @@ TEST_F(AssemblerMIPSTest, LoadConst32) { DriverStr(expected, "LoadConst32"); } +TEST_F(AssemblerMIPSTest, LoadFarthestNearLiteral) { + mips::Literal* literal = __ NewLiteral<uint32_t>(0x12345678); + __ BindPcRelBaseLabel(); + __ LoadLiteral(mips::V0, mips::V1, literal); + constexpr size_t kAddiuCount = 0x1FDE; + for (size_t i = 0; i != kAddiuCount; ++i) { + __ Addiu(mips::A0, mips::A1, 0); + } + + std::string expected = + "1:\n" + "lw $v0, %lo(2f - 1b)($v1)\n" + + RepeatInsn(kAddiuCount, "addiu $a0, $a1, 0\n") + + "2:\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadFarthestNearLiteral"); +} +
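[Editorial aside, not part of the patch: the MIPS32R2 tests above and below hand LoadLiteral() a base register (V1) that is assumed to already hold the PC-relative base. A client such as the code generator is expected to obtain that base itself with NAL, as described in the BindPcRelBaseLabel() comment in assembler_mips.h. The sketch below illustrates that usage under stated assumptions: the helper name, the V0/RA register choices and the include path are illustrative only, and the literal pool itself is placed by the assembler when the code is finalized.]

#include "utils/mips/assembler_mips.h"  // assumed include path, as used by the test files in this patch

namespace art {

// Hypothetical helper (not from this patch): load a 32-bit constant through a literal on
// MIPS32R2, obtaining the PC-relative base with NAL. RA is clobbered, which is why the
// commit message notes this is not yet safe in leaf methods that do not save RA.
// The base label may be bound at most once per assembled method, so a real client would
// do this once and reuse the base register for all of its literal loads.
void LoadConstViaLiteralR2(mips::MipsAssembler* assembler, uint32_t value) {
  mips::Literal* literal = assembler->NewLiteral<uint32_t>(value);
  assembler->Nal();                  // RA <- address of the instruction after the delay slot
  assembler->Nop();                  // branch delay slot
  assembler->BindPcRelBaseLabel();   // this position equals the value now held in RA
  assembler->LoadLiteral(mips::V0, mips::RA, literal);  // near: lw; far: lui + addu + lw
}

}  // namespace art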
+TEST_F(AssemblerMIPSTest, LoadNearestFarLiteral) { + mips::Literal* literal = __ NewLiteral<uint32_t>(0x12345678); + __ BindPcRelBaseLabel(); + __ LoadLiteral(mips::V0, mips::V1, literal); + constexpr size_t kAdduCount = 0x1FDF; + for (size_t i = 0; i != kAdduCount; ++i) { + __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); + } + + std::string expected = + "1:\n" + "lui $at, %hi(2f - 1b)\n" + "addu $at, $at, $v1\n" + "lw $v0, %lo(2f - 1b)($at)\n" + + RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + + "2:\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadNearestFarLiteral"); +} + #undef __ } // namespace art diff --git a/disassembler/disassembler_mips.cc b/disassembler/disassembler_mips.cc index 1f513113ec..769263ec5b 100644 --- a/disassembler/disassembler_mips.cc +++ b/disassembler/disassembler_mips.cc @@ -330,8 +330,10 @@ static const MipsInstruction gMipsInstructions[] = { { kITypeMask, 55u << kOpcodeShift, "ld", "TO", }, { kITypeMask, 56u << kOpcodeShift, "sc", "TO", }, { kITypeMask, 57u << kOpcodeShift, "swc1", "tO", }, + { kJTypeMask, 58u << kOpcodeShift, "balc", "P" }, { kITypeMask | (0x1f << 16), (59u << kOpcodeShift) | (30 << 16), "auipc", "Si" }, { kITypeMask | (0x3 << 19), (59u << kOpcodeShift) | (0 << 19), "addiupc", "Sp" }, + { kITypeMask | (0x3 << 19), (59u << kOpcodeShift) | (1 << 19), "lwpc", "So" }, { kITypeMask, 61u << kOpcodeShift, "sdc1", "tO", }, { kITypeMask | (0x1f << 21), 62u << kOpcodeShift, "jialc", "Ti" }, { kITypeMask | (1 << 21), (62u << kOpcodeShift) | (1 << 21), "bnezc", "Sb" }, // TODO: de-dup? @@ -509,7 +511,15 @@ size_t DisassemblerMips::Dump(std::ostream& os, const uint8_t* instr_ptr) { } } break; - case 'P': // 26-bit offset in bc. + case 'o': // 19-bit offset in lwpc. + { + int32_t offset = (instruction & 0x7ffff) - ((instruction & 0x40000) << 1); + offset <<= 2; + args << FormatInstructionPointer(instr_ptr + offset); + args << StringPrintf(" ; %+d", offset); + } + break; + case 'P': // 26-bit offset in bc and balc. { int32_t offset = (instruction & 0x3ffffff) - ((instruction & 0x2000000) << 1); offset <<= 2; @@ -540,6 +550,7 @@ size_t DisassemblerMips::Dump(std::ostream& os, const uint8_t* instr_ptr) { } } + // TODO: Simplify this once these sequences are simplified in the compiler. // Special cases for sequences of: // pc-relative +/- 2GB branch: // auipc reg, imm |