Diffstat (limited to 'compiler')
-rw-r--r--  compiler/Android.bp                                    |    1
-rw-r--r--  compiler/compiled_class.h                              |   40
-rw-r--r--  compiler/dex/verification_results.cc                   |    7
-rw-r--r--  compiler/dex/verified_method.cc                        |   13
-rw-r--r--  compiler/dex/verified_method.h                         |    6
-rw-r--r--  compiler/driver/compiler_driver.cc                     |   27
-rw-r--r--  compiler/driver/compiler_driver.h                      |    7
-rw-r--r--  compiler/driver/compiler_driver_test.cc                |   10
-rw-r--r--  compiler/linker/arm/relative_patcher_arm_base.cc       |   12
-rw-r--r--  compiler/linker/arm/relative_patcher_arm_base.h        |   37
-rw-r--r--  compiler/linker/arm/relative_patcher_thumb2.cc         |    4
-rw-r--r--  compiler/linker/arm/relative_patcher_thumb2.h          |    4
-rw-r--r--  compiler/linker/arm64/relative_patcher_arm64.cc        |    4
-rw-r--r--  compiler/linker/arm64/relative_patcher_arm64.h         |    2
-rw-r--r--  compiler/oat_writer.cc                                 |   21
-rw-r--r--  compiler/optimizing/block_builder.cc                   |   20
-rw-r--r--  compiler/optimizing/bounds_check_elimination.cc        |    4
-rw-r--r--  compiler/optimizing/code_generator.cc                  |    2
-rw-r--r--  compiler/optimizing/code_generator.h                   |    2
-rw-r--r--  compiler/optimizing/code_generator_arm.cc              |   23
-rw-r--r--  compiler/optimizing/code_generator_arm64.cc            |   46
-rw-r--r--  compiler/optimizing/code_generator_arm_vixl.cc         |   24
-rw-r--r--  compiler/optimizing/code_generator_mips.cc             |  139
-rw-r--r--  compiler/optimizing/code_generator_mips64.cc           |  101
-rw-r--r--  compiler/optimizing/code_generator_mips64.h            |    6
-rw-r--r--  compiler/optimizing/code_generator_vector_arm64.cc     |    6
-rw-r--r--  compiler/optimizing/code_generator_x86.cc              |   12
-rw-r--r--  compiler/optimizing/code_generator_x86_64.cc           |   12
-rw-r--r--  compiler/optimizing/codegen_test.cc                    |    6
-rw-r--r--  compiler/optimizing/codegen_test_utils.h               |    2
-rw-r--r--  compiler/optimizing/gvn.cc                             |    4
-rw-r--r--  compiler/optimizing/instruction_simplifier_arm64.cc    |   13
-rw-r--r--  compiler/optimizing/instruction_simplifier_arm64.h     |    2
-rw-r--r--  compiler/optimizing/instruction_simplifier_shared.cc   |   57
-rw-r--r--  compiler/optimizing/instruction_simplifier_shared.h    |    1
-rw-r--r--  compiler/optimizing/intrinsics_mips.cc                 |  159
-rw-r--r--  compiler/optimizing/intrinsics_mips64.cc               |   91
-rw-r--r--  compiler/optimizing/intrinsics_x86_64.cc               |    4
-rw-r--r--  compiler/optimizing/nodes.h                            |    3
-rw-r--r--  compiler/optimizing/nodes_shared.h                     |   43
-rw-r--r--  compiler/optimizing/nodes_vector.h                     |    7
-rw-r--r--  compiler/optimizing/optimizing_compiler.cc             |    5
-rw-r--r--  compiler/optimizing/register_allocator_graph_color.cc  |    3
-rw-r--r--  compiler/optimizing/scheduler.cc                       |   40
-rw-r--r--  compiler/optimizing/scheduler.h                        |    5
-rw-r--r--  compiler/optimizing/scheduler_arm.cc                   |  822
-rw-r--r--  compiler/optimizing/scheduler_arm.h                    |  158
-rw-r--r--  compiler/optimizing/scheduler_test.cc                  |  260
-rw-r--r--  compiler/optimizing/sharpening.cc                      |    2
-rw-r--r--  compiler/utils/mips64/assembler_mips64.cc              |   88
-rw-r--r--  compiler/utils/mips64/assembler_mips64.h               |   48
-rw-r--r--  compiler/utils/mips64/assembler_mips64_test.cc         |  234
52 files changed, 2023 insertions, 626 deletions
diff --git a/compiler/Android.bp b/compiler/Android.bp
index a2b07af810..df896dc73c 100644
--- a/compiler/Android.bp
+++ b/compiler/Android.bp
@@ -115,6 +115,7 @@ art_cc_defaults {
"optimizing/intrinsics_arm.cc",
"optimizing/intrinsics_arm_vixl.cc",
"optimizing/nodes_shared.cc",
+ "optimizing/scheduler_arm.cc",
"utils/arm/assembler_arm.cc",
"utils/arm/assembler_arm_vixl.cc",
"utils/arm/assembler_thumb2.cc",
diff --git a/compiler/compiled_class.h b/compiler/compiled_class.h
deleted file mode 100644
index 06ce946942..0000000000
--- a/compiler/compiled_class.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_COMPILED_CLASS_H_
-#define ART_COMPILER_COMPILED_CLASS_H_
-
-#include "mirror/class.h"
-
-namespace art {
-
-class CompiledClass {
- public:
- explicit CompiledClass(mirror::Class::Status status) : status_(status) {}
- ~CompiledClass() {}
- mirror::Class::Status GetStatus() const {
- return status_;
- }
- void SetStatus(mirror::Class::Status status) {
- status_ = status;
- }
- private:
- mirror::Class::Status status_;
-};
-
-} // namespace art
-
-#endif // ART_COMPILER_COMPILED_CLASS_H_
diff --git a/compiler/dex/verification_results.cc b/compiler/dex/verification_results.cc
index 3f0df3b2c8..0338cfde8c 100644
--- a/compiler/dex/verification_results.cc
+++ b/compiler/dex/verification_results.cc
@@ -82,7 +82,12 @@ void VerificationResults::ProcessVerifiedMethod(verifier::MethodVerifier* method
// TODO: Investigate why are we doing the work again for this method and try to avoid it.
LOG(WARNING) << "Method processed more than once: " << ref.PrettyMethod();
if (!Runtime::Current()->UseJitCompilation()) {
- DCHECK_EQ(existing->GetSafeCastSet().size(), verified_method->GetSafeCastSet().size());
+ if (kIsDebugBuild) {
+ auto ex_set = existing->GetSafeCastSet();
+ auto ve_set = verified_method->GetSafeCastSet();
+ CHECK_EQ(ex_set == nullptr, ve_set == nullptr);
+ CHECK((ex_set == nullptr) || (ex_set->size() == ve_set->size()));
+ }
}
// Let the unique_ptr delete the new verified method since there was already an existing one
// registered. It is unsafe to replace the existing one since the JIT may be using it to
diff --git a/compiler/dex/verified_method.cc b/compiler/dex/verified_method.cc
index 608a18aa66..e46dc597fa 100644
--- a/compiler/dex/verified_method.cc
+++ b/compiler/dex/verified_method.cc
@@ -49,7 +49,10 @@ const VerifiedMethod* VerifiedMethod::Create(verifier::MethodVerifier* method_ve
}
bool VerifiedMethod::IsSafeCast(uint32_t pc) const {
- return std::binary_search(safe_cast_set_.begin(), safe_cast_set_.end(), pc);
+ if (safe_cast_set_ == nullptr) {
+ return false;
+ }
+ return std::binary_search(safe_cast_set_->begin(), safe_cast_set_->end(), pc);
}
void VerifiedMethod::GenerateSafeCastSet(verifier::MethodVerifier* method_verifier) {
@@ -94,12 +97,16 @@ void VerifiedMethod::GenerateSafeCastSet(verifier::MethodVerifier* method_verifi
/* strict */ true,
/* assignable */ true);
}
+ if (safe_cast_set_ == nullptr) {
+ safe_cast_set_.reset(new SafeCastSet());
+ }
// Verify ordering for push_back() to the sorted vector.
- DCHECK(safe_cast_set_.empty() || safe_cast_set_.back() < dex_pc);
- safe_cast_set_.push_back(dex_pc);
+ DCHECK(safe_cast_set_->empty() || safe_cast_set_->back() < dex_pc);
+ safe_cast_set_->push_back(dex_pc);
}
}
}
+ DCHECK(safe_cast_set_ == nullptr || !safe_cast_set_->empty());
}
} // namespace art
diff --git a/compiler/dex/verified_method.h b/compiler/dex/verified_method.h
index 439e69ece9..64b3f448e6 100644
--- a/compiler/dex/verified_method.h
+++ b/compiler/dex/verified_method.h
@@ -43,8 +43,8 @@ class VerifiedMethod {
REQUIRES_SHARED(Locks::mutator_lock_);
~VerifiedMethod() = default;
- const SafeCastSet& GetSafeCastSet() const {
- return safe_cast_set_;
+ const SafeCastSet* GetSafeCastSet() const {
+ return safe_cast_set_.get();
}
// Returns true if the cast can statically be verified to be redundant
@@ -69,7 +69,7 @@ class VerifiedMethod {
void GenerateSafeCastSet(verifier::MethodVerifier* method_verifier)
REQUIRES_SHARED(Locks::mutator_lock_);
- SafeCastSet safe_cast_set_;
+ std::unique_ptr<SafeCastSet> safe_cast_set_;
const uint32_t encountered_error_types_;
const bool has_runtime_throw_;
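The change above makes the safe-cast set lazily allocated: a null pointer now means "no safe casts recorded", so the common case pays no per-method allocation. A minimal standalone sketch of that pattern, using illustrative names rather than the actual ART types:

#include <algorithm>
#include <cstdint>
#include <memory>
#include <vector>

// Illustrative stand-in for the lazily allocated safe-cast set: nullptr means
// "nothing recorded", which keeps methods without safe casts allocation-free.
class LazySortedSet {
 public:
  const std::vector<uint32_t>* Get() const { return set_.get(); }  // May be null.

  bool Contains(uint32_t pc) const {
    if (set_ == nullptr) {
      return false;  // Common case: no entry was ever inserted.
    }
    return std::binary_search(set_->begin(), set_->end(), pc);
  }

  void Insert(uint32_t pc) {
    if (set_ == nullptr) {
      set_.reset(new std::vector<uint32_t>());  // Allocate on first use only.
    }
    // Callers insert dex pcs in increasing order, so push_back keeps it sorted.
    set_->push_back(pc);
  }

 private:
  std::unique_ptr<std::vector<uint32_t>> set_;
};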
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index a8ab7c6091..c2d792d352 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -36,7 +36,6 @@
#include "base/time_utils.h"
#include "base/timing_logger.h"
#include "class_linker-inl.h"
-#include "compiled_class.h"
#include "compiled_method.h"
#include "compiler.h"
#include "compiler_callbacks.h"
@@ -317,11 +316,6 @@ CompilerDriver::CompilerDriver(
}
CompilerDriver::~CompilerDriver() {
- Thread* self = Thread::Current();
- {
- MutexLock mu(self, compiled_classes_lock_);
- STLDeleteValues(&compiled_classes_);
- }
compiled_methods_.Visit([this](const MethodReference& ref ATTRIBUTE_UNUSED,
CompiledMethod* method) {
if (method != nullptr) {
@@ -1978,8 +1972,7 @@ bool CompilerDriver::FastVerify(jobject jclass_loader,
if (compiler_only_verifies) {
// Just update the compiled_classes_ map. The compiler doesn't need to resolve
// the type.
- compiled_classes_.Overwrite(
- ClassReference(dex_file, i), new CompiledClass(mirror::Class::kStatusVerified));
+ compiled_classes_.Overwrite(ClassReference(dex_file, i), mirror::Class::kStatusVerified);
} else {
// Update the class status, so later compilation stages know they don't need to verify
// the class.
@@ -2690,14 +2683,15 @@ void CompilerDriver::AddCompiledMethod(const MethodReference& method_ref,
<< method_ref.dex_file->PrettyMethod(method_ref.dex_method_index);
}
-CompiledClass* CompilerDriver::GetCompiledClass(ClassReference ref) const {
+bool CompilerDriver::GetCompiledClass(ClassReference ref, mirror::Class::Status* status) const {
+ DCHECK(status != nullptr);
MutexLock mu(Thread::Current(), compiled_classes_lock_);
- ClassTable::const_iterator it = compiled_classes_.find(ref);
+ ClassStateTable::const_iterator it = compiled_classes_.find(ref);
if (it == compiled_classes_.end()) {
- return nullptr;
+ return false;
}
- CHECK(it->second != nullptr);
- return it->second;
+ *status = it->second;
+ return true;
}
void CompilerDriver::RecordClassStatus(ClassReference ref, mirror::Class::Status status) {
@@ -2719,12 +2713,11 @@ void CompilerDriver::RecordClassStatus(ClassReference ref, mirror::Class::Status
MutexLock mu(Thread::Current(), compiled_classes_lock_);
auto it = compiled_classes_.find(ref);
if (it == compiled_classes_.end()) {
- CompiledClass* compiled_class = new CompiledClass(status);
- compiled_classes_.Overwrite(ref, compiled_class);
- } else if (status > it->second->GetStatus()) {
+ compiled_classes_.Overwrite(ref, status);
+ } else if (status > it->second) {
// Update the status if we now have a greater one. This happens with vdex,
// which records a class is verified, but does not resolve it.
- it->second->SetStatus(status);
+ it->second = status;
}
}
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index fbab9dfbaf..e0d97b7c16 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -56,7 +56,6 @@ class VerifierDepsTest;
} // namespace verifier
class BitVector;
-class CompiledClass;
class CompiledMethod;
class CompilerOptions;
class DexCompilationUnit;
@@ -164,7 +163,7 @@ class CompilerDriver {
std::unique_ptr<const std::vector<uint8_t>> CreateQuickResolutionTrampoline() const;
std::unique_ptr<const std::vector<uint8_t>> CreateQuickToInterpreterBridge() const;
- CompiledClass* GetCompiledClass(ClassReference ref) const
+ bool GetCompiledClass(ClassReference ref, mirror::Class::Status* status) const
REQUIRES(!compiled_classes_lock_);
CompiledMethod* GetCompiledMethod(MethodReference ref) const;
@@ -505,10 +504,10 @@ class CompilerDriver {
std::map<ClassReference, bool> requires_constructor_barrier_
GUARDED_BY(requires_constructor_barrier_lock_);
- typedef SafeMap<const ClassReference, CompiledClass*> ClassTable;
+ using ClassStateTable = SafeMap<const ClassReference, mirror::Class::Status>;
// All class references that this compiler has compiled.
mutable Mutex compiled_classes_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
- ClassTable compiled_classes_ GUARDED_BY(compiled_classes_lock_);
+ ClassStateTable compiled_classes_ GUARDED_BY(compiled_classes_lock_);
typedef AtomicMethodRefMap<CompiledMethod*> MethodTable;
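With CompiledClass removed, the driver now stores the class status by value and callers use a bool/out-parameter lookup. A rough sketch of that shape, with placeholder types standing in for ClassReference, SafeMap and the real status enum:

#include <map>
#include <mutex>

enum class ClassStatusSketch { kNotReady, kVerified, kInitialized };  // Illustrative subset.

class ClassStateTableSketch {
 public:
  // Mirrors the new GetCompiledClass(): returns false when the class was never
  // recorded instead of handing out a heap-allocated CompiledClass object.
  bool GetStatus(int class_ref, ClassStatusSketch* status) const {
    std::lock_guard<std::mutex> lock(lock_);
    auto it = table_.find(class_ref);
    if (it == table_.end()) {
      return false;
    }
    *status = it->second;
    return true;
  }

  // Statuses only move forward, matching RecordClassStatus() in the diff.
  void Record(int class_ref, ClassStatusSketch status) {
    std::lock_guard<std::mutex> lock(lock_);
    auto it = table_.find(class_ref);
    if (it == table_.end() || status > it->second) {
      table_[class_ref] = status;
    }
  }

 private:
  mutable std::mutex lock_;
  std::map<int, ClassStatusSketch> table_;
};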
diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc
index 17854fd61a..26ea39f205 100644
--- a/compiler/driver/compiler_driver_test.cc
+++ b/compiler/driver/compiler_driver_test.cc
@@ -23,7 +23,6 @@
#include "art_method-inl.h"
#include "class_linker-inl.h"
#include "common_compiler_test.h"
-#include "compiled_class.h"
#include "dex_file.h"
#include "dex_file_types.h"
#include "gc/heap.h"
@@ -339,10 +338,11 @@ class CompilerDriverVerifyTest : public CompilerDriverTest {
ASSERT_NE(klass, nullptr);
EXPECT_TRUE(klass->IsVerified());
- CompiledClass* compiled_class = compiler_driver_->GetCompiledClass(
- ClassReference(&klass->GetDexFile(), klass->GetDexTypeIndex().index_));
- ASSERT_NE(compiled_class, nullptr);
- EXPECT_EQ(compiled_class->GetStatus(), mirror::Class::kStatusVerified);
+ mirror::Class::Status status;
+ bool found = compiler_driver_->GetCompiledClass(
+ ClassReference(&klass->GetDexFile(), klass->GetDexTypeIndex().index_), &status);
+ ASSERT_TRUE(found);
+ EXPECT_EQ(status, mirror::Class::kStatusVerified);
}
};
diff --git a/compiler/linker/arm/relative_patcher_arm_base.cc b/compiler/linker/arm/relative_patcher_arm_base.cc
index e9d579d2b3..c1ac230d43 100644
--- a/compiler/linker/arm/relative_patcher_arm_base.cc
+++ b/compiler/linker/arm/relative_patcher_arm_base.cc
@@ -311,24 +311,22 @@ uint32_t ArmBaseRelativePatcher::GetThunkTargetOffset(const ThunkKey& key, uint3
}
ArmBaseRelativePatcher::ThunkKey ArmBaseRelativePatcher::GetMethodCallKey() {
- return ThunkKey(ThunkType::kMethodCall, ThunkParams{{ 0u, 0u }}); // NOLINT(whitespace/braces)
+ return ThunkKey(ThunkType::kMethodCall);
}
ArmBaseRelativePatcher::ThunkKey ArmBaseRelativePatcher::GetBakerThunkKey(
const LinkerPatch& patch) {
DCHECK_EQ(patch.GetType(), LinkerPatch::Type::kBakerReadBarrierBranch);
- ThunkParams params;
- params.baker_params.custom_value1 = patch.GetBakerCustomValue1();
- params.baker_params.custom_value2 = patch.GetBakerCustomValue2();
- ThunkKey key(ThunkType::kBakerReadBarrier, params);
- return key;
+ return ThunkKey(ThunkType::kBakerReadBarrier,
+ patch.GetBakerCustomValue1(),
+ patch.GetBakerCustomValue2());
}
void ArmBaseRelativePatcher::ProcessPatches(const CompiledMethod* compiled_method,
uint32_t code_offset) {
for (const LinkerPatch& patch : compiled_method->GetPatches()) {
uint32_t patch_offset = code_offset + patch.LiteralOffset();
- ThunkKey key(static_cast<ThunkType>(-1), ThunkParams{{ 0u, 0u }}); // NOLINT(whitespace/braces)
+ ThunkKey key(static_cast<ThunkType>(-1));
ThunkData* old_data = nullptr;
if (patch.GetType() == LinkerPatch::Type::kCallRelative) {
key = GetMethodCallKey();
diff --git a/compiler/linker/arm/relative_patcher_arm_base.h b/compiler/linker/arm/relative_patcher_arm_base.h
index fd204c05a6..5197ce2549 100644
--- a/compiler/linker/arm/relative_patcher_arm_base.h
+++ b/compiler/linker/arm/relative_patcher_arm_base.h
@@ -45,42 +45,27 @@ class ArmBaseRelativePatcher : public RelativePatcher {
kBakerReadBarrier, // Baker read barrier.
};
- struct BakerReadBarrierParams {
- uint32_t custom_value1;
- uint32_t custom_value2;
- };
-
- struct RawThunkParams {
- uint32_t first;
- uint32_t second;
- };
-
- union ThunkParams {
- RawThunkParams raw_params;
- BakerReadBarrierParams baker_params;
- static_assert(sizeof(raw_params) == sizeof(baker_params), "baker_params size check");
- };
-
class ThunkKey {
public:
- ThunkKey(ThunkType type, ThunkParams params) : type_(type), params_(params) { }
+ explicit ThunkKey(ThunkType type, uint32_t custom_value1 = 0u, uint32_t custom_value2 = 0u)
+ : type_(type), custom_value1_(custom_value1), custom_value2_(custom_value2) { }
ThunkType GetType() const {
return type_;
}
- BakerReadBarrierParams GetBakerReadBarrierParams() const {
- DCHECK(type_ == ThunkType::kBakerReadBarrier);
- return params_.baker_params;
+ uint32_t GetCustomValue1() const {
+ return custom_value1_;
}
- RawThunkParams GetRawParams() const {
- return params_.raw_params;
+ uint32_t GetCustomValue2() const {
+ return custom_value2_;
}
private:
ThunkType type_;
- ThunkParams params_;
+ uint32_t custom_value1_;
+ uint32_t custom_value2_;
};
class ThunkKeyCompare {
@@ -89,10 +74,10 @@ class ArmBaseRelativePatcher : public RelativePatcher {
if (lhs.GetType() != rhs.GetType()) {
return lhs.GetType() < rhs.GetType();
}
- if (lhs.GetRawParams().first != rhs.GetRawParams().first) {
- return lhs.GetRawParams().first < rhs.GetRawParams().first;
+ if (lhs.GetCustomValue1() != rhs.GetCustomValue1()) {
+ return lhs.GetCustomValue1() < rhs.GetCustomValue1();
}
- return lhs.GetRawParams().second < rhs.GetRawParams().second;
+ return lhs.GetCustomValue2() < rhs.GetCustomValue2();
}
};
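The simplified key is just (type, value1, value2) ordered lexicographically, which is what lets the patcher keep one compiled thunk per distinct key in an ordered map. A hedged sketch of that usage; the map alias and payload type here are assumptions for illustration, not the exact ART fields:

#include <cstdint>
#include <map>
#include <vector>

enum class ThunkTypeSketch { kMethodCall, kBakerReadBarrier };

struct ThunkKeySketch {
  ThunkTypeSketch type;
  uint32_t custom_value1;
  uint32_t custom_value2;
};

// Lexicographic order on (type, value1, value2), as in ThunkKeyCompare above.
struct ThunkKeyCompareSketch {
  bool operator()(const ThunkKeySketch& lhs, const ThunkKeySketch& rhs) const {
    if (lhs.type != rhs.type) {
      return lhs.type < rhs.type;
    }
    if (lhs.custom_value1 != rhs.custom_value1) {
      return lhs.custom_value1 < rhs.custom_value1;
    }
    return lhs.custom_value2 < rhs.custom_value2;
  }
};

// One compiled thunk per distinct key: repeated requests reuse the same entry.
using ThunkMapSketch = std::map<ThunkKeySketch, std::vector<uint8_t>, ThunkKeyCompareSketch>;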
diff --git a/compiler/linker/arm/relative_patcher_thumb2.cc b/compiler/linker/arm/relative_patcher_thumb2.cc
index a98aedfc69..aa5a9457b2 100644
--- a/compiler/linker/arm/relative_patcher_thumb2.cc
+++ b/compiler/linker/arm/relative_patcher_thumb2.cc
@@ -108,7 +108,7 @@ void Thumb2RelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* co
DCHECK_EQ(insn, 0xf0408000); // BNE +0 (unpatched)
ThunkKey key = GetBakerThunkKey(patch);
if (kIsDebugBuild) {
- const uint32_t encoded_data = key.GetBakerReadBarrierParams().custom_value1;
+ const uint32_t encoded_data = key.GetCustomValue1();
BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
// Check that the next instruction matches the expected LDR.
switch (kind) {
@@ -346,7 +346,7 @@ std::vector<uint8_t> Thumb2RelativePatcher::CompileThunk(const ThunkKey& key) {
__ Bkpt(0);
break;
case ThunkType::kBakerReadBarrier:
- CompileBakerReadBarrierThunk(assembler, key.GetBakerReadBarrierParams().custom_value1);
+ CompileBakerReadBarrierThunk(assembler, key.GetCustomValue1());
break;
}
diff --git a/compiler/linker/arm/relative_patcher_thumb2.h b/compiler/linker/arm/relative_patcher_thumb2.h
index 7e787d2916..183e5e6c96 100644
--- a/compiler/linker/arm/relative_patcher_thumb2.h
+++ b/compiler/linker/arm/relative_patcher_thumb2.h
@@ -94,13 +94,13 @@ class Thumb2RelativePatcher FINAL : public ArmBaseRelativePatcher {
kField, // Field get or array get with constant offset (i.e. constant index).
kArray, // Array get with index in register.
kGcRoot, // GC root load.
- kLast
+ kLast = kGcRoot
};
enum class BakerReadBarrierWidth : uint8_t {
kWide, // 32-bit LDR (and 32-bit NEG if heap poisoning is enabled).
kNarrow, // 16-bit LDR (and 16-bit NEG if heap poisoning is enabled).
- kLast
+ kLast = kNarrow
};
static constexpr size_t kBitsForBakerReadBarrierKind =
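The kLast enumerators now alias the last real value because the field width below is derived from kLast. A small self-contained illustration of the effect; the real helper is ART's MinimumBitsToStore, the one here is only a sketch:

#include <cstddef>
#include <cstdint>

// Sketch of a minimum-bits computation comparable to MinimumBitsToStore.
constexpr size_t MinimumBitsToStoreSketch(uint32_t value) {
  size_t bits = 0;
  while (value != 0) {
    ++bits;
    value >>= 1;
  }
  return bits;
}

enum class WidthSketch : uint8_t { kWide, kNarrow, kLast = kNarrow };

// kLast == kNarrow == 1, so the width field needs a single bit.
static_assert(MinimumBitsToStoreSketch(static_cast<uint32_t>(WidthSketch::kLast)) == 1,
              "one bit covers all real values");
// A standalone sentinel enumerator (value 2) would have forced two bits even
// though no encoded value ever uses the second one.
static_assert(MinimumBitsToStoreSketch(2u) == 2, "a sentinel would cost an extra bit");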
diff --git a/compiler/linker/arm64/relative_patcher_arm64.cc b/compiler/linker/arm64/relative_patcher_arm64.cc
index 2b06e3f649..e99687a54f 100644
--- a/compiler/linker/arm64/relative_patcher_arm64.cc
+++ b/compiler/linker/arm64/relative_patcher_arm64.cc
@@ -307,7 +307,7 @@ void Arm64RelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* cod
DCHECK_EQ(insn & 0xffffffe0u, 0xb5000000); // CBNZ Xt, +0 (unpatched)
ThunkKey key = GetBakerThunkKey(patch);
if (kIsDebugBuild) {
- const uint32_t encoded_data = key.GetBakerReadBarrierParams().custom_value1;
+ const uint32_t encoded_data = key.GetCustomValue1();
BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
// Check that the next instruction matches the expected LDR.
switch (kind) {
@@ -500,7 +500,7 @@ std::vector<uint8_t> Arm64RelativePatcher::CompileThunk(const ThunkKey& key) {
break;
}
case ThunkType::kBakerReadBarrier: {
- CompileBakerReadBarrierThunk(assembler, key.GetBakerReadBarrierParams().custom_value1);
+ CompileBakerReadBarrierThunk(assembler, key.GetCustomValue1());
break;
}
}
diff --git a/compiler/linker/arm64/relative_patcher_arm64.h b/compiler/linker/arm64/relative_patcher_arm64.h
index 02a5b1ef8f..b00dd081b6 100644
--- a/compiler/linker/arm64/relative_patcher_arm64.h
+++ b/compiler/linker/arm64/relative_patcher_arm64.h
@@ -86,7 +86,7 @@ class Arm64RelativePatcher FINAL : public ArmBaseRelativePatcher {
kField, // Field get or array get with constant offset (i.e. constant index).
kArray, // Array get with index in register.
kGcRoot, // GC root load.
- kLast
+ kLast = kGcRoot
};
static constexpr size_t kBitsForBakerReadBarrierKind =
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index 6b5387ae19..9b22334ead 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -28,7 +28,6 @@
#include "base/stl_util.h"
#include "base/unix_file/fd_file.h"
#include "class_linker.h"
-#include "compiled_class.h"
#include "compiled_method.h"
#include "debug/method_debug_info.h"
#include "dex/verification_results.h"
@@ -712,17 +711,17 @@ class OatWriter::InitOatClassesMethodVisitor : public DexMethodVisitor {
bool EndClass() {
ClassReference class_ref(dex_file_, class_def_index_);
- CompiledClass* compiled_class = writer_->compiler_driver_->GetCompiledClass(class_ref);
mirror::Class::Status status;
- if (compiled_class != nullptr) {
- status = compiled_class->GetStatus();
- } else if (writer_->compiler_driver_->GetVerificationResults()->IsClassRejected(class_ref)) {
- // The oat class status is used only for verification of resolved classes,
- // so use kStatusErrorResolved whether the class was resolved or unresolved
- // during compile-time verification.
- status = mirror::Class::kStatusErrorResolved;
- } else {
- status = mirror::Class::kStatusNotReady;
+ bool found = writer_->compiler_driver_->GetCompiledClass(class_ref, &status);
+ if (!found) {
+ if (writer_->compiler_driver_->GetVerificationResults()->IsClassRejected(class_ref)) {
+ // The oat class status is used only for verification of resolved classes,
+ // so use kStatusErrorResolved whether the class was resolved or unresolved
+ // during compile-time verification.
+ status = mirror::Class::kStatusErrorResolved;
+ } else {
+ status = mirror::Class::kStatusNotReady;
+ }
}
writer_->oat_classes_.emplace_back(offset_,
diff --git a/compiler/optimizing/block_builder.cc b/compiler/optimizing/block_builder.cc
index 5e70a8284d..1e75f10ebe 100644
--- a/compiler/optimizing/block_builder.cc
+++ b/compiler/optimizing/block_builder.cc
@@ -310,16 +310,18 @@ void HBasicBlockBuilder::InsertTryBoundaryBlocks() {
// least one predecessor is not covered by the same TryItem as the try block.
// We do not split each edge separately, but rather create one boundary block
// that all predecessors are relinked to. This preserves loop headers (b/23895756).
- for (auto entry : try_block_info) {
- HBasicBlock* try_block = graph_->GetBlocks()[entry.first];
+ for (const auto& entry : try_block_info) {
+ uint32_t block_id = entry.first;
+ const DexFile::TryItem* try_item = entry.second;
+ HBasicBlock* try_block = graph_->GetBlocks()[block_id];
for (HBasicBlock* predecessor : try_block->GetPredecessors()) {
- if (GetTryItem(predecessor, try_block_info) != entry.second) {
+ if (GetTryItem(predecessor, try_block_info) != try_item) {
// Found a predecessor not covered by the same TryItem. Insert entering
// boundary block.
HTryBoundary* try_entry =
new (arena_) HTryBoundary(HTryBoundary::BoundaryKind::kEntry, try_block->GetDexPc());
try_block->CreateImmediateDominator()->AddInstruction(try_entry);
- LinkToCatchBlocks(try_entry, code_item_, entry.second, catch_blocks);
+ LinkToCatchBlocks(try_entry, code_item_, try_item, catch_blocks);
break;
}
}
@@ -327,8 +329,10 @@ void HBasicBlockBuilder::InsertTryBoundaryBlocks() {
// Do a second pass over the try blocks and insert exit TryBoundaries where
// the successor is not in the same TryItem.
- for (auto entry : try_block_info) {
- HBasicBlock* try_block = graph_->GetBlocks()[entry.first];
+ for (const auto& entry : try_block_info) {
+ uint32_t block_id = entry.first;
+ const DexFile::TryItem* try_item = entry.second;
+ HBasicBlock* try_block = graph_->GetBlocks()[block_id];
// NOTE: Do not use iterators because SplitEdge would invalidate them.
for (size_t i = 0, e = try_block->GetSuccessors().size(); i < e; ++i) {
HBasicBlock* successor = try_block->GetSuccessors()[i];
@@ -337,7 +341,7 @@ void HBasicBlockBuilder::InsertTryBoundaryBlocks() {
// covered by the same TryItem. Otherwise the previous pass would have
// created a non-throwing boundary block.
if (GetTryItem(successor, try_block_info) != nullptr) {
- DCHECK_EQ(entry.second, GetTryItem(successor, try_block_info));
+ DCHECK_EQ(try_item, GetTryItem(successor, try_block_info));
continue;
}
@@ -345,7 +349,7 @@ void HBasicBlockBuilder::InsertTryBoundaryBlocks() {
HTryBoundary* try_exit =
new (arena_) HTryBoundary(HTryBoundary::BoundaryKind::kExit, successor->GetDexPc());
graph_->SplitEdge(try_block, successor)->AddInstruction(try_exit);
- LinkToCatchBlocks(try_exit, code_item_, entry.second, catch_blocks);
+ LinkToCatchBlocks(try_exit, code_item_, try_item, catch_blocks);
}
}
}
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index ed630cda91..f3ecdf036a 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -1734,8 +1734,8 @@ class BCEVisitor : public HGraphVisitor {
*/
void InsertPhiNodes() {
// Scan all new deoptimization blocks.
- for (auto it1 = taken_test_loop_.begin(); it1 != taken_test_loop_.end(); ++it1) {
- HBasicBlock* true_block = it1->second;
+ for (const auto& entry : taken_test_loop_) {
+ HBasicBlock* true_block = entry.second;
HBasicBlock* new_preheader = true_block->GetSingleSuccessor();
// Scan all instructions in a new deoptimization block.
for (HInstructionIterator it(true_block->GetInstructions()); !it.Done(); it.Advance()) {
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 5136d7d2b8..65f3c72e99 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -145,7 +145,7 @@ size_t CodeGenerator::GetCacheOffset(uint32_t index) {
}
size_t CodeGenerator::GetCachePointerOffset(uint32_t index) {
- auto pointer_size = InstructionSetPointerSize(GetInstructionSet());
+ PointerSize pointer_size = InstructionSetPointerSize(GetInstructionSet());
return static_cast<size_t>(pointer_size) * index;
}
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index ea463eeb62..9ef692aaf0 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -842,7 +842,7 @@ class SlowPathGenerator {
const uint32_t dex_pc = instruction->GetDexPc();
auto iter = slow_path_map_.find(dex_pc);
if (iter != slow_path_map_.end()) {
- auto candidates = iter->second;
+ const ArenaVector<std::pair<InstructionType*, SlowPathCode*>>& candidates = iter->second;
for (const auto& it : candidates) {
InstructionType* other_instruction = it.first;
SlowPathCodeType* other_slow_path = down_cast<SlowPathCodeType*>(it.second);
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index cf2a391e8f..ab3d499235 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -6374,6 +6374,15 @@ void InstructionCodeGeneratorARM::VisitIntermediateAddress(HIntermediateAddress*
}
}
+void LocationsBuilderARM::VisitIntermediateAddressIndex(HIntermediateAddressIndex* instruction) {
+ LOG(FATAL) << "Unreachable " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorARM::VisitIntermediateAddressIndex(
+ HIntermediateAddressIndex* instruction) {
+ LOG(FATAL) << "Unreachable " << instruction->GetId();
+}
+
void LocationsBuilderARM::VisitBoundsCheck(HBoundsCheck* instruction) {
RegisterSet caller_saves = RegisterSet::Empty();
InvokeRuntimeCallingConvention calling_convention;
@@ -9067,14 +9076,20 @@ static void PatchJitRootUse(uint8_t* code,
void CodeGeneratorARM::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
for (const auto& entry : jit_string_patches_) {
- const auto& it = jit_string_roots_.find(entry.first);
+ const StringReference& string_reference = entry.first;
+ Literal* table_entry_literal = entry.second;
+ const auto it = jit_string_roots_.find(string_reference);
DCHECK(it != jit_string_roots_.end());
- PatchJitRootUse(code, roots_data, entry.second, it->second);
+ uint64_t index_in_table = it->second;
+ PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
}
for (const auto& entry : jit_class_patches_) {
- const auto& it = jit_class_roots_.find(entry.first);
+ const TypeReference& type_reference = entry.first;
+ Literal* table_entry_literal = entry.second;
+ const auto it = jit_class_roots_.find(type_reference);
DCHECK(it != jit_class_roots_.end());
- PatchJitRootUse(code, roots_data, entry.second, it->second);
+ uint64_t index_in_table = it->second;
+ PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
}
}
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index d59f8b435c..fa39b79e39 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -2661,6 +2661,38 @@ void InstructionCodeGeneratorARM64::VisitIntermediateAddress(HIntermediateAddres
Operand(InputOperandAt(instruction, 1)));
}
+void LocationsBuilderARM64::VisitIntermediateAddressIndex(HIntermediateAddressIndex* instruction) {
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+
+ HIntConstant* shift = instruction->GetShift()->AsIntConstant();
+
+ locations->SetInAt(0, Location::RequiresRegister());
+ // For the byte case we don't need to shift the index variable, so we can encode the data offset
+ // into the ADD instruction. For other cases we prefer the data_offset to be in a register; that
+ // will hoist the data offset constant generation out of the loop and reduce the critical path
+ // length in the loop.
+ locations->SetInAt(1, shift->GetValue() == 0
+ ? Location::ConstantLocation(instruction->GetOffset()->AsIntConstant())
+ : Location::RequiresRegister());
+ locations->SetInAt(2, Location::ConstantLocation(shift));
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorARM64::VisitIntermediateAddressIndex(
+ HIntermediateAddressIndex* instruction) {
+ Register index_reg = InputRegisterAt(instruction, 0);
+ uint32_t shift = Int64ConstantFrom(instruction->GetLocations()->InAt(2));
+ uint32_t offset = instruction->GetOffset()->AsIntConstant()->GetValue();
+
+ if (shift == 0) {
+ __ Add(OutputRegister(instruction), index_reg, offset);
+ } else {
+ Register offset_reg = InputRegisterAt(instruction, 1);
+ __ Add(OutputRegister(instruction), offset_reg, Operand(index_reg, LSL, shift));
+ }
+}
+
void LocationsBuilderARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(instr, LocationSummary::kNoCall);
@@ -6571,14 +6603,20 @@ static void PatchJitRootUse(uint8_t* code,
void CodeGeneratorARM64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
for (const auto& entry : jit_string_patches_) {
- const auto& it = jit_string_roots_.find(entry.first);
+ const StringReference& string_reference = entry.first;
+ vixl::aarch64::Literal<uint32_t>* table_entry_literal = entry.second;
+ const auto it = jit_string_roots_.find(string_reference);
DCHECK(it != jit_string_roots_.end());
- PatchJitRootUse(code, roots_data, entry.second, it->second);
+ uint64_t index_in_table = it->second;
+ PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
}
for (const auto& entry : jit_class_patches_) {
- const auto& it = jit_class_roots_.find(entry.first);
+ const TypeReference& type_reference = entry.first;
+ vixl::aarch64::Literal<uint32_t>* table_entry_literal = entry.second;
+ const auto it = jit_class_roots_.find(type_reference);
DCHECK(it != jit_class_roots_.end());
- PatchJitRootUse(code, roots_data, entry.second, it->second);
+ uint64_t index_in_table = it->second;
+ PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
}
}
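The two VisitIntermediateAddressIndex cases above compute the same partial address in different ways: for byte-sized elements the data offset folds into the ADD immediate, otherwise the offset lives in a register and the index is scaled by the element-size shift. A scalar model of that arithmetic, as an illustrative function rather than ART code:

#include <cstdint>

// Models the value an HIntermediateAddressIndex node stands for: the part of
// "base + data_offset + (index << shift)" that excludes the array base.
uint64_t IntermediateAddressIndexValue(uint64_t index,
                                       uint64_t data_offset,
                                       uint32_t shift) {
  if (shift == 0) {
    // Byte-sized elements: the constant offset is encoded directly in the ADD.
    return index + data_offset;
  }
  // Wider elements: keeping data_offset in a register lets its materialization
  // be hoisted out of the loop; the index is scaled by the element-size shift.
  return data_offset + (index << shift);
}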
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 9f03a39bd5..1759c68125 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -6447,6 +6447,16 @@ void InstructionCodeGeneratorARMVIXL::VisitIntermediateAddress(HIntermediateAddr
}
}
+void LocationsBuilderARMVIXL::VisitIntermediateAddressIndex(
+ HIntermediateAddressIndex* instruction) {
+ LOG(FATAL) << "Unreachable " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitIntermediateAddressIndex(
+ HIntermediateAddressIndex* instruction) {
+ LOG(FATAL) << "Unreachable " << instruction->GetId();
+}
+
void LocationsBuilderARMVIXL::VisitBoundsCheck(HBoundsCheck* instruction) {
RegisterSet caller_saves = RegisterSet::Empty();
InvokeRuntimeCallingConventionARMVIXL calling_convention;
@@ -9251,14 +9261,20 @@ static void PatchJitRootUse(uint8_t* code,
void CodeGeneratorARMVIXL::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
for (const auto& entry : jit_string_patches_) {
- const auto& it = jit_string_roots_.find(entry.first);
+ const StringReference& string_reference = entry.first;
+ VIXLUInt32Literal* table_entry_literal = entry.second;
+ const auto it = jit_string_roots_.find(string_reference);
DCHECK(it != jit_string_roots_.end());
- PatchJitRootUse(code, roots_data, entry.second, it->second);
+ uint64_t index_in_table = it->second;
+ PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
}
for (const auto& entry : jit_class_patches_) {
- const auto& it = jit_class_roots_.find(entry.first);
+ const TypeReference& type_reference = entry.first;
+ VIXLUInt32Literal* table_entry_literal = entry.second;
+ const auto it = jit_class_roots_.find(type_reference);
DCHECK(it != jit_class_roots_.end());
- PatchJitRootUse(code, roots_data, entry.second, it->second);
+ uint64_t index_in_table = it->second;
+ PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
}
}
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index e9870acff4..503026e399 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -1780,16 +1780,18 @@ void CodeGeneratorMIPS::PatchJitRootUse(uint8_t* code,
void CodeGeneratorMIPS::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
for (const JitPatchInfo& info : jit_string_patches_) {
- const auto& it = jit_string_roots_.find(StringReference(&info.target_dex_file,
- dex::StringIndex(info.index)));
+ const auto it = jit_string_roots_.find(StringReference(&info.target_dex_file,
+ dex::StringIndex(info.index)));
DCHECK(it != jit_string_roots_.end());
- PatchJitRootUse(code, roots_data, info, it->second);
+ uint64_t index_in_table = it->second;
+ PatchJitRootUse(code, roots_data, info, index_in_table);
}
for (const JitPatchInfo& info : jit_class_patches_) {
- const auto& it = jit_class_roots_.find(TypeReference(&info.target_dex_file,
- dex::TypeIndex(info.index)));
+ const auto it = jit_class_roots_.find(TypeReference(&info.target_dex_file,
+ dex::TypeIndex(info.index)));
DCHECK(it != jit_class_roots_.end());
- PatchJitRootUse(code, roots_data, info, it->second);
+ uint64_t index_in_table = it->second;
+ PatchJitRootUse(code, roots_data, info, index_in_table);
}
}
@@ -8413,6 +8415,23 @@ void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversi
}
} else if (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type)) {
CHECK(result_type == Primitive::kPrimInt || result_type == Primitive::kPrimLong);
+
+ // When NAN2008=1 (R6), the truncate instruction caps the output at the minimum/maximum
+ // value of the output type if the input is outside of the range after the truncation or
+ // produces 0 when the input is a NaN. IOW, the three special cases produce three distinct
+ // results. This matches the desired float/double-to-int/long conversion exactly.
+ //
+ // When NAN2008=0 (R2 and before), the truncate instruction produces the maximum positive
+ // value when the input is either a NaN or is outside of the range of the output type
+ // after the truncation. IOW, the three special cases (NaN, too small, too big) produce
+ // the same result.
+ //
+ // The code takes care of the different behaviors by first comparing the input to the
+ // minimum output value (-2**63 for truncating to long, -2**31 for truncating to int).
+ // If the input is greater than or equal to the minimum, it proceeds to the truncate
+ // instruction, which will handle such an input the same way irrespective of NAN2008.
+ // Otherwise the input is compared to itself to determine whether it is a NaN or not
+ // in order to return either zero or the minimum value.
if (result_type == Primitive::kPrimLong) {
if (isR6) {
// trunc.l.s/trunc.l.d requires MIPSR2+ with FR=1. MIPS32R6 is implemented as a secondary
@@ -8420,62 +8439,6 @@ void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversi
FRegister src = locations->InAt(0).AsFpuRegister<FRegister>();
Register dst_high = locations->Out().AsRegisterPairHigh<Register>();
Register dst_low = locations->Out().AsRegisterPairLow<Register>();
- MipsLabel truncate;
- MipsLabel done;
-
- // When NAN2008=0 (R2 and before), the truncate instruction produces the maximum positive
- // value when the input is either a NaN or is outside of the range of the output type
- // after the truncation. IOW, the three special cases (NaN, too small, too big) produce
- // the same result.
- //
- // When NAN2008=1 (R6), the truncate instruction caps the output at the minimum/maximum
- // value of the output type if the input is outside of the range after the truncation or
- // produces 0 when the input is a NaN. IOW, the three special cases produce three distinct
- // results. This matches the desired float/double-to-int/long conversion exactly.
- //
- // So, NAN2008 affects handling of negative values and NaNs by the truncate instruction.
- //
- // The following code supports both NAN2008=0 and NAN2008=1 behaviors of the truncate
- // instruction, the reason being that the emulator implements NAN2008=0 on MIPS64R6,
- // even though it must be NAN2008=1 on R6.
- //
- // The code takes care of the different behaviors by first comparing the input to the
- // minimum output value (-2**-63 for truncating to long, -2**-31 for truncating to int).
- // If the input is greater than or equal to the minimum, it procedes to the truncate
- // instruction, which will handle such an input the same way irrespective of NAN2008.
- // Otherwise the input is compared to itself to determine whether it is a NaN or not
- // in order to return either zero or the minimum value.
- //
- // TODO: simplify this when the emulator correctly implements NAN2008=1 behavior of the
- // truncate instruction for MIPS64R6.
- if (input_type == Primitive::kPrimFloat) {
- uint32_t min_val = bit_cast<uint32_t, float>(std::numeric_limits<int64_t>::min());
- __ LoadConst32(TMP, min_val);
- __ Mtc1(TMP, FTMP);
- __ CmpLeS(FTMP, FTMP, src);
- } else {
- uint64_t min_val = bit_cast<uint64_t, double>(std::numeric_limits<int64_t>::min());
- __ LoadConst32(TMP, High32Bits(min_val));
- __ Mtc1(ZERO, FTMP);
- __ Mthc1(TMP, FTMP);
- __ CmpLeD(FTMP, FTMP, src);
- }
-
- __ Bc1nez(FTMP, &truncate);
-
- if (input_type == Primitive::kPrimFloat) {
- __ CmpEqS(FTMP, src, src);
- } else {
- __ CmpEqD(FTMP, src, src);
- }
- __ Move(dst_low, ZERO);
- __ LoadConst32(dst_high, std::numeric_limits<int32_t>::min());
- __ Mfc1(TMP, FTMP);
- __ And(dst_high, dst_high, TMP);
-
- __ B(&done);
-
- __ Bind(&truncate);
if (input_type == Primitive::kPrimFloat) {
__ TruncLS(FTMP, src);
@@ -8484,8 +8447,6 @@ void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversi
}
__ Mfc1(dst_low, FTMP);
__ Mfhc1(dst_high, FTMP);
-
- __ Bind(&done);
} else {
QuickEntrypointEnum entrypoint = (input_type == Primitive::kPrimFloat) ? kQuickF2l
: kQuickD2l;
@@ -8502,43 +8463,19 @@ void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversi
MipsLabel truncate;
MipsLabel done;
- // The following code supports both NAN2008=0 and NAN2008=1 behaviors of the truncate
- // instruction, the reason being that the emulator implements NAN2008=0 on MIPS64R6,
- // even though it must be NAN2008=1 on R6.
- //
- // For details see the large comment above for the truncation of float/double to long on R6.
- //
- // TODO: simplify this when the emulator correctly implements NAN2008=1 behavior of the
- // truncate instruction for MIPS64R6.
- if (input_type == Primitive::kPrimFloat) {
- uint32_t min_val = bit_cast<uint32_t, float>(std::numeric_limits<int32_t>::min());
- __ LoadConst32(TMP, min_val);
- __ Mtc1(TMP, FTMP);
- } else {
- uint64_t min_val = bit_cast<uint64_t, double>(std::numeric_limits<int32_t>::min());
- __ LoadConst32(TMP, High32Bits(min_val));
- __ Mtc1(ZERO, FTMP);
- __ MoveToFpuHigh(TMP, FTMP);
- }
-
- if (isR6) {
+ if (!isR6) {
if (input_type == Primitive::kPrimFloat) {
- __ CmpLeS(FTMP, FTMP, src);
+ uint32_t min_val = bit_cast<uint32_t, float>(std::numeric_limits<int32_t>::min());
+ __ LoadConst32(TMP, min_val);
+ __ Mtc1(TMP, FTMP);
} else {
- __ CmpLeD(FTMP, FTMP, src);
+ uint64_t min_val = bit_cast<uint64_t, double>(std::numeric_limits<int32_t>::min());
+ __ LoadConst32(TMP, High32Bits(min_val));
+ __ Mtc1(ZERO, FTMP);
+ __ MoveToFpuHigh(TMP, FTMP);
}
- __ Bc1nez(FTMP, &truncate);
if (input_type == Primitive::kPrimFloat) {
- __ CmpEqS(FTMP, src, src);
- } else {
- __ CmpEqD(FTMP, src, src);
- }
- __ LoadConst32(dst, std::numeric_limits<int32_t>::min());
- __ Mfc1(TMP, FTMP);
- __ And(dst, dst, TMP);
- } else {
- if (input_type == Primitive::kPrimFloat) {
__ ColeS(0, FTMP, src);
} else {
__ ColeD(0, FTMP, src);
@@ -8552,11 +8489,11 @@ void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversi
}
__ LoadConst32(dst, std::numeric_limits<int32_t>::min());
__ Movf(dst, ZERO, 0);
- }
- __ B(&done);
+ __ B(&done);
- __ Bind(&truncate);
+ __ Bind(&truncate);
+ }
if (input_type == Primitive::kPrimFloat) {
__ TruncWS(FTMP, src);
@@ -8565,7 +8502,9 @@ void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversi
}
__ Mfc1(dst, FTMP);
- __ Bind(&done);
+ if (!isR6) {
+ __ Bind(&done);
+ }
}
} else if (Primitive::IsFloatingPointType(result_type) &&
Primitive::IsFloatingPointType(input_type)) {
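Under NAN2008=1 the single trunc instruction already produces the three required special-case results, which is why the explicit NaN/range checks are dropped on R6 above. A small model of that behavior for float-to-int, written as a sketch of the semantics rather than the generated code:

#include <cmath>
#include <cstdint>
#include <limits>

// Models the NAN2008=1 (R6) trunc semantics described in the comment above:
// out-of-range inputs clamp to the output type's min/max and NaN yields 0,
// which matches the Java float-to-int conversion rule exactly.
int32_t TruncFloatToInt32R6Model(float value) {
  if (std::isnan(value)) {
    return 0;
  }
  if (value >= static_cast<float>(std::numeric_limits<int32_t>::max())) {
    return std::numeric_limits<int32_t>::max();  // Too big: clamp to INT32_MAX.
  }
  if (value <= static_cast<float>(std::numeric_limits<int32_t>::min())) {
    return std::numeric_limits<int32_t>::min();  // Too small: clamp to INT32_MIN.
  }
  return static_cast<int32_t>(value);  // In range: ordinary truncation.
}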
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index f04e3841f5..e0dba21d71 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -302,10 +302,13 @@ class SuspendCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 {
: SlowPathCodeMIPS64(instruction), successor_(successor) {}
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ LocationSummary* locations = instruction_->GetLocations();
CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
__ Bind(GetEntryLabel());
+ SaveLiveRegisters(codegen, locations); // Only saves live vector registers for SIMD.
mips64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickTestSuspend, void, void>();
+ RestoreLiveRegisters(codegen, locations); // Only restores live vector registers for SIMD.
if (successor_ == nullptr) {
__ Bc(GetReturnLabel());
} else {
@@ -1586,14 +1589,20 @@ void CodeGeneratorMIPS64::PatchJitRootUse(uint8_t* code,
void CodeGeneratorMIPS64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
for (const auto& entry : jit_string_patches_) {
- const auto& it = jit_string_roots_.find(entry.first);
+ const StringReference& string_reference = entry.first;
+ Literal* table_entry_literal = entry.second;
+ const auto it = jit_string_roots_.find(string_reference);
DCHECK(it != jit_string_roots_.end());
- PatchJitRootUse(code, roots_data, entry.second, it->second);
+ uint64_t index_in_table = it->second;
+ PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
}
for (const auto& entry : jit_class_patches_) {
- const auto& it = jit_class_roots_.find(entry.first);
+ const TypeReference& type_reference = entry.first;
+ Literal* table_entry_literal = entry.second;
+ const auto it = jit_class_roots_.find(type_reference);
DCHECK(it != jit_class_roots_.end());
- PatchJitRootUse(code, roots_data, entry.second, it->second);
+ uint64_t index_in_table = it->second;
+ PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
}
}
@@ -1641,13 +1650,19 @@ size_t CodeGeneratorMIPS64::RestoreCoreRegister(size_t stack_index, uint32_t reg
}
size_t CodeGeneratorMIPS64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
- __ StoreFpuToOffset(kStoreDoubleword, FpuRegister(reg_id), SP, stack_index);
- return kMips64DoublewordSize;
+ __ StoreFpuToOffset(GetGraph()->HasSIMD() ? kStoreQuadword : kStoreDoubleword,
+ FpuRegister(reg_id),
+ SP,
+ stack_index);
+ return GetFloatingPointSpillSlotSize();
}
size_t CodeGeneratorMIPS64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
- __ LoadFpuFromOffset(kLoadDoubleword, FpuRegister(reg_id), SP, stack_index);
- return kMips64DoublewordSize;
+ __ LoadFpuFromOffset(GetGraph()->HasSIMD() ? kLoadQuadword : kLoadDoubleword,
+ FpuRegister(reg_id),
+ SP,
+ stack_index);
+ return GetFloatingPointSpillSlotSize();
}
void CodeGeneratorMIPS64::DumpCoreRegister(std::ostream& stream, int reg) const {
@@ -5846,7 +5861,11 @@ void InstructionCodeGeneratorMIPS64::VisitUnresolvedStaticFieldSet(
void LocationsBuilderMIPS64::VisitSuspendCheck(HSuspendCheck* instruction) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
- locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ // In the suspend check slow path, there are usually no caller-save registers at all.
+ // If SIMD instructions are present, however, we force spilling all live SIMD
+ // registers in full width (since the runtime only saves/restores the lower part).
+ locations->SetCustomSlowPathCallerSaves(
+ GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
}
void InstructionCodeGeneratorMIPS64::VisitSuspendCheck(HSuspendCheck* instruction) {
@@ -5973,68 +5992,6 @@ void InstructionCodeGeneratorMIPS64::VisitTypeConversion(HTypeConversion* conver
CHECK(result_type == Primitive::kPrimInt || result_type == Primitive::kPrimLong);
GpuRegister dst = locations->Out().AsRegister<GpuRegister>();
FpuRegister src = locations->InAt(0).AsFpuRegister<FpuRegister>();
- Mips64Label truncate;
- Mips64Label done;
-
- // When NAN2008=0 (R2 and before), the truncate instruction produces the maximum positive
- // value when the input is either a NaN or is outside of the range of the output type
- // after the truncation. IOW, the three special cases (NaN, too small, too big) produce
- // the same result.
- //
- // When NAN2008=1 (R6), the truncate instruction caps the output at the minimum/maximum
- // value of the output type if the input is outside of the range after the truncation or
- // produces 0 when the input is a NaN. IOW, the three special cases produce three distinct
- // results. This matches the desired float/double-to-int/long conversion exactly.
- //
- // So, NAN2008 affects handling of negative values and NaNs by the truncate instruction.
- //
- // The following code supports both NAN2008=0 and NAN2008=1 behaviors of the truncate
- // instruction, the reason being that the emulator implements NAN2008=0 on MIPS64R6,
- // even though it must be NAN2008=1 on R6.
- //
- // The code takes care of the different behaviors by first comparing the input to the
- // minimum output value (-2**-63 for truncating to long, -2**-31 for truncating to int).
- // If the input is greater than or equal to the minimum, it procedes to the truncate
- // instruction, which will handle such an input the same way irrespective of NAN2008.
- // Otherwise the input is compared to itself to determine whether it is a NaN or not
- // in order to return either zero or the minimum value.
- //
- // TODO: simplify this when the emulator correctly implements NAN2008=1 behavior of the
- // truncate instruction for MIPS64R6.
- if (input_type == Primitive::kPrimFloat) {
- uint32_t min_val = (result_type == Primitive::kPrimLong)
- ? bit_cast<uint32_t, float>(std::numeric_limits<int64_t>::min())
- : bit_cast<uint32_t, float>(std::numeric_limits<int32_t>::min());
- __ LoadConst32(TMP, min_val);
- __ Mtc1(TMP, FTMP);
- __ CmpLeS(FTMP, FTMP, src);
- } else {
- uint64_t min_val = (result_type == Primitive::kPrimLong)
- ? bit_cast<uint64_t, double>(std::numeric_limits<int64_t>::min())
- : bit_cast<uint64_t, double>(std::numeric_limits<int32_t>::min());
- __ LoadConst64(TMP, min_val);
- __ Dmtc1(TMP, FTMP);
- __ CmpLeD(FTMP, FTMP, src);
- }
-
- __ Bc1nez(FTMP, &truncate);
-
- if (input_type == Primitive::kPrimFloat) {
- __ CmpEqS(FTMP, src, src);
- } else {
- __ CmpEqD(FTMP, src, src);
- }
- if (result_type == Primitive::kPrimLong) {
- __ LoadConst64(dst, std::numeric_limits<int64_t>::min());
- } else {
- __ LoadConst32(dst, std::numeric_limits<int32_t>::min());
- }
- __ Mfc1(TMP, FTMP);
- __ And(dst, dst, TMP);
-
- __ Bc(&done);
-
- __ Bind(&truncate);
if (result_type == Primitive::kPrimLong) {
if (input_type == Primitive::kPrimFloat) {
@@ -6051,8 +6008,6 @@ void InstructionCodeGeneratorMIPS64::VisitTypeConversion(HTypeConversion* conver
}
__ Mfc1(dst, FTMP);
}
-
- __ Bind(&done);
} else if (Primitive::IsFloatingPointType(result_type) &&
Primitive::IsFloatingPointType(input_type)) {
FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>();
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index 200e884c09..4c8376623f 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -336,7 +336,11 @@ class CodeGeneratorMIPS64 : public CodeGenerator {
size_t GetWordSize() const OVERRIDE { return kMips64DoublewordSize; }
- size_t GetFloatingPointSpillSlotSize() const OVERRIDE { return kMips64DoublewordSize; }
+ size_t GetFloatingPointSpillSlotSize() const OVERRIDE {
+ return GetGraph()->HasSIMD()
+ ? 2 * kMips64DoublewordSize // 16 bytes for each spill.
+ : 1 * kMips64DoublewordSize; // 8 bytes for each spill.
+ }
uintptr_t GetAddressOf(HBasicBlock* block) OVERRIDE {
return assembler_.GetLabelLocation(GetLabelOf(block));
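The spill-slot size now depends on whether the graph contains SIMD code, because the runtime's own save/restore only covers the lower 64 bits of each FP register. The rule reduces to this small sketch (constant and function names are illustrative):

#include <cstddef>

constexpr size_t kDoublewordSizeSketch = 8;  // Stand-in for kMips64DoublewordSize.

// Full 128-bit slots when SIMD is present, ordinary 64-bit slots otherwise.
size_t FloatingPointSpillSlotSizeSketch(bool graph_has_simd) {
  return graph_has_simd ? 2 * kDoublewordSizeSketch   // 16 bytes per vector spill.
                        : 1 * kDoublewordSizeSketch;  // 8 bytes per FP spill.
}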
diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64.cc
index 57f7e6b25c..478bd24388 100644
--- a/compiler/optimizing/code_generator_vector_arm64.cc
+++ b/compiler/optimizing/code_generator_vector_arm64.cc
@@ -783,6 +783,12 @@ MemOperand InstructionCodeGeneratorARM64::VecAddress(
/*out*/ Register* scratch) {
LocationSummary* locations = instruction->GetLocations();
Register base = InputRegisterAt(instruction, 0);
+
+ if (instruction->InputAt(1)->IsIntermediateAddressIndex()) {
+ DCHECK(!is_string_char_at);
+ return MemOperand(base.X(), InputRegisterAt(instruction, 1).X());
+ }
+
Location index = locations->InAt(1);
uint32_t offset = is_string_char_at
? mirror::String::ValueOffset().Uint32Value()
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index cf2d5cbee3..bd9a5d2564 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -7703,7 +7703,7 @@ void CodeGeneratorX86::Finalize(CodeAllocator* allocator) {
constant_area_start_ = assembler->CodeSize();
// Populate any jump tables.
- for (auto jump_table : fixups_to_jump_tables_) {
+ for (JumpTableRIPFixup* jump_table : fixups_to_jump_tables_) {
jump_table->CreateJumpTable();
}
@@ -7842,17 +7842,19 @@ void CodeGeneratorX86::PatchJitRootUse(uint8_t* code,
void CodeGeneratorX86::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
for (const PatchInfo<Label>& info : jit_string_patches_) {
- const auto& it = jit_string_roots_.find(
+ const auto it = jit_string_roots_.find(
StringReference(&info.dex_file, dex::StringIndex(info.index)));
DCHECK(it != jit_string_roots_.end());
- PatchJitRootUse(code, roots_data, info, it->second);
+ uint64_t index_in_table = it->second;
+ PatchJitRootUse(code, roots_data, info, index_in_table);
}
for (const PatchInfo<Label>& info : jit_class_patches_) {
- const auto& it = jit_class_roots_.find(
+ const auto it = jit_class_roots_.find(
TypeReference(&info.dex_file, dex::TypeIndex(info.index)));
DCHECK(it != jit_class_roots_.end());
- PatchJitRootUse(code, roots_data, info, it->second);
+ uint64_t index_in_table = it->second;
+ PatchJitRootUse(code, roots_data, info, index_in_table);
}
}
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index f2ed52b5a5..6b0e001ad8 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -7055,7 +7055,7 @@ void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) {
constant_area_start_ = assembler->CodeSize();
// Populate any jump tables.
- for (auto jump_table : fixups_to_jump_tables_) {
+ for (JumpTableRIPFixup* jump_table : fixups_to_jump_tables_) {
jump_table->CreateJumpTable();
}
@@ -7149,17 +7149,19 @@ void CodeGeneratorX86_64::PatchJitRootUse(uint8_t* code,
void CodeGeneratorX86_64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
for (const PatchInfo<Label>& info : jit_string_patches_) {
- const auto& it = jit_string_roots_.find(
+ const auto it = jit_string_roots_.find(
StringReference(&info.dex_file, dex::StringIndex(info.index)));
DCHECK(it != jit_string_roots_.end());
- PatchJitRootUse(code, roots_data, info, it->second);
+ uint64_t index_in_table = it->second;
+ PatchJitRootUse(code, roots_data, info, index_in_table);
}
for (const PatchInfo<Label>& info : jit_class_patches_) {
- const auto& it = jit_class_roots_.find(
+ const auto it = jit_class_roots_.find(
TypeReference(&info.dex_file, dex::TypeIndex(info.index)));
DCHECK(it != jit_class_roots_.end());
- PatchJitRootUse(code, roots_data, info, it->second);
+ uint64_t index_in_table = it->second;
+ PatchJitRootUse(code, roots_data, info, index_in_table);
}
}
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index 7e3c377198..fe25b7690d 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -64,7 +64,7 @@ static ::std::vector<CodegenTargetConfig> GetTargetConfigs() {
#endif
};
- for (auto test_config : test_config_candidates) {
+ for (const CodegenTargetConfig& test_config : test_config_candidates) {
if (CanExecute(test_config.GetInstructionSet())) {
v.push_back(test_config);
}
@@ -76,7 +76,7 @@ static ::std::vector<CodegenTargetConfig> GetTargetConfigs() {
static void TestCode(const uint16_t* data,
bool has_result = false,
int32_t expected = 0) {
- for (CodegenTargetConfig target_config : GetTargetConfigs()) {
+ for (const CodegenTargetConfig& target_config : GetTargetConfigs()) {
ArenaPool pool;
ArenaAllocator arena(&pool);
HGraph* graph = CreateCFG(&arena, data);
@@ -89,7 +89,7 @@ static void TestCode(const uint16_t* data,
static void TestCodeLong(const uint16_t* data,
bool has_result,
int64_t expected) {
- for (CodegenTargetConfig target_config : GetTargetConfigs()) {
+ for (const CodegenTargetConfig& target_config : GetTargetConfigs()) {
ArenaPool pool;
ArenaAllocator arena(&pool);
HGraph* graph = CreateCFG(&arena, data, Primitive::kPrimLong);
diff --git a/compiler/optimizing/codegen_test_utils.h b/compiler/optimizing/codegen_test_utils.h
index 31cd204c9f..00a16fe849 100644
--- a/compiler/optimizing/codegen_test_utils.h
+++ b/compiler/optimizing/codegen_test_utils.h
@@ -243,7 +243,7 @@ static void ValidateGraph(HGraph* graph) {
GraphChecker graph_checker(graph);
graph_checker.Run();
if (!graph_checker.IsValid()) {
- for (const auto& error : graph_checker.GetErrors()) {
+ for (const std::string& error : graph_checker.GetErrors()) {
std::cout << error << std::endl;
}
}
diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc
index c93bc210be..8ea312d0ea 100644
--- a/compiler/optimizing/gvn.cc
+++ b/compiler/optimizing/gvn.cc
@@ -516,13 +516,13 @@ void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) {
bool GlobalValueNumberer::WillBeReferencedAgain(HBasicBlock* block) const {
DCHECK(visited_blocks_.IsBitSet(block->GetBlockId()));
- for (auto dominated_block : block->GetDominatedBlocks()) {
+ for (const HBasicBlock* dominated_block : block->GetDominatedBlocks()) {
if (!visited_blocks_.IsBitSet(dominated_block->GetBlockId())) {
return true;
}
}
- for (auto successor : block->GetSuccessors()) {
+ for (const HBasicBlock* successor : block->GetSuccessors()) {
if (!visited_blocks_.IsBitSet(successor->GetBlockId())) {
return true;
}
diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc
index f16e3727c8..311be1fb49 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.cc
+++ b/compiler/optimizing/instruction_simplifier_arm64.cc
@@ -216,5 +216,18 @@ void InstructionSimplifierArm64Visitor::VisitVecMul(HVecMul* instruction) {
}
}
+void InstructionSimplifierArm64Visitor::VisitVecLoad(HVecLoad* instruction) {
+ if (!instruction->IsStringCharAt()
+ && TryExtractVecArrayAccessAddress(instruction, instruction->GetIndex())) {
+ RecordSimplification();
+ }
+}
+
+void InstructionSimplifierArm64Visitor::VisitVecStore(HVecStore* instruction) {
+ if (TryExtractVecArrayAccessAddress(instruction, instruction->GetIndex())) {
+ RecordSimplification();
+ }
+}
+
} // namespace arm64
} // namespace art
diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h
index eec4e49792..8596f6ad40 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.h
+++ b/compiler/optimizing/instruction_simplifier_arm64.h
@@ -75,6 +75,8 @@ class InstructionSimplifierArm64Visitor : public HGraphVisitor {
void VisitUShr(HUShr* instruction) OVERRIDE;
void VisitXor(HXor* instruction) OVERRIDE;
void VisitVecMul(HVecMul* instruction) OVERRIDE;
+ void VisitVecLoad(HVecLoad* instruction) OVERRIDE;
+ void VisitVecStore(HVecStore* instruction) OVERRIDE;
OptimizingCompilerStats* stats_;
};
diff --git a/compiler/optimizing/instruction_simplifier_shared.cc b/compiler/optimizing/instruction_simplifier_shared.cc
index c39e5f4d3b..e5a8499ff4 100644
--- a/compiler/optimizing/instruction_simplifier_shared.cc
+++ b/compiler/optimizing/instruction_simplifier_shared.cc
@@ -16,6 +16,8 @@
#include "instruction_simplifier_shared.h"
+#include "mirror/array-inl.h"
+
namespace art {
namespace {
@@ -346,4 +348,59 @@ bool TryCombineVecMultiplyAccumulate(HVecMul* mul, InstructionSet isa) {
return false;
}
+bool TryExtractVecArrayAccessAddress(HVecMemoryOperation* access, HInstruction* index) {
+ if (index->IsConstant()) {
+ // If the index is constant, the whole address calculation can often be done by the
+ // LDR/STR instructions themselves.
+ // TODO: Handle the case of a constant that cannot be embedded.
+ return false;
+ }
+
+ HGraph* graph = access->GetBlock()->GetGraph();
+ ArenaAllocator* arena = graph->GetArena();
+ Primitive::Type packed_type = access->GetPackedType();
+ uint32_t data_offset = mirror::Array::DataOffset(
+ Primitive::ComponentSize(packed_type)).Uint32Value();
+ size_t component_shift = Primitive::ComponentSizeShift(packed_type);
+
+ bool is_extracting_beneficial = false;
+ // Extracting the intermediate address is only beneficial if it is shared by at least 2 accesses.
+ for (const HUseListNode<HInstruction*>& use : index->GetUses()) {
+ HInstruction* user = use.GetUser();
+ if (user->IsVecMemoryOperation() && user != access) {
+ HVecMemoryOperation* another_access = user->AsVecMemoryOperation();
+ Primitive::Type another_packed_type = another_access->GetPackedType();
+ uint32_t another_data_offset = mirror::Array::DataOffset(
+ Primitive::ComponentSize(another_packed_type)).Uint32Value();
+ size_t another_component_shift = Primitive::ComponentSizeShift(another_packed_type);
+ if (another_data_offset == data_offset && another_component_shift == component_shift) {
+ is_extracting_beneficial = true;
+ break;
+ }
+ } else if (user->IsIntermediateAddressIndex()) {
+ HIntermediateAddressIndex* another_access = user->AsIntermediateAddressIndex();
+ uint32_t another_data_offset = another_access->GetOffset()->AsIntConstant()->GetValue();
+ size_t another_component_shift = another_access->GetShift()->AsIntConstant()->GetValue();
+ if (another_data_offset == data_offset && another_component_shift == component_shift) {
+ is_extracting_beneficial = true;
+ break;
+ }
+ }
+ }
+
+ if (!is_extracting_beneficial) {
+ return false;
+ }
+
+ // Proceed to extract the index + data_offset address computation.
+ HIntConstant* offset = graph->GetIntConstant(data_offset);
+ HIntConstant* shift = graph->GetIntConstant(component_shift);
+ HIntermediateAddressIndex* address =
+ new (arena) HIntermediateAddressIndex(index, offset, shift, kNoDexPc);
+
+ access->GetBlock()->InsertInstructionBefore(address, access);
+ access->ReplaceInput(address, 1);
+
+ return true;
+}
+
} // namespace art
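The helper above factors the array-base-independent part of the address out of each vector access. A minimal standalone sketch of the arithmetic being shared, using made-up layout constants rather than the real mirror::Array::DataOffset() and Primitive::ComponentSizeShift() values (so the numbers below are assumptions, not ART's):

    #include <cstdint>
    #include <cstdio>

    // Stand-ins for the array header size and the element-size shift of a
    // 32-bit element type; illustrative values only.
    constexpr uintptr_t kDataOffset = 16;     // header bytes before the elements
    constexpr uintptr_t kComponentShift = 2;  // log2(sizeof(int32_t))

    int main() {
      uintptr_t base_a = 0x1000;  // pretend addresses of two int[] objects
      uintptr_t base_b = 0x2000;
      uintptr_t index = 7;

      // The array-base-independent part; this is what HIntermediateAddressIndex
      // models, and what accesses with the same packed type and index can share.
      uintptr_t shared = kDataOffset + (index << kComponentShift);

      uintptr_t addr_a = base_a + shared;  // address of a[index]
      uintptr_t addr_b = base_b + shared;  // address of b[index], reusing `shared`
      std::printf("shared=%#zx a[i]=%#zx b[i]=%#zx\n",
                  static_cast<size_t>(shared), static_cast<size_t>(addr_a),
                  static_cast<size_t>(addr_b));
      return 0;
    }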
diff --git a/compiler/optimizing/instruction_simplifier_shared.h b/compiler/optimizing/instruction_simplifier_shared.h
index 2ea103a518..371619fa2e 100644
--- a/compiler/optimizing/instruction_simplifier_shared.h
+++ b/compiler/optimizing/instruction_simplifier_shared.h
@@ -59,6 +59,7 @@ bool TryExtractArrayAccessAddress(HInstruction* access,
size_t data_offset);
bool TryCombineVecMultiplyAccumulate(HVecMul* mul, InstructionSet isa);
+bool TryExtractVecArrayAccessAddress(HVecMemoryOperation* access, HInstruction* index);
} // namespace art
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index abf5b122c8..eb28742672 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -2555,101 +2555,110 @@ void IntrinsicCodeGeneratorMIPS::VisitMathRoundFloat(HInvoke* invoke) {
Register out = locations->Out().AsRegister<Register>();
MipsLabel done;
- MipsLabel finite;
- MipsLabel add;
- // if (in.isNaN) {
- // return 0;
- // }
- //
- // out = floor.w.s(in);
- //
- // /*
- // * This "if" statement is only needed for the pre-R6 version of floor.w.s
- // * which outputs Integer.MAX_VALUE for negative numbers with magnitudes
- // * too large to fit in a 32-bit integer.
- // *
- // * Starting with MIPSR6, which always sets FCSR.NAN2008=1, negative
- // * numbers which are too large to be represented in a 32-bit signed
- // * integer will be processed by floor.w.s to output Integer.MIN_VALUE,
- // * and will no longer be processed by this "if" statement.
- // */
- // if (out == Integer.MAX_VALUE) {
- // TMP = (in < 0.0f) ? 1 : 0;
- // /*
- // * If TMP is 1, then adding it to out will wrap its value from
- // * Integer.MAX_VALUE to Integer.MIN_VALUE.
- // */
- // return out += TMP;
- // }
- //
- // /*
- // * For negative values not handled by the previous "if" statement the
- // * test here will correctly set the value of TMP.
- // */
- // TMP = ((in - out) >= 0.5f) ? 1 : 0;
- // return out += TMP;
-
- // Test for NaN.
if (IsR6()) {
- __ CmpUnS(FTMP, in, in);
+ // out = floor(in);
+ //
+ // if (out != MAX_VALUE && out != MIN_VALUE) {
+ // TMP = ((in - out) >= 0.5) ? 1 : 0;
+ // return out += TMP;
+ // }
+ // return out;
+
+ // out = floor(in);
+ __ FloorWS(FTMP, in);
+ __ Mfc1(out, FTMP);
+
+ // if (out != MAX_VALUE && out != MIN_VALUE)
+ __ Addiu(TMP, out, 1);
+ __ Aui(TMP, TMP, 0x8000); // TMP = out + 0x8000 0001
+ // or out - 0x7FFF FFFF.
+ // IOW, TMP = 1 if out = Int.MIN_VALUE
+ // or TMP = 0 if out = Int.MAX_VALUE.
+ __ Srl(TMP, TMP, 1); // TMP = 0 if out = Int.MIN_VALUE
+ // or out = Int.MAX_VALUE.
+ __ Beqz(TMP, &done);
+
+ // TMP = (0.5f <= (in - out)) ? -1 : 0;
+ __ Cvtsw(FTMP, FTMP); // Convert output of floor.w.s back to "float".
+ __ LoadConst32(AT, bit_cast<int32_t, float>(0.5f));
+ __ SubS(FTMP, in, FTMP);
+ __ Mtc1(AT, half);
+
+ __ CmpLeS(FTMP, half, FTMP);
+ __ Mfc1(TMP, FTMP);
+
+ // Return out -= TMP.
+ __ Subu(out, out, TMP);
} else {
+ // if (in.isNaN) {
+ // return 0;
+ // }
+ //
+ // out = floor.w.s(in);
+ //
+ // /*
+ // * This "if" statement is only needed for the pre-R6 version of floor.w.s
+ // * which outputs Integer.MAX_VALUE for negative numbers with magnitudes
+ // * too large to fit in a 32-bit integer.
+ // */
+ // if (out == Integer.MAX_VALUE) {
+ // TMP = (in < 0.0f) ? 1 : 0;
+ // /*
+ // * If TMP is 1, then adding it to out will wrap its value from
+ // * Integer.MAX_VALUE to Integer.MIN_VALUE.
+ // */
+ // return out += TMP;
+ // }
+ //
+ // /*
+ // * For negative values not handled by the previous "if" statement the
+ // * test here will correctly set the value of TMP.
+ // */
+ // TMP = ((in - out) >= 0.5f) ? 1 : 0;
+ // return out += TMP;
+
+ MipsLabel finite;
+ MipsLabel add;
+
+ // Test for NaN.
__ CunS(in, in);
- }
- // Return zero for NaN.
- __ Move(out, ZERO);
- if (IsR6()) {
- __ Bc1nez(FTMP, &done);
- } else {
+ // Return zero for NaN.
+ __ Move(out, ZERO);
__ Bc1t(&done);
- }
- // out = floor(in);
- __ FloorWS(FTMP, in);
- __ Mfc1(out, FTMP);
+ // out = floor(in);
+ __ FloorWS(FTMP, in);
+ __ Mfc1(out, FTMP);
- if (!IsR6()) {
__ LoadConst32(TMP, -1);
- }
- // TMP = (out = java.lang.Integer.MAX_VALUE) ? -1 : 0;
- __ LoadConst32(AT, std::numeric_limits<int32_t>::max());
- __ Bne(AT, out, &finite);
+ // TMP = (out == java.lang.Integer.MAX_VALUE) ? -1 : 0;
+ __ LoadConst32(AT, std::numeric_limits<int32_t>::max());
+ __ Bne(AT, out, &finite);
- __ Mtc1(ZERO, FTMP);
- if (IsR6()) {
- __ CmpLtS(FTMP, in, FTMP);
- __ Mfc1(TMP, FTMP);
- } else {
+ __ Mtc1(ZERO, FTMP);
__ ColtS(in, FTMP);
- }
- __ B(&add);
+ __ B(&add);
- __ Bind(&finite);
+ __ Bind(&finite);
- // TMP = (0.5f <= (in - out)) ? -1 : 0;
- __ Cvtsw(FTMP, FTMP); // Convert output of floor.w.s back to "float".
- __ LoadConst32(AT, bit_cast<int32_t, float>(0.5f));
- __ SubS(FTMP, in, FTMP);
- __ Mtc1(AT, half);
- if (IsR6()) {
- __ CmpLeS(FTMP, half, FTMP);
- __ Mfc1(TMP, FTMP);
- } else {
+ // TMP = (0.5f <= (in - out)) ? -1 : 0;
+ __ Cvtsw(FTMP, FTMP); // Convert output of floor.w.s back to "float".
+ __ LoadConst32(AT, bit_cast<int32_t, float>(0.5f));
+ __ SubS(FTMP, in, FTMP);
+ __ Mtc1(AT, half);
__ ColeS(half, FTMP);
- }
- __ Bind(&add);
+ __ Bind(&add);
- if (!IsR6()) {
__ Movf(TMP, ZERO);
- }
-
- // Return out -= TMP.
- __ Subu(out, out, TMP);
+ // Return out -= TMP.
+ __ Subu(out, out, TMP);
+ }
__ Bind(&done);
}
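For reference, the R6 branch above follows the rounding scheme sketched in its own comments. A plain C++ approximation of that behavior (a sketch only: the saturating and NaN behavior of floor.w.s under FCSR.NAN2008=1 is emulated by hand here, and this is not the intrinsic itself):

    #include <cmath>
    #include <cstdint>
    #include <cstdio>
    #include <limits>

    // out = floor(in); if (out != MAX_VALUE && out != MIN_VALUE) out += ((in - out) >= 0.5f);
    int32_t RoundFloatSketch(float in) {
      constexpr int32_t kMax = std::numeric_limits<int32_t>::max();
      constexpr int32_t kMin = std::numeric_limits<int32_t>::min();
      // Emulate floor.w.s: NaN -> 0, out-of-range values saturate to kMin/kMax.
      float f = std::floor(in);
      int32_t out;
      if (std::isnan(in)) {
        out = 0;
      } else if (f >= 2147483648.0f) {
        out = kMax;
      } else if (f < -2147483648.0f) {
        out = kMin;
      } else {
        out = static_cast<int32_t>(f);
      }
      // Skip the +1 adjustment for the saturated values, as the Beqz above does.
      if (out != kMax && out != kMin) {
        out += ((in - static_cast<float>(out)) >= 0.5f) ? 1 : 0;
      }
      return out;
    }

    int main() {
      // Expected: 3 -2 2 0
      std::printf("%d %d %d %d\n", RoundFloatSketch(2.5f), RoundFloatSketch(-2.5f),
                  RoundFloatSketch(2.4f), RoundFloatSketch(std::nanf("")));
      return 0;
    }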
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index 9dce59b2af..a476b2bc25 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -890,54 +890,14 @@ static void GenRound(LocationSummary* locations, Mips64Assembler* assembler, Pri
DCHECK(type == Primitive::kPrimFloat || type == Primitive::kPrimDouble);
Mips64Label done;
- Mips64Label finite;
- Mips64Label add;
- // if (in.isNaN) {
- // return 0;
- // }
- //
// out = floor(in);
//
- // /*
- // * TODO: Amend this code when emulator FCSR.NAN2008=1 bug is fixed.
- // *
- // * Starting with MIPSR6, which always sets FCSR.NAN2008=1, negative
- // * numbers which are too large to be represented in a 32-/64-bit
- // * signed integer will be processed by floor.X.Y to output
- // * Integer.MIN_VALUE/Long.MIN_VALUE, and will no longer be
- // * processed by this "if" statement.
- // *
- // * However, this bug in the 64-bit MIPS emulator causes the
- // * behavior of floor.X.Y to be the same as pre-R6 implementations
- // * of MIPS64. When that bug is fixed this logic should be amended.
- // */
- // if (out == MAX_VALUE) {
- // TMP = (in < 0.0) ? 1 : 0;
- // /*
- // * If TMP is 1, then adding it to out will wrap its value from
- // * MAX_VALUE to MIN_VALUE.
- // */
+ // if (out != MAX_VALUE && out != MIN_VALUE) {
+ // TMP = ((in - out) >= 0.5) ? 1 : 0;
// return out += TMP;
// }
- //
- // /*
- // * For negative values not handled by the previous "if" statement the
- // * test here will correctly set the value of TMP.
- // */
- // TMP = ((in - out) >= 0.5) ? 1 : 0;
- // return out += TMP;
-
- // Test for NaN.
- if (type == Primitive::kPrimDouble) {
- __ CmpUnD(FTMP, in, in);
- } else {
- __ CmpUnS(FTMP, in, in);
- }
-
- // Return zero for NaN.
- __ Move(out, ZERO);
- __ Bc1nez(FTMP, &done);
+ // return out;
// out = floor(in);
if (type == Primitive::kPrimDouble) {
@@ -948,28 +908,27 @@ static void GenRound(LocationSummary* locations, Mips64Assembler* assembler, Pri
__ Mfc1(out, FTMP);
}
- // TMP = (out = java.lang.Integer.MAX_VALUE) ? 1 : 0;
- if (type == Primitive::kPrimDouble) {
- __ LoadConst64(AT, std::numeric_limits<int64_t>::max());
- } else {
- __ LoadConst32(AT, std::numeric_limits<int32_t>::max());
- }
- __ Bnec(AT, out, &finite);
-
+ // if (out != MAX_VALUE && out != MIN_VALUE)
if (type == Primitive::kPrimDouble) {
- __ Dmtc1(ZERO, FTMP);
- __ CmpLtD(FTMP, in, FTMP);
- __ Dmfc1(AT, FTMP);
+ __ Daddiu(TMP, out, 1);
+ __ Dati(TMP, 0x8000); // TMP = out + 0x8000 0000 0000 0001
+ // or out - 0x7FFF FFFF FFFF FFFF.
+ // IOW, TMP = 1 if out = Long.MIN_VALUE
+ // or TMP = 0 if out = Long.MAX_VALUE.
+ __ Dsrl(TMP, TMP, 1); // TMP = 0 if out = Long.MIN_VALUE
+ // or out = Long.MAX_VALUE.
+ __ Beqzc(TMP, &done);
} else {
- __ Mtc1(ZERO, FTMP);
- __ CmpLtS(FTMP, in, FTMP);
- __ Mfc1(AT, FTMP);
+ __ Addiu(TMP, out, 1);
+ __ Aui(TMP, TMP, 0x8000); // TMP = out + 0x8000 0001
+ // or out - 0x7FFF FFFF.
+ // IOW, TMP = 1 if out = Int.MIN_VALUE
+ // or TMP = 0 if out = Int.MAX_VALUE.
+ __ Srl(TMP, TMP, 1); // TMP = 0 if out = Int.MIN_VALUE
+ // or out = Int.MAX_VALUE.
+ __ Beqzc(TMP, &done);
}
- __ Bc(&add);
-
- __ Bind(&finite);
-
// TMP = (0.5 <= (in - out)) ? -1 : 0;
if (type == Primitive::kPrimDouble) {
__ Cvtdl(FTMP, FTMP); // Convert output of floor.l.d back to "double".
@@ -977,23 +936,21 @@ static void GenRound(LocationSummary* locations, Mips64Assembler* assembler, Pri
__ SubD(FTMP, in, FTMP);
__ Dmtc1(AT, half);
__ CmpLeD(FTMP, half, FTMP);
- __ Dmfc1(AT, FTMP);
+ __ Dmfc1(TMP, FTMP);
} else {
__ Cvtsw(FTMP, FTMP); // Convert output of floor.w.s back to "float".
__ LoadConst32(AT, bit_cast<int32_t, float>(0.5f));
__ SubS(FTMP, in, FTMP);
__ Mtc1(AT, half);
__ CmpLeS(FTMP, half, FTMP);
- __ Mfc1(AT, FTMP);
+ __ Mfc1(TMP, FTMP);
}
- __ Bind(&add);
-
// Return out -= TMP.
if (type == Primitive::kPrimDouble) {
- __ Dsubu(out, out, AT);
+ __ Dsubu(out, out, TMP);
} else {
- __ Subu(out, out, AT);
+ __ Subu(out, out, TMP);
}
__ Bind(&done);
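The Addiu/Aui/Srl (and Daddiu/Dati/Dsrl) sequences above detect the two saturated outputs of floor with one add and one shift: adding 1 and then adding 0x80000000 (which flips the sign bit) maps MAX_VALUE and MIN_VALUE, and only those two values, to 0 and 1, so a single shift right by one yields zero exactly for them. A quick standalone check of the 32-bit variant (illustrative only):

    #include <cstdint>
    #include <cstdio>

    // True iff out is INT32_MIN or INT32_MAX, using the same
    // add-1 / flip-sign-bit / shift-right-1 trick as the assembly above.
    bool IsSaturated(int32_t out) {
      uint32_t tmp = static_cast<uint32_t>(out) + 1u;  // Addiu TMP, out, 1
      tmp += 0x80000000u;                              // Aui TMP, TMP, 0x8000
      return (tmp >> 1) == 0u;                         // Srl TMP, TMP, 1; Beqzc
    }

    int main() {
      // Expected: 1 1 0 0
      std::printf("%d %d %d %d\n",
                  IsSaturated(INT32_MAX), IsSaturated(INT32_MIN),
                  IsSaturated(0), IsSaturated(-1));
      return 0;
    }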
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 8ed2ad86bf..af0b193b03 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -759,7 +759,7 @@ static void CreateFPToFPCallLocations(ArenaAllocator* arena,
// We have to ensure that the native code doesn't clobber the XMM registers which are
// non-volatile for ART, but volatile for Native calls. This will ensure that they are
// saved in the prologue and properly restored.
- for (auto fp_reg : non_volatile_xmm_regs) {
+ for (FloatRegister fp_reg : non_volatile_xmm_regs) {
locations->AddTemp(Location::FpuRegisterLocation(fp_reg));
}
}
@@ -898,7 +898,7 @@ static void CreateFPFPToFPCallLocations(ArenaAllocator* arena,
// We have to ensure that the native code doesn't clobber the XMM registers which are
// non-volatile for ART, but volatile for Native calls. This will ensure that they are
// saved in the prologue and properly restored.
- for (auto fp_reg : non_volatile_xmm_regs) {
+ for (FloatRegister fp_reg : non_volatile_xmm_regs) {
locations->AddTemp(Location::FpuRegisterLocation(fp_reg));
}
}
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index b4da20b558..522962485b 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -1406,7 +1406,8 @@ class HLoopInformationOutwardIterator : public ValueObject {
M(BitwiseNegatedRight, Instruction) \
M(DataProcWithShifterOp, Instruction) \
M(MultiplyAccumulate, Instruction) \
- M(IntermediateAddress, Instruction)
+ M(IntermediateAddress, Instruction) \
+ M(IntermediateAddressIndex, Instruction)
#endif
#ifndef ART_ENABLE_CODEGEN_arm
diff --git a/compiler/optimizing/nodes_shared.h b/compiler/optimizing/nodes_shared.h
index c6bfbcc7fb..075a816f3f 100644
--- a/compiler/optimizing/nodes_shared.h
+++ b/compiler/optimizing/nodes_shared.h
@@ -150,6 +150,49 @@ class HIntermediateAddress FINAL : public HExpression<2> {
DISALLOW_COPY_AND_ASSIGN(HIntermediateAddress);
};
+// This instruction computes part of the array access offset (data and index offset).
+//
+// For array accesses the element address has the following structure:
+// Address = CONST_OFFSET + base_addr + (index << ELEM_SHIFT). Taking LDR/STR addressing modes
+// into account, the address part (CONST_OFFSET + (index << ELEM_SHIFT)) can be shared across array
+// accesses with the same data type and index. For example, in the following loop the 5 accesses
+// can share the address computation:
+//
+// void foo(int[] a, int[] b, int[] c) {
+// for (i...) {
+// a[i] = a[i] + 5;
+// b[i] = b[i] + c[i];
+// }
+// }
+//
+// Note: since the instruction does not involve the base array address in its computation, it has
+// no side effects (in contrast to HIntermediateAddress).
+class HIntermediateAddressIndex FINAL : public HExpression<3> {
+ public:
+ HIntermediateAddressIndex(
+ HInstruction* index, HInstruction* offset, HInstruction* shift, uint32_t dex_pc)
+ : HExpression(Primitive::kPrimInt, SideEffects::None(), dex_pc) {
+ SetRawInputAt(0, index);
+ SetRawInputAt(1, offset);
+ SetRawInputAt(2, shift);
+ }
+
+ bool CanBeMoved() const OVERRIDE { return true; }
+ bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+ return true;
+ }
+ bool IsActualObject() const OVERRIDE { return false; }
+
+ HInstruction* GetIndex() const { return InputAt(0); }
+ HInstruction* GetOffset() const { return InputAt(1); }
+ HInstruction* GetShift() const { return InputAt(2); }
+
+ DECLARE_INSTRUCTION(IntermediateAddressIndex);
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HIntermediateAddressIndex);
+};
+
class HDataProcWithShifterOp FINAL : public HExpression<2> {
public:
enum OpKind {
diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h
index 52c247b52f..92fe9bfa7d 100644
--- a/compiler/optimizing/nodes_vector.h
+++ b/compiler/optimizing/nodes_vector.h
@@ -178,12 +178,17 @@ class HVecMemoryOperation : public HVecOperation {
size_t vector_length,
uint32_t dex_pc)
: HVecOperation(arena, packed_type, side_effects, number_of_inputs, vector_length, dex_pc),
- alignment_(Primitive::ComponentSize(packed_type), 0) { }
+ alignment_(Primitive::ComponentSize(packed_type), 0) {
+ DCHECK_GE(number_of_inputs, 2u);
+ }
void SetAlignment(Alignment alignment) { alignment_ = alignment; }
Alignment GetAlignment() const { return alignment_; }
+ HInstruction* GetArray() const { return InputAt(0); }
+ HInstruction* GetIndex() const { return InputAt(1); }
+
DECLARE_ABSTRACT_INSTRUCTION(VecMemoryOperation);
private:
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 065c11eddb..f928f71209 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -638,11 +638,14 @@ void OptimizingCompiler::RunArchOptimizations(InstructionSet instruction_set,
new (arena) arm::InstructionSimplifierArm(graph, stats);
SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph);
GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects, "GVN$after_arch");
+ HInstructionScheduling* scheduling =
+ new (arena) HInstructionScheduling(graph, instruction_set, codegen);
HOptimization* arm_optimizations[] = {
simplifier,
side_effects,
gvn,
- fixups
+ fixups,
+ scheduling,
};
RunOptimizations(arm_optimizations, arraysize(arm_optimizations), pass_observer);
break;
diff --git a/compiler/optimizing/register_allocator_graph_color.cc b/compiler/optimizing/register_allocator_graph_color.cc
index 87f709f63d..300f4c6239 100644
--- a/compiler/optimizing/register_allocator_graph_color.cc
+++ b/compiler/optimizing/register_allocator_graph_color.cc
@@ -1968,8 +1968,7 @@ void RegisterAllocatorGraphColor::ColorSpillSlots(ArenaVector<LiveInterval*>* in
ArenaVector<std::tuple<size_t, bool, LiveInterval*>> interval_endpoints(
allocator_->Adapter(kArenaAllocRegisterAllocator));
- for (auto it = intervals->begin(), e = intervals->end(); it != e; ++it) {
- LiveInterval* parent_interval = *it;
+ for (LiveInterval* parent_interval : *intervals) {
DCHECK(parent_interval->IsParent());
DCHECK(!parent_interval->HasSpillSlot());
size_t start = parent_interval->GetStart();
diff --git a/compiler/optimizing/scheduler.cc b/compiler/optimizing/scheduler.cc
index d65d20cf43..320f01a727 100644
--- a/compiler/optimizing/scheduler.cc
+++ b/compiler/optimizing/scheduler.cc
@@ -23,6 +23,10 @@
#include "scheduler_arm64.h"
#endif
+#ifdef ART_ENABLE_CODEGEN_arm
+#include "scheduler_arm.h"
+#endif
+
namespace art {
void SchedulingGraph::AddDependency(SchedulingNode* node,
@@ -264,10 +268,11 @@ void SchedulingGraph::DumpAsDotGraph(const std::string& description,
// Start the dot graph. Use an increasing index for easier differentiation.
output << "digraph G {\n";
for (const auto& entry : nodes_map_) {
- DumpAsDotNode(output, entry.second);
+ SchedulingNode* node = entry.second;
+ DumpAsDotNode(output, node);
}
// Create a fake 'end_of_scheduling' node to help visualization of critical_paths.
- for (auto node : initial_candidates) {
+ for (SchedulingNode* node : initial_candidates) {
const HInstruction* instruction = node->GetInstruction();
output << InstructionTypeId(instruction) << ":s -> end_of_scheduling:n "
<< "[label=\"" << node->GetLatency() << "\",dir=back]\n";
@@ -580,28 +585,39 @@ bool HScheduler::IsSchedulingBarrier(const HInstruction* instr) const {
void HInstructionScheduling::Run(bool only_optimize_loop_blocks,
bool schedule_randomly) {
+#if defined(ART_ENABLE_CODEGEN_arm64) || defined(ART_ENABLE_CODEGEN_arm)
+ // Phase-local allocator that allocates scheduler internal data structures like
+ // scheduling nodes, internal nodes map, dependencies, etc.
+ ArenaAllocator arena_allocator(graph_->GetArena()->GetArenaPool());
+ CriticalPathSchedulingNodeSelector critical_path_selector;
+ RandomSchedulingNodeSelector random_selector;
+ SchedulingNodeSelector* selector = schedule_randomly
+ ? static_cast<SchedulingNodeSelector*>(&random_selector)
+ : static_cast<SchedulingNodeSelector*>(&critical_path_selector);
+#else
// Avoid compilation error when compiling for unsupported instruction set.
UNUSED(only_optimize_loop_blocks);
UNUSED(schedule_randomly);
+#endif
switch (instruction_set_) {
#ifdef ART_ENABLE_CODEGEN_arm64
case kArm64: {
- // Phase-local allocator that allocates scheduler internal data structures like
- // scheduling nodes, internel nodes map, dependencies, etc.
- ArenaAllocator arena_allocator(graph_->GetArena()->GetArenaPool());
-
- CriticalPathSchedulingNodeSelector critical_path_selector;
- RandomSchedulingNodeSelector random_selector;
- SchedulingNodeSelector* selector = schedule_randomly
- ? static_cast<SchedulingNodeSelector*>(&random_selector)
- : static_cast<SchedulingNodeSelector*>(&critical_path_selector);
-
arm64::HSchedulerARM64 scheduler(&arena_allocator, selector);
scheduler.SetOnlyOptimizeLoopBlocks(only_optimize_loop_blocks);
scheduler.Schedule(graph_);
break;
}
#endif
+#if defined(ART_ENABLE_CODEGEN_arm)
+ case kThumb2:
+ case kArm: {
+ arm::SchedulingLatencyVisitorARM arm_latency_visitor(codegen_);
+ arm::HSchedulerARM scheduler(&arena_allocator, selector, &arm_latency_visitor);
+ scheduler.SetOnlyOptimizeLoopBlocks(only_optimize_loop_blocks);
+ scheduler.Schedule(graph_);
+ break;
+ }
+#endif
default:
break;
}
diff --git a/compiler/optimizing/scheduler.h b/compiler/optimizing/scheduler.h
index 9236a0e4fa..73e8087cd0 100644
--- a/compiler/optimizing/scheduler.h
+++ b/compiler/optimizing/scheduler.h
@@ -23,6 +23,7 @@
#include "driver/compiler_driver.h"
#include "nodes.h"
#include "optimization.h"
+#include "code_generator.h"
namespace art {
@@ -469,8 +470,9 @@ inline bool SchedulingGraph::IsSchedulingBarrier(const HInstruction* instruction
class HInstructionScheduling : public HOptimization {
public:
- HInstructionScheduling(HGraph* graph, InstructionSet instruction_set)
+ HInstructionScheduling(HGraph* graph, InstructionSet instruction_set, CodeGenerator* cg = nullptr)
: HOptimization(graph, kInstructionScheduling),
+ codegen_(cg),
instruction_set_(instruction_set) {}
void Run() {
@@ -480,6 +482,7 @@ class HInstructionScheduling : public HOptimization {
static constexpr const char* kInstructionScheduling = "scheduler";
+ CodeGenerator* const codegen_;
const InstructionSet instruction_set_;
private:
diff --git a/compiler/optimizing/scheduler_arm.cc b/compiler/optimizing/scheduler_arm.cc
new file mode 100644
index 0000000000..1a89567991
--- /dev/null
+++ b/compiler/optimizing/scheduler_arm.cc
@@ -0,0 +1,822 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arch/arm/instruction_set_features_arm.h"
+#include "code_generator_utils.h"
+#include "common_arm.h"
+#include "mirror/array-inl.h"
+#include "scheduler_arm.h"
+
+namespace art {
+namespace arm {
+
+using helpers::Int32ConstantFrom;
+using helpers::Uint64ConstantFrom;
+
+void SchedulingLatencyVisitorARM::HandleBinaryOperationLantencies(HBinaryOperation* instr) {
+ switch (instr->GetResultType()) {
+ case Primitive::kPrimLong:
+ // HAdd and HSub long operations translate to ADDS+ADC or SUBS+SBC pairs,
+ // so a bubble (kArmNopLatency) is added to represent the internal carry flag
+ // dependency inside these pairs.
+ last_visited_internal_latency_ = kArmIntegerOpLatency + kArmNopLatency;
+ last_visited_latency_ = kArmIntegerOpLatency;
+ break;
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ last_visited_latency_ = kArmFloatingPointOpLatency;
+ break;
+ default:
+ last_visited_latency_ = kArmIntegerOpLatency;
+ break;
+ }
+}
+
+void SchedulingLatencyVisitorARM::VisitAdd(HAdd* instr) {
+ HandleBinaryOperationLantencies(instr);
+}
+
+void SchedulingLatencyVisitorARM::VisitSub(HSub* instr) {
+ HandleBinaryOperationLantencies(instr);
+}
+
+void SchedulingLatencyVisitorARM::VisitMul(HMul* instr) {
+ switch (instr->GetResultType()) {
+ case Primitive::kPrimLong:
+ last_visited_internal_latency_ = 3 * kArmMulIntegerLatency;
+ last_visited_latency_ = kArmIntegerOpLatency;
+ break;
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ last_visited_latency_ = kArmMulFloatingPointLatency;
+ break;
+ default:
+ last_visited_latency_ = kArmMulIntegerLatency;
+ break;
+ }
+}
+
+void SchedulingLatencyVisitorARM::HandleBitwiseOperationLantencies(HBinaryOperation* instr) {
+ switch (instr->GetResultType()) {
+ case Primitive::kPrimLong:
+ last_visited_internal_latency_ = kArmIntegerOpLatency;
+ last_visited_latency_ = kArmIntegerOpLatency;
+ break;
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ last_visited_latency_ = kArmFloatingPointOpLatency;
+ break;
+ default:
+ last_visited_latency_ = kArmIntegerOpLatency;
+ break;
+ }
+}
+
+void SchedulingLatencyVisitorARM::VisitAnd(HAnd* instr) {
+ HandleBitwiseOperationLantencies(instr);
+}
+
+void SchedulingLatencyVisitorARM::VisitOr(HOr* instr) {
+ HandleBitwiseOperationLantencies(instr);
+}
+
+void SchedulingLatencyVisitorARM::VisitXor(HXor* instr) {
+ HandleBitwiseOperationLantencies(instr);
+}
+
+void SchedulingLatencyVisitorARM::VisitRor(HRor* instr) {
+ switch (instr->GetResultType()) {
+ case Primitive::kPrimInt:
+ last_visited_latency_ = kArmIntegerOpLatency;
+ break;
+ case Primitive::kPrimLong: {
+ // HandleLongRotate
+ HInstruction* rhs = instr->GetRight();
+ if (rhs->IsConstant()) {
+ uint64_t rot = Uint64ConstantFrom(rhs->AsConstant()) & kMaxLongShiftDistance;
+ if (rot != 0u) {
+ last_visited_internal_latency_ = 3 * kArmIntegerOpLatency;
+ last_visited_latency_ = kArmIntegerOpLatency;
+ } else {
+ last_visited_internal_latency_ = kArmIntegerOpLatency;
+ last_visited_latency_ = kArmIntegerOpLatency;
+ }
+ } else {
+ last_visited_internal_latency_ = 9 * kArmIntegerOpLatency + kArmBranchLatency;
+ last_visited_latency_ = kArmBranchLatency;
+ }
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unexpected operation type " << instr->GetResultType();
+ UNREACHABLE();
+ }
+}
+
+void SchedulingLatencyVisitorARM::HandleShiftLatencies(HBinaryOperation* instr) {
+ Primitive::Type type = instr->GetResultType();
+ HInstruction* rhs = instr->GetRight();
+ switch (type) {
+ case Primitive::kPrimInt:
+ if (!rhs->IsConstant()) {
+ last_visited_internal_latency_ = kArmIntegerOpLatency;
+ }
+ last_visited_latency_ = kArmIntegerOpLatency;
+ break;
+ case Primitive::kPrimLong:
+ if (!rhs->IsConstant()) {
+ last_visited_internal_latency_ = 8 * kArmIntegerOpLatency;
+ } else {
+ uint32_t shift_value = Int32ConstantFrom(rhs->AsConstant()) & kMaxLongShiftDistance;
+ if (shift_value == 1 || shift_value >= 32) {
+ last_visited_internal_latency_ = kArmIntegerOpLatency;
+ } else {
+ last_visited_internal_latency_ = 2 * kArmIntegerOpLatency;
+ }
+ }
+ last_visited_latency_ = kArmIntegerOpLatency;
+ break;
+ default:
+ LOG(FATAL) << "Unexpected operation type " << type;
+ UNREACHABLE();
+ }
+}
+
+void SchedulingLatencyVisitorARM::VisitShl(HShl* instr) {
+ HandleShiftLatencies(instr);
+}
+
+void SchedulingLatencyVisitorARM::VisitShr(HShr* instr) {
+ HandleShiftLatencies(instr);
+}
+
+void SchedulingLatencyVisitorARM::VisitUShr(HUShr* instr) {
+ HandleShiftLatencies(instr);
+}
+
+void SchedulingLatencyVisitorARM::VisitCondition(HCondition* instr) {
+ switch (instr->GetLeft()->GetType()) {
+ case Primitive::kPrimLong:
+ last_visited_internal_latency_ = 4 * kArmIntegerOpLatency;
+ break;
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ last_visited_internal_latency_ = 2 * kArmFloatingPointOpLatency;
+ break;
+ default:
+ last_visited_internal_latency_ = 2 * kArmIntegerOpLatency;
+ break;
+ }
+ last_visited_latency_ = kArmIntegerOpLatency;
+}
+
+void SchedulingLatencyVisitorARM::VisitCompare(HCompare* instr) {
+ Primitive::Type type = instr->InputAt(0)->GetType();
+ switch (type) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimInt:
+ last_visited_internal_latency_ = 2 * kArmIntegerOpLatency;
+ break;
+ case Primitive::kPrimLong:
+ last_visited_internal_latency_ = 2 * kArmIntegerOpLatency + 3 * kArmBranchLatency;
+ break;
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ last_visited_internal_latency_ = kArmIntegerOpLatency + 2 * kArmFloatingPointOpLatency;
+ break;
+ default:
+ last_visited_internal_latency_ = 2 * kArmIntegerOpLatency;
+ break;
+ }
+ last_visited_latency_ = kArmIntegerOpLatency;
+}
+
+void SchedulingLatencyVisitorARM::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instruction) {
+ if (instruction->GetResultType() == Primitive::kPrimInt) {
+ last_visited_latency_ = kArmIntegerOpLatency;
+ } else {
+ last_visited_internal_latency_ = kArmIntegerOpLatency;
+ last_visited_latency_ = kArmIntegerOpLatency;
+ }
+}
+
+void SchedulingLatencyVisitorARM::HandleGenerateDataProcInstruction(bool internal_latency) {
+ if (internal_latency) {
+ last_visited_internal_latency_ += kArmIntegerOpLatency;
+ } else {
+ last_visited_latency_ = kArmDataProcWithShifterOpLatency;
+ }
+}
+
+void SchedulingLatencyVisitorARM::HandleGenerateDataProc(HDataProcWithShifterOp* instruction) {
+ const HInstruction::InstructionKind kind = instruction->GetInstrKind();
+ if (kind == HInstruction::kAdd) {
+ last_visited_internal_latency_ = kArmIntegerOpLatency;
+ last_visited_latency_ = kArmIntegerOpLatency;
+ } else if (kind == HInstruction::kSub) {
+ last_visited_internal_latency_ = kArmIntegerOpLatency;
+ last_visited_latency_ = kArmIntegerOpLatency;
+ } else {
+ HandleGenerateDataProcInstruction(/* internal_latency */ true);
+ HandleGenerateDataProcInstruction();
+ }
+}
+
+void SchedulingLatencyVisitorARM::HandleGenerateLongDataProc(HDataProcWithShifterOp* instruction) {
+ DCHECK_EQ(instruction->GetType(), Primitive::kPrimLong);
+ DCHECK(HDataProcWithShifterOp::IsShiftOp(instruction->GetOpKind()));
+
+ const uint32_t shift_value = instruction->GetShiftAmount();
+ const HInstruction::InstructionKind kind = instruction->GetInstrKind();
+
+ if (shift_value >= 32) {
+ // Different shift types actually generate similar code here, so there is no need to
+ // differentiate them the way the codegen pass does; this also avoids handling shift
+ // types from different ARM backends.
+ HandleGenerateDataProc(instruction);
+ } else {
+ DCHECK_GT(shift_value, 1U);
+ DCHECK_LT(shift_value, 32U);
+
+ if (kind == HInstruction::kOr || kind == HInstruction::kXor) {
+ HandleGenerateDataProcInstruction(/* internal_latency */ true);
+ HandleGenerateDataProcInstruction(/* internal_latency */ true);
+ HandleGenerateDataProcInstruction();
+ } else {
+ last_visited_internal_latency_ += 2 * kArmIntegerOpLatency;
+ HandleGenerateDataProc(instruction);
+ }
+ }
+}
+
+void SchedulingLatencyVisitorARM::VisitDataProcWithShifterOp(HDataProcWithShifterOp* instruction) {
+ const HDataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind();
+
+ if (instruction->GetType() == Primitive::kPrimInt) {
+ DCHECK(!HDataProcWithShifterOp::IsExtensionOp(op_kind));
+ HandleGenerateDataProcInstruction();
+ } else {
+ DCHECK_EQ(instruction->GetType(), Primitive::kPrimLong);
+ if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) {
+ HandleGenerateDataProc(instruction);
+ } else {
+ HandleGenerateLongDataProc(instruction);
+ }
+ }
+}
+
+void SchedulingLatencyVisitorARM::VisitIntermediateAddress(HIntermediateAddress* ATTRIBUTE_UNUSED) {
+ // Although the code generated is a simple `add` instruction, we found through empirical results
+ // that spacing it from its use in memory accesses was beneficial.
+ last_visited_internal_latency_ = kArmNopLatency;
+ last_visited_latency_ = kArmIntegerOpLatency;
+}
+
+void SchedulingLatencyVisitorARM::VisitMultiplyAccumulate(HMultiplyAccumulate* ATTRIBUTE_UNUSED) {
+ last_visited_latency_ = kArmMulIntegerLatency;
+}
+
+void SchedulingLatencyVisitorARM::VisitArrayGet(HArrayGet* instruction) {
+ Primitive::Type type = instruction->GetType();
+ const bool maybe_compressed_char_at =
+ mirror::kUseStringCompression && instruction->IsStringCharAt();
+ HInstruction* array_instr = instruction->GetArray();
+ bool has_intermediate_address = array_instr->IsIntermediateAddress();
+ HInstruction* index = instruction->InputAt(1);
+
+ switch (type) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimInt: {
+ if (maybe_compressed_char_at) {
+ last_visited_internal_latency_ += kArmMemoryLoadLatency;
+ }
+ if (index->IsConstant()) {
+ if (maybe_compressed_char_at) {
+ last_visited_internal_latency_ +=
+ kArmIntegerOpLatency + kArmBranchLatency + kArmMemoryLoadLatency;
+ last_visited_latency_ = kArmBranchLatency;
+ } else {
+ last_visited_latency_ += kArmMemoryLoadLatency;
+ }
+ } else {
+ if (has_intermediate_address) {
+ } else {
+ last_visited_internal_latency_ += kArmIntegerOpLatency;
+ }
+ if (maybe_compressed_char_at) {
+ last_visited_internal_latency_ +=
+ kArmIntegerOpLatency + kArmBranchLatency + kArmMemoryLoadLatency;
+ last_visited_latency_ = kArmBranchLatency;
+ } else {
+ last_visited_latency_ += kArmMemoryLoadLatency;
+ }
+ }
+ break;
+ }
+
+ case Primitive::kPrimNot: {
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ last_visited_latency_ = kArmLoadWithBakerReadBarrierLatency;
+ } else {
+ if (index->IsConstant()) {
+ last_visited_latency_ = kArmMemoryLoadLatency;
+ } else {
+ if (has_intermediate_address) {
+ } else {
+ last_visited_internal_latency_ += kArmIntegerOpLatency;
+ }
+ last_visited_internal_latency_ = kArmMemoryLoadLatency;
+ }
+ }
+ break;
+ }
+
+ case Primitive::kPrimLong: {
+ if (index->IsConstant()) {
+ last_visited_latency_ = kArmMemoryLoadLatency;
+ } else {
+ last_visited_internal_latency_ += kArmIntegerOpLatency;
+ last_visited_latency_ = kArmMemoryLoadLatency;
+ }
+ break;
+ }
+
+ case Primitive::kPrimFloat: {
+ if (index->IsConstant()) {
+ last_visited_latency_ = kArmMemoryLoadLatency;
+ } else {
+ last_visited_internal_latency_ += kArmIntegerOpLatency;
+ last_visited_latency_ = kArmMemoryLoadLatency;
+ }
+ break;
+ }
+
+ case Primitive::kPrimDouble: {
+ if (index->IsConstant()) {
+ last_visited_latency_ = kArmMemoryLoadLatency;
+ } else {
+ last_visited_internal_latency_ += kArmIntegerOpLatency;
+ last_visited_latency_ = kArmMemoryLoadLatency;
+ }
+ break;
+ }
+
+ default:
+ LOG(FATAL) << "Unreachable type " << type;
+ UNREACHABLE();
+ }
+}
+
+void SchedulingLatencyVisitorARM::VisitArrayLength(HArrayLength* instruction) {
+ last_visited_latency_ = kArmMemoryLoadLatency;
+ if (mirror::kUseStringCompression && instruction->IsStringLength()) {
+ last_visited_internal_latency_ = kArmMemoryLoadLatency;
+ last_visited_latency_ = kArmIntegerOpLatency;
+ }
+}
+
+void SchedulingLatencyVisitorARM::VisitArraySet(HArraySet* instruction) {
+ HInstruction* index = instruction->InputAt(1);
+ Primitive::Type value_type = instruction->GetComponentType();
+ HInstruction* array_instr = instruction->GetArray();
+ bool has_intermediate_address = array_instr->IsIntermediateAddress();
+
+ switch (value_type) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimInt: {
+ if (index->IsConstant()) {
+ last_visited_latency_ = kArmMemoryStoreLatency;
+ } else {
+ if (has_intermediate_address) {
+ } else {
+ last_visited_internal_latency_ = kArmIntegerOpLatency;
+ }
+ last_visited_latency_ = kArmMemoryStoreLatency;
+ }
+ break;
+ }
+
+ case Primitive::kPrimNot: {
+ if (instruction->InputAt(2)->IsNullConstant()) {
+ if (index->IsConstant()) {
+ last_visited_latency_ = kArmMemoryStoreLatency;
+ } else {
+ last_visited_internal_latency_ = kArmIntegerOpLatency;
+ last_visited_latency_ = kArmMemoryStoreLatency;
+ }
+ } else {
+ // Modeling the exact instruction sequence of runtime type checks is too complicated,
+ // so it is simply given a slow latency.
+ last_visited_latency_ = kArmRuntimeTypeCheckLatency;
+ }
+ break;
+ }
+
+ case Primitive::kPrimLong: {
+ if (index->IsConstant()) {
+ last_visited_latency_ = kArmMemoryLoadLatency;
+ } else {
+ last_visited_internal_latency_ = kArmIntegerOpLatency;
+ last_visited_latency_ = kArmMemoryLoadLatency;
+ }
+ break;
+ }
+
+ case Primitive::kPrimFloat: {
+ if (index->IsConstant()) {
+ last_visited_latency_ = kArmMemoryLoadLatency;
+ } else {
+ last_visited_internal_latency_ = kArmIntegerOpLatency;
+ last_visited_latency_ = kArmMemoryLoadLatency;
+ }
+ break;
+ }
+
+ case Primitive::kPrimDouble: {
+ if (index->IsConstant()) {
+ last_visited_latency_ = kArmMemoryLoadLatency;
+ } else {
+ last_visited_internal_latency_ = kArmIntegerOpLatency;
+ last_visited_latency_ = kArmMemoryLoadLatency;
+ }
+ break;
+ }
+
+ default:
+ LOG(FATAL) << "Unreachable type " << value_type;
+ UNREACHABLE();
+ }
+}
+
+void SchedulingLatencyVisitorARM::VisitBoundsCheck(HBoundsCheck* ATTRIBUTE_UNUSED) {
+ last_visited_internal_latency_ = kArmIntegerOpLatency;
+ // Users do not use any data results.
+ last_visited_latency_ = 0;
+}
+
+void SchedulingLatencyVisitorARM::HandleDivRemConstantIntegralLatencies(int32_t imm) {
+ if (imm == 0) {
+ last_visited_internal_latency_ = 0;
+ last_visited_latency_ = 0;
+ } else if (imm == 1 || imm == -1) {
+ last_visited_latency_ = kArmIntegerOpLatency;
+ } else if (IsPowerOfTwo(AbsOrMin(imm))) {
+ last_visited_internal_latency_ = 3 * kArmIntegerOpLatency;
+ last_visited_latency_ = kArmIntegerOpLatency;
+ } else {
+ last_visited_internal_latency_ = kArmMulIntegerLatency + 2 * kArmIntegerOpLatency;
+ last_visited_latency_ = kArmIntegerOpLatency;
+ }
+}
+
+void SchedulingLatencyVisitorARM::VisitDiv(HDiv* instruction) {
+ Primitive::Type type = instruction->GetResultType();
+ switch (type) {
+ case Primitive::kPrimInt: {
+ HInstruction* rhs = instruction->GetRight();
+ if (rhs->IsConstant()) {
+ int32_t imm = Int32ConstantFrom(rhs->AsConstant());
+ HandleDivRemConstantIntegralLatencies(imm);
+ } else {
+ last_visited_latency_ = kArmDivIntegerLatency;
+ }
+ break;
+ }
+ case Primitive::kPrimFloat:
+ last_visited_latency_ = kArmDivFloatLatency;
+ break;
+ case Primitive::kPrimDouble:
+ last_visited_latency_ = kArmDivDoubleLatency;
+ break;
+ default:
+ last_visited_internal_latency_ = kArmCallInternalLatency;
+ last_visited_latency_ = kArmCallLatency;
+ break;
+ }
+}
+
+void SchedulingLatencyVisitorARM::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
+ HandleFieldGetLatencies(instruction, instruction->GetFieldInfo());
+}
+
+void SchedulingLatencyVisitorARM::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
+ HandleFieldSetLatencies(instruction, instruction->GetFieldInfo());
+}
+
+void SchedulingLatencyVisitorARM::VisitInstanceOf(HInstanceOf* ATTRIBUTE_UNUSED) {
+ last_visited_internal_latency_ = kArmCallInternalLatency;
+ last_visited_latency_ = kArmIntegerOpLatency;
+}
+
+void SchedulingLatencyVisitorARM::VisitInvoke(HInvoke* ATTRIBUTE_UNUSED) {
+ last_visited_internal_latency_ = kArmCallInternalLatency;
+ last_visited_latency_ = kArmCallLatency;
+}
+
+void SchedulingLatencyVisitorARM::VisitLoadString(HLoadString* ATTRIBUTE_UNUSED) {
+ last_visited_internal_latency_ = kArmLoadStringInternalLatency;
+ last_visited_latency_ = kArmMemoryLoadLatency;
+}
+
+void SchedulingLatencyVisitorARM::VisitNewArray(HNewArray* ATTRIBUTE_UNUSED) {
+ last_visited_internal_latency_ = kArmIntegerOpLatency + kArmCallInternalLatency;
+ last_visited_latency_ = kArmCallLatency;
+}
+
+void SchedulingLatencyVisitorARM::VisitNewInstance(HNewInstance* instruction) {
+ if (instruction->IsStringAlloc()) {
+ last_visited_internal_latency_ = 2 * kArmMemoryLoadLatency + kArmCallInternalLatency;
+ } else {
+ last_visited_internal_latency_ = kArmCallInternalLatency;
+ }
+ last_visited_latency_ = kArmCallLatency;
+}
+
+void SchedulingLatencyVisitorARM::VisitRem(HRem* instruction) {
+ Primitive::Type type = instruction->GetResultType();
+ switch (type) {
+ case Primitive::kPrimInt: {
+ HInstruction* rhs = instruction->GetRight();
+ if (rhs->IsConstant()) {
+ int32_t imm = Int32ConstantFrom(rhs->AsConstant());
+ HandleDivRemConstantIntegralLatencies(imm);
+ } else {
+ last_visited_internal_latency_ = kArmDivIntegerLatency;
+ last_visited_latency_ = kArmMulIntegerLatency;
+ }
+ break;
+ }
+ default:
+ last_visited_internal_latency_ = kArmCallInternalLatency;
+ last_visited_latency_ = kArmCallLatency;
+ break;
+ }
+}
+
+void SchedulingLatencyVisitorARM::HandleFieldGetLatencies(HInstruction* instruction,
+ const FieldInfo& field_info) {
+ DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
+ DCHECK(codegen_ != nullptr);
+ bool is_volatile = field_info.IsVolatile();
+ Primitive::Type field_type = field_info.GetFieldType();
+ bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
+
+ switch (field_type) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimInt:
+ last_visited_latency_ = kArmMemoryLoadLatency;
+ break;
+
+ case Primitive::kPrimNot:
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ last_visited_internal_latency_ = kArmMemoryLoadLatency + kArmIntegerOpLatency;
+ last_visited_latency_ = kArmMemoryLoadLatency;
+ } else {
+ last_visited_latency_ = kArmMemoryLoadLatency;
+ }
+ break;
+
+ case Primitive::kPrimLong:
+ if (is_volatile && !atomic_ldrd_strd) {
+ last_visited_internal_latency_ = kArmMemoryLoadLatency + kArmIntegerOpLatency;
+ last_visited_latency_ = kArmMemoryLoadLatency;
+ } else {
+ last_visited_latency_ = kArmMemoryLoadLatency;
+ }
+ break;
+
+ case Primitive::kPrimFloat:
+ last_visited_latency_ = kArmMemoryLoadLatency;
+ break;
+
+ case Primitive::kPrimDouble:
+ if (is_volatile && !atomic_ldrd_strd) {
+ last_visited_internal_latency_ =
+ kArmMemoryLoadLatency + kArmIntegerOpLatency + kArmMemoryLoadLatency;
+ last_visited_latency_ = kArmIntegerOpLatency;
+ } else {
+ last_visited_latency_ = kArmMemoryLoadLatency;
+ }
+ break;
+
+ default:
+ last_visited_latency_ = kArmMemoryLoadLatency;
+ break;
+ }
+
+ if (is_volatile) {
+ last_visited_internal_latency_ += kArmMemoryBarrierLatency;
+ }
+}
+
+void SchedulingLatencyVisitorARM::HandleFieldSetLatencies(HInstruction* instruction,
+ const FieldInfo& field_info) {
+ DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
+ DCHECK(codegen_ != nullptr);
+ bool is_volatile = field_info.IsVolatile();
+ Primitive::Type field_type = field_info.GetFieldType();
+ bool needs_write_barrier =
+ CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
+ bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
+
+ switch (field_type) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimChar:
+ if (is_volatile) {
+ last_visited_internal_latency_ = kArmMemoryBarrierLatency + kArmMemoryStoreLatency;
+ last_visited_latency_ = kArmMemoryBarrierLatency;
+ } else {
+ last_visited_latency_ = kArmMemoryStoreLatency;
+ }
+ break;
+
+ case Primitive::kPrimInt:
+ case Primitive::kPrimNot:
+ if (kPoisonHeapReferences && needs_write_barrier) {
+ last_visited_internal_latency_ += kArmIntegerOpLatency * 2;
+ }
+ last_visited_latency_ = kArmMemoryStoreLatency;
+ break;
+
+ case Primitive::kPrimLong:
+ if (is_volatile && !atomic_ldrd_strd) {
+ last_visited_internal_latency_ =
+ kArmIntegerOpLatency + kArmMemoryLoadLatency + kArmMemoryStoreLatency;
+ last_visited_latency_ = kArmIntegerOpLatency;
+ } else {
+ last_visited_latency_ = kArmMemoryStoreLatency;
+ }
+ break;
+
+ case Primitive::kPrimFloat:
+ last_visited_latency_ = kArmMemoryStoreLatency;
+ break;
+
+ case Primitive::kPrimDouble:
+ if (is_volatile && !atomic_ldrd_strd) {
+ last_visited_internal_latency_ = kArmIntegerOpLatency +
+ kArmIntegerOpLatency + kArmMemoryLoadLatency + kArmMemoryStoreLatency;
+ last_visited_latency_ = kArmIntegerOpLatency;
+ } else {
+ last_visited_latency_ = kArmMemoryStoreLatency;
+ }
+ break;
+
+ default:
+ last_visited_latency_ = kArmMemoryStoreLatency;
+ break;
+ }
+}
+
+void SchedulingLatencyVisitorARM::VisitStaticFieldGet(HStaticFieldGet* instruction) {
+ HandleFieldGetLatencies(instruction, instruction->GetFieldInfo());
+}
+
+void SchedulingLatencyVisitorARM::VisitStaticFieldSet(HStaticFieldSet* instruction) {
+ HandleFieldSetLatencies(instruction, instruction->GetFieldInfo());
+}
+
+void SchedulingLatencyVisitorARM::VisitSuspendCheck(HSuspendCheck* instruction) {
+ HBasicBlock* block = instruction->GetBlock();
+ DCHECK((block->GetLoopInformation() != nullptr) ||
+ (block->IsEntryBlock() && instruction->GetNext()->IsGoto()));
+ // Users do not use any data results.
+ last_visited_latency_ = 0;
+}
+
+void SchedulingLatencyVisitorARM::VisitTypeConversion(HTypeConversion* instr) {
+ Primitive::Type result_type = instr->GetResultType();
+ Primitive::Type input_type = instr->GetInputType();
+
+ switch (result_type) {
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ last_visited_latency_ = kArmIntegerOpLatency; // SBFX or UBFX
+ break;
+
+ case Primitive::kPrimInt:
+ switch (input_type) {
+ case Primitive::kPrimLong:
+ last_visited_latency_ = kArmIntegerOpLatency; // MOV
+ break;
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ last_visited_internal_latency_ = kArmTypeConversionFloatingPointIntegerLatency;
+ last_visited_latency_ = kArmFloatingPointOpLatency;
+ break;
+ default:
+ last_visited_latency_ = kArmIntegerOpLatency;
+ break;
+ }
+ break;
+
+ case Primitive::kPrimLong:
+ switch (input_type) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ // MOV and extension
+ last_visited_internal_latency_ = kArmIntegerOpLatency;
+ last_visited_latency_ = kArmIntegerOpLatency;
+ break;
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ // invokes runtime
+ last_visited_internal_latency_ = kArmCallInternalLatency;
+ break;
+ default:
+ last_visited_internal_latency_ = kArmIntegerOpLatency;
+ last_visited_latency_ = kArmIntegerOpLatency;
+ break;
+ }
+ break;
+
+ case Primitive::kPrimFloat:
+ switch (input_type) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ last_visited_internal_latency_ = kArmTypeConversionFloatingPointIntegerLatency;
+ last_visited_latency_ = kArmFloatingPointOpLatency;
+ break;
+ case Primitive::kPrimLong:
+ // invokes runtime
+ last_visited_internal_latency_ = kArmCallInternalLatency;
+ break;
+ case Primitive::kPrimDouble:
+ last_visited_latency_ = kArmFloatingPointOpLatency;
+ break;
+ default:
+ last_visited_latency_ = kArmFloatingPointOpLatency;
+ break;
+ }
+ break;
+
+ case Primitive::kPrimDouble:
+ switch (input_type) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ last_visited_internal_latency_ = kArmTypeConversionFloatingPointIntegerLatency;
+ last_visited_latency_ = kArmFloatingPointOpLatency;
+ break;
+ case Primitive::kPrimLong:
+ last_visited_internal_latency_ = 5 * kArmFloatingPointOpLatency;
+ last_visited_latency_ = kArmFloatingPointOpLatency;
+ break;
+ case Primitive::kPrimFloat:
+ last_visited_latency_ = kArmFloatingPointOpLatency;
+ break;
+ default:
+ last_visited_latency_ = kArmFloatingPointOpLatency;
+ break;
+ }
+ break;
+
+ default:
+ last_visited_latency_ = kArmTypeConversionFloatingPointIntegerLatency;
+ break;
+ }
+}
+
+void SchedulingLatencyVisitorARM::VisitArmDexCacheArraysBase(art::HArmDexCacheArraysBase*) {
+ last_visited_internal_latency_ = kArmIntegerOpLatency;
+ last_visited_latency_ = kArmIntegerOpLatency;
+}
+
+} // namespace arm
+} // namespace art
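The visitor above reports two values per instruction: last_visited_internal_latency_ (the cost of the instruction's own expansion, e.g. the extra cycle for the ADDS+ADC pair of a long add) and last_visited_latency_ (how long users must wait for the result). A rough standalone sketch of how a critical-path selector could combine such numbers (a deliberately simplified model, not the actual SchedulingGraph, with latencies loosely borrowed from the kArm* constants):

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Simplified scheduling node: its latencies plus the nodes it depends on.
    struct Node {
      uint32_t internal_latency;      // cost of the instruction's own expansion
      uint32_t result_latency;        // extra wait before the result is usable
      std::vector<int> predecessors;  // indices of nodes this node depends on
    };

    // Longest dependency chain ending at node `i`; a critical-path selector
    // would schedule the candidate with the largest such value first.
    uint32_t CriticalPath(const std::vector<Node>& nodes, int i) {
      uint32_t longest_pred = 0;
      for (int p : nodes[i].predecessors) {
        longest_pred = std::max(longest_pred, CriticalPath(nodes, p));
      }
      return longest_pred + nodes[i].internal_latency + nodes[i].result_latency;
    }

    int main() {
      // add -> mul -> store chain.
      std::vector<Node> nodes = {
          {0, 2, {}},   // 0: integer add
          {0, 6, {0}},  // 1: integer multiply, depends on the add
          {0, 9, {1}},  // 2: memory store, depends on the multiply
      };
      std::printf("critical path = %u\n", CriticalPath(nodes, 2));  // 17
      return 0;
    }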
diff --git a/compiler/optimizing/scheduler_arm.h b/compiler/optimizing/scheduler_arm.h
new file mode 100644
index 0000000000..8d5e4f375b
--- /dev/null
+++ b/compiler/optimizing/scheduler_arm.h
@@ -0,0 +1,158 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_
+#define ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_
+
+#include "code_generator_arm_vixl.h"
+#include "scheduler.h"
+
+namespace art {
+namespace arm {
+#ifdef ART_USE_OLD_ARM_BACKEND
+typedef CodeGeneratorARM CodeGeneratorARMType;
+#else
+typedef CodeGeneratorARMVIXL CodeGeneratorARMType;
+#endif
+
+// AArch32 instruction latencies.
+// We currently assume that all ARM CPUs share the same instruction latency list.
+// The following latencies were tuned based on performance experiments and automatic
+// tuning using a differential evolution approach on various benchmarks.
+static constexpr uint32_t kArmIntegerOpLatency = 2;
+static constexpr uint32_t kArmFloatingPointOpLatency = 11;
+static constexpr uint32_t kArmDataProcWithShifterOpLatency = 4;
+static constexpr uint32_t kArmMulIntegerLatency = 6;
+static constexpr uint32_t kArmMulFloatingPointLatency = 11;
+static constexpr uint32_t kArmDivIntegerLatency = 10;
+static constexpr uint32_t kArmDivFloatLatency = 20;
+static constexpr uint32_t kArmDivDoubleLatency = 25;
+static constexpr uint32_t kArmTypeConversionFloatingPointIntegerLatency = 11;
+static constexpr uint32_t kArmMemoryLoadLatency = 9;
+static constexpr uint32_t kArmMemoryStoreLatency = 9;
+static constexpr uint32_t kArmMemoryBarrierLatency = 6;
+static constexpr uint32_t kArmBranchLatency = 4;
+static constexpr uint32_t kArmCallLatency = 5;
+static constexpr uint32_t kArmCallInternalLatency = 29;
+static constexpr uint32_t kArmLoadStringInternalLatency = 10;
+static constexpr uint32_t kArmNopLatency = 2;
+static constexpr uint32_t kArmLoadWithBakerReadBarrierLatency = 18;
+static constexpr uint32_t kArmRuntimeTypeCheckLatency = 46;
+
+class SchedulingLatencyVisitorARM : public SchedulingLatencyVisitor {
+ public:
+ explicit SchedulingLatencyVisitorARM(CodeGenerator* codegen)
+ : codegen_(down_cast<CodeGeneratorARMType*>(codegen)) {}
+
+ // Default visitor for instructions not handled specifically below.
+ void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) {
+ last_visited_latency_ = kArmIntegerOpLatency;
+ }
+
+// We add a second unused parameter to be able to use this macro like the others
+// defined in `nodes.h`.
+#define FOR_EACH_SCHEDULED_ARM_INSTRUCTION(M) \
+ M(ArrayGet , unused) \
+ M(ArrayLength , unused) \
+ M(ArraySet , unused) \
+ M(Add , unused) \
+ M(Sub , unused) \
+ M(And , unused) \
+ M(Or , unused) \
+ M(Ror , unused) \
+ M(Xor , unused) \
+ M(Shl , unused) \
+ M(Shr , unused) \
+ M(UShr , unused) \
+ M(Mul , unused) \
+ M(Div , unused) \
+ M(Condition , unused) \
+ M(Compare , unused) \
+ M(BoundsCheck , unused) \
+ M(InstanceFieldGet , unused) \
+ M(InstanceFieldSet , unused) \
+ M(InstanceOf , unused) \
+ M(Invoke , unused) \
+ M(LoadString , unused) \
+ M(NewArray , unused) \
+ M(NewInstance , unused) \
+ M(Rem , unused) \
+ M(StaticFieldGet , unused) \
+ M(StaticFieldSet , unused) \
+ M(SuspendCheck , unused) \
+ M(TypeConversion , unused)
+
+#define FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(M) \
+ M(BitwiseNegatedRight, unused) \
+ M(MultiplyAccumulate, unused) \
+ M(IntermediateAddress, unused) \
+ M(DataProcWithShifterOp, unused)
+
+#define DECLARE_VISIT_INSTRUCTION(type, unused) \
+ void Visit##type(H##type* instruction) OVERRIDE;
+
+ FOR_EACH_SCHEDULED_ARM_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
+ FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
+ FOR_EACH_CONCRETE_INSTRUCTION_ARM(DECLARE_VISIT_INSTRUCTION)
+
+#undef DECLARE_VISIT_INSTRUCTION
+
+ private:
+ void HandleBinaryOperationLantencies(HBinaryOperation* instr);
+ void HandleBitwiseOperationLantencies(HBinaryOperation* instr);
+ void HandleShiftLatencies(HBinaryOperation* instr);
+ void HandleDivRemConstantIntegralLatencies(int32_t imm);
+ void HandleFieldSetLatencies(HInstruction* instruction, const FieldInfo& field_info);
+ void HandleFieldGetLatencies(HInstruction* instruction, const FieldInfo& field_info);
+ void HandleGenerateDataProcInstruction(bool internal_latency = false);
+ void HandleGenerateDataProc(HDataProcWithShifterOp* instruction);
+ void HandleGenerateLongDataProc(HDataProcWithShifterOp* instruction);
+
+ // The latency setting for each HInstruction depends on how the CodeGenerator may generate
+ // code; latency visitors may query the CodeGenerator for such information to produce
+ // accurate latency settings.
+ CodeGeneratorARMType* codegen_;
+};
+
+class HSchedulerARM : public HScheduler {
+ public:
+ HSchedulerARM(ArenaAllocator* arena,
+ SchedulingNodeSelector* selector,
+ SchedulingLatencyVisitorARM* arm_latency_visitor)
+ : HScheduler(arena, arm_latency_visitor, selector) {}
+ ~HSchedulerARM() OVERRIDE {}
+
+ bool IsSchedulable(const HInstruction* instruction) const OVERRIDE {
+#define CASE_INSTRUCTION_KIND(type, unused) case \
+ HInstruction::InstructionKind::k##type:
+ switch (instruction->GetKind()) {
+ FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(CASE_INSTRUCTION_KIND)
+ return true;
+ FOR_EACH_CONCRETE_INSTRUCTION_ARM(CASE_INSTRUCTION_KIND)
+ return true;
+ default:
+ return HScheduler::IsSchedulable(instruction);
+ }
+#undef CASE_INSTRUCTION_KIND
+ }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HSchedulerARM);
+};
+
+} // namespace arm
+} // namespace art
+
+#endif // ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_
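The FOR_EACH_SCHEDULED_* lists above follow the X-macro pattern: each M(Type, unused) entry is expanded once by DECLARE_VISIT_INSTRUCTION into a VisitType declaration and again by CASE_INSTRUCTION_KIND into a switch case. A tiny self-contained illustration of the pattern with generic names (not the ART macros themselves):

    #include <cstdio>

    // List of "instructions"; the second parameter is unused, mirroring the
    // FOR_EACH_SCHEDULED_* macros, which keep it only for signature compatibility.
    #define FOR_EACH_DEMO_INSTRUCTION(M) \
      M(Add, unused)                     \
      M(Mul, unused)

    struct Visitor {
      // Expands to: void VisitAdd(); void VisitMul();
    #define DECLARE_VISIT(type, unused) void Visit##type();
      FOR_EACH_DEMO_INSTRUCTION(DECLARE_VISIT)
    #undef DECLARE_VISIT
    };

    // Expands to the two member function definitions.
    #define DEFINE_VISIT(type, unused) \
      void Visitor::Visit##type() { std::printf("visiting %s\n", #type); }
    FOR_EACH_DEMO_INSTRUCTION(DEFINE_VISIT)
    #undef DEFINE_VISIT

    int main() {
      Visitor v;
      v.VisitAdd();
      v.VisitMul();
      return 0;
    }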
diff --git a/compiler/optimizing/scheduler_test.cc b/compiler/optimizing/scheduler_test.cc
index 31d13e2a26..d87600aa5e 100644
--- a/compiler/optimizing/scheduler_test.cc
+++ b/compiler/optimizing/scheduler_test.cc
@@ -28,6 +28,10 @@
#include "scheduler_arm64.h"
#endif
+#ifdef ART_ENABLE_CODEGEN_arm
+#include "scheduler_arm.h"
+#endif
+
namespace art {
// Return all combinations of ISA and code generator that are executable on
@@ -56,7 +60,7 @@ static ::std::vector<CodegenTargetConfig> GetTargetConfigs() {
#endif
};
- for (auto test_config : test_config_candidates) {
+ for (const CodegenTargetConfig& test_config : test_config_candidates) {
if (CanExecute(test_config.GetInstructionSet())) {
v.push_back(test_config);
}
@@ -65,133 +69,151 @@ static ::std::vector<CodegenTargetConfig> GetTargetConfigs() {
return v;
}
-class SchedulerTest : public CommonCompilerTest {};
-
-#ifdef ART_ENABLE_CODEGEN_arm64
-TEST_F(SchedulerTest, DependencyGraph) {
- ArenaPool pool;
- ArenaAllocator allocator(&pool);
- HGraph* graph = CreateGraph(&allocator);
- HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
- HBasicBlock* block1 = new (&allocator) HBasicBlock(graph);
- graph->AddBlock(entry);
- graph->AddBlock(block1);
- graph->SetEntryBlock(entry);
-
- // entry:
- // array ParameterValue
- // c1 IntConstant
- // c2 IntConstant
- // block1:
- // add1 Add [c1, c2]
- // add2 Add [add1, c2]
- // mul Mul [add1, add2]
- // div_check DivZeroCheck [add2] (env: add2, mul)
- // div Div [add1, div_check]
- // array_get1 ArrayGet [array, add1]
- // array_set1 ArraySet [array, add1, add2]
- // array_get2 ArrayGet [array, add1]
- // array_set2 ArraySet [array, add1, add2]
-
- HInstruction* array = new (&allocator) HParameterValue(graph->GetDexFile(),
- dex::TypeIndex(0),
- 0,
- Primitive::kPrimNot);
- HInstruction* c1 = graph->GetIntConstant(1);
- HInstruction* c2 = graph->GetIntConstant(10);
- HInstruction* add1 = new (&allocator) HAdd(Primitive::kPrimInt, c1, c2);
- HInstruction* add2 = new (&allocator) HAdd(Primitive::kPrimInt, add1, c2);
- HInstruction* mul = new (&allocator) HMul(Primitive::kPrimInt, add1, add2);
- HInstruction* div_check = new (&allocator) HDivZeroCheck(add2, 0);
- HInstruction* div = new (&allocator) HDiv(Primitive::kPrimInt, add1, div_check, 0);
- HInstruction* array_get1 = new (&allocator) HArrayGet(array, add1, Primitive::kPrimInt, 0);
- HInstruction* array_set1 = new (&allocator) HArraySet(array, add1, add2, Primitive::kPrimInt, 0);
- HInstruction* array_get2 = new (&allocator) HArrayGet(array, add1, Primitive::kPrimInt, 0);
- HInstruction* array_set2 = new (&allocator) HArraySet(array, add1, add2, Primitive::kPrimInt, 0);
-
- DCHECK(div_check->CanThrow());
-
- entry->AddInstruction(array);
-
- HInstruction* block_instructions[] = {add1,
- add2,
- mul,
- div_check,
- div,
- array_get1,
- array_set1,
- array_get2,
- array_set2};
- for (auto instr : block_instructions) {
- block1->AddInstruction(instr);
+class SchedulerTest : public CommonCompilerTest {
+ public:
+ SchedulerTest() : pool_(), allocator_(&pool_) {
+ graph_ = CreateGraph(&allocator_);
}
- HEnvironment* environment = new (&allocator) HEnvironment(&allocator,
- 2,
- graph->GetArtMethod(),
+ // Build the scheduling graph and run target-specific scheduling on it.
+ void TestBuildDependencyGraphAndSchedule(HScheduler* scheduler) {
+ HBasicBlock* entry = new (&allocator_) HBasicBlock(graph_);
+ HBasicBlock* block1 = new (&allocator_) HBasicBlock(graph_);
+ graph_->AddBlock(entry);
+ graph_->AddBlock(block1);
+ graph_->SetEntryBlock(entry);
+
+ // entry:
+ // array ParameterValue
+ // c1 IntConstant
+ // c2 IntConstant
+ // block1:
+ // add1 Add [c1, c2]
+ // add2 Add [add1, c2]
+ // mul Mul [add1, add2]
+ // div_check DivZeroCheck [add2] (env: add2, mul)
+ // div Div [add1, div_check]
+ // array_get1 ArrayGet [array, add1]
+ // array_set1 ArraySet [array, add1, add2]
+ // array_get2 ArrayGet [array, add1]
+ // array_set2 ArraySet [array, add1, add2]
+
+ HInstruction* array = new (&allocator_) HParameterValue(graph_->GetDexFile(),
+ dex::TypeIndex(0),
0,
- div_check);
- div_check->SetRawEnvironment(environment);
- environment->SetRawEnvAt(0, add2);
- add2->AddEnvUseAt(div_check->GetEnvironment(), 0);
- environment->SetRawEnvAt(1, mul);
- mul->AddEnvUseAt(div_check->GetEnvironment(), 1);
-
- ArenaAllocator* arena = graph->GetArena();
- CriticalPathSchedulingNodeSelector critical_path_selector;
- arm64::HSchedulerARM64 scheduler(arena, &critical_path_selector);
- SchedulingGraph scheduling_graph(&scheduler, arena);
- // Instructions must be inserted in reverse order into the scheduling graph.
- for (auto instr : ReverseRange(block_instructions)) {
- scheduling_graph.AddNode(instr);
+ Primitive::kPrimNot);
+ HInstruction* c1 = graph_->GetIntConstant(1);
+ HInstruction* c2 = graph_->GetIntConstant(10);
+ HInstruction* add1 = new (&allocator_) HAdd(Primitive::kPrimInt, c1, c2);
+ HInstruction* add2 = new (&allocator_) HAdd(Primitive::kPrimInt, add1, c2);
+ HInstruction* mul = new (&allocator_) HMul(Primitive::kPrimInt, add1, add2);
+ HInstruction* div_check = new (&allocator_) HDivZeroCheck(add2, 0);
+ HInstruction* div = new (&allocator_) HDiv(Primitive::kPrimInt, add1, div_check, 0);
+ HInstruction* array_get1 = new (&allocator_) HArrayGet(array, add1, Primitive::kPrimInt, 0);
+ HInstruction* array_set1 = new (&allocator_) HArraySet(array, add1, add2, Primitive::kPrimInt, 0);
+ HInstruction* array_get2 = new (&allocator_) HArrayGet(array, add1, Primitive::kPrimInt, 0);
+ HInstruction* array_set2 = new (&allocator_) HArraySet(array, add1, add2, Primitive::kPrimInt, 0);
+
+ DCHECK(div_check->CanThrow());
+
+ entry->AddInstruction(array);
+
+ HInstruction* block_instructions[] = {add1,
+ add2,
+ mul,
+ div_check,
+ div,
+ array_get1,
+ array_set1,
+ array_get2,
+ array_set2};
+ for (HInstruction* instr : block_instructions) {
+ block1->AddInstruction(instr);
+ }
+
+ HEnvironment* environment = new (&allocator_) HEnvironment(&allocator_,
+ 2,
+ graph_->GetArtMethod(),
+ 0,
+ div_check);
+ div_check->SetRawEnvironment(environment);
+ environment->SetRawEnvAt(0, add2);
+ add2->AddEnvUseAt(div_check->GetEnvironment(), 0);
+ environment->SetRawEnvAt(1, mul);
+ mul->AddEnvUseAt(div_check->GetEnvironment(), 1);
+
+ SchedulingGraph scheduling_graph(scheduler, graph_->GetArena());
+ // Instructions must be inserted in reverse order into the scheduling graph.
+ for (HInstruction* instr : ReverseRange(block_instructions)) {
+ scheduling_graph.AddNode(instr);
+ }
+
+ // Should not have dependencies across basic blocks.
+ ASSERT_FALSE(scheduling_graph.HasImmediateDataDependency(add1, c1));
+ ASSERT_FALSE(scheduling_graph.HasImmediateDataDependency(add2, c2));
+
+ // Define-use dependency.
+ ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(add2, add1));
+ ASSERT_FALSE(scheduling_graph.HasImmediateDataDependency(add1, add2));
+ ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(div_check, add2));
+ ASSERT_FALSE(scheduling_graph.HasImmediateDataDependency(div_check, add1));
+ ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(div, div_check));
+ ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(array_set1, add1));
+ ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(array_set1, add2));
+
+ // Read and write dependencies
+ ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set1, array_get1));
+ ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set2, array_get2));
+ ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_get2, array_set1));
+ ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set2, array_set1));
+
+ // Env dependency.
+ ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(div_check, mul));
+ ASSERT_FALSE(scheduling_graph.HasImmediateOtherDependency(mul, div_check));
+
+ // CanThrow.
+ ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set1, div_check));
+
+ // Exercise the code paths of the target-specific scheduler and its SchedulingLatencyVisitor.
+ scheduler->Schedule(graph_);
}
- // Should not have dependencies cross basic blocks.
- ASSERT_FALSE(scheduling_graph.HasImmediateDataDependency(add1, c1));
- ASSERT_FALSE(scheduling_graph.HasImmediateDataDependency(add2, c2));
-
- // Define-use dependency.
- ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(add2, add1));
- ASSERT_FALSE(scheduling_graph.HasImmediateDataDependency(add1, add2));
- ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(div_check, add2));
- ASSERT_FALSE(scheduling_graph.HasImmediateDataDependency(div_check, add1));
- ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(div, div_check));
- ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(array_set1, add1));
- ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(array_set1, add2));
-
- // Read and write dependencies
- ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set1, array_get1));
- ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set2, array_get2));
- ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_get2, array_set1));
- ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set2, array_set1));
-
- // Env dependency.
- ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(div_check, mul));
- ASSERT_FALSE(scheduling_graph.HasImmediateOtherDependency(mul, div_check));
-
- // CanThrow.
- ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set1, div_check));
+ void CompileWithRandomSchedulerAndRun(const uint16_t* data, bool has_result, int expected) {
+ for (CodegenTargetConfig target_config : GetTargetConfigs()) {
+ HGraph* graph = CreateCFG(&allocator_, data);
+
+ // Schedule the graph randomly.
+ HInstructionScheduling scheduling(graph, target_config.GetInstructionSet());
+ scheduling.Run(/*only_optimize_loop_blocks*/ false, /*schedule_randomly*/ true);
+
+ RunCode(target_config,
+ graph,
+ [](HGraph* graph_arg) { RemoveSuspendChecks(graph_arg); },
+ has_result, expected);
+ }
+ }
+
+ ArenaPool pool_;
+ ArenaAllocator allocator_;
+ HGraph* graph_;
+};
+
+#if defined(ART_ENABLE_CODEGEN_arm64)
+TEST_F(SchedulerTest, DependencyGraphAndSchedulerARM64) {
+ CriticalPathSchedulingNodeSelector critical_path_selector;
+ arm64::HSchedulerARM64 scheduler(&allocator_, &critical_path_selector);
+ TestBuildDependencyGraphAndSchedule(&scheduler);
}
#endif
-static void CompileWithRandomSchedulerAndRun(const uint16_t* data,
- bool has_result,
- int expected) {
- for (CodegenTargetConfig target_config : GetTargetConfigs()) {
- ArenaPool pool;
- ArenaAllocator arena(&pool);
- HGraph* graph = CreateCFG(&arena, data);
-
- // Schedule the graph randomly.
- HInstructionScheduling scheduling(graph, target_config.GetInstructionSet());
- scheduling.Run(/*only_optimize_loop_blocks*/ false, /*schedule_randomly*/ true);
-
- RunCode(target_config,
- graph,
- [](HGraph* graph_arg) { RemoveSuspendChecks(graph_arg); },
- has_result, expected);
- }
+#if defined(ART_ENABLE_CODEGEN_arm)
+TEST_F(SchedulerTest, DependencyGraphAndSchedulerARM) {
+ CriticalPathSchedulingNodeSelector critical_path_selector;
+ arm::SchedulingLatencyVisitorARM arm_latency_visitor(/*CodeGenerator*/ nullptr);
+ arm::HSchedulerARM scheduler(&allocator_, &critical_path_selector, &arm_latency_visitor);
+ TestBuildDependencyGraphAndSchedule(&scheduler);
}
+#endif
TEST_F(SchedulerTest, RandomScheduling) {
//
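The assertions in TestBuildDependencyGraphAndSchedule above distinguish data (def-use) dependencies from "other" dependencies such as array read/write ordering, environment uses and CanThrow constraints. The toy graph below is only a hedged sketch of that idea, not ART's SchedulingGraph; the class and method names are invented to mirror the queries used in the test.

#include <iostream>
#include <map>
#include <set>
#include <string>

// Hypothetical mini dependency graph: a node depends on its operands (data) and is
// ordered after earlier side-effecting or throwing nodes (other).
struct MiniSchedulingGraph {
  std::map<std::string, std::set<std::string>> data_deps;   // user -> operand definitions
  std::map<std::string, std::set<std::string>> other_deps;  // later -> earlier ordering deps

  void AddDataDep(const std::string& user, const std::string& def) {
    data_deps[user].insert(def);
  }
  void AddOtherDep(const std::string& later, const std::string& earlier) {
    other_deps[later].insert(earlier);
  }
  bool HasImmediateDataDependency(const std::string& user, const std::string& def) const {
    auto it = data_deps.find(user);
    return it != data_deps.end() && it->second.count(def) != 0;
  }
  bool HasImmediateOtherDependency(const std::string& later, const std::string& earlier) const {
    auto it = other_deps.find(later);
    return it != other_deps.end() && it->second.count(earlier) != 0;
  }
};

int main() {
  MiniSchedulingGraph g;
  g.AddDataDep("add2", "add1");              // add2 Add [add1, c2]
  g.AddDataDep("div_check", "add2");         // div_check DivZeroCheck [add2]
  g.AddOtherDep("array_set1", "div_check");  // a store must not float above a throwing check
  std::cout << g.HasImmediateDataDependency("add2", "add1")              // 1
            << g.HasImmediateDataDependency("add1", "add2")              // 0: direction matters
            << g.HasImmediateOtherDependency("array_set1", "div_check")  // 1
            << "\n";
}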
diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc
index eedaf6e67e..98ded24257 100644
--- a/compiler/optimizing/sharpening.cc
+++ b/compiler/optimizing/sharpening.cc
@@ -56,7 +56,7 @@ static bool IsInBootImage(ArtMethod* method) {
const std::vector<gc::space::ImageSpace*>& image_spaces =
Runtime::Current()->GetHeap()->GetBootImageSpaces();
for (gc::space::ImageSpace* image_space : image_spaces) {
- const auto& method_section = image_space->GetImageHeader().GetMethodsSection();
+ const ImageSection& method_section = image_space->GetImageHeader().GetMethodsSection();
if (method_section.Contains(reinterpret_cast<uint8_t*>(method) - image_space->Begin())) {
return true;
}
diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc
index 7972931c31..c03b98c5c2 100644
--- a/compiler/utils/mips64/assembler_mips64.cc
+++ b/compiler/utils/mips64/assembler_mips64.cc
@@ -2822,6 +2822,94 @@ void Mips64Assembler::AdjustBaseAndOffset(GpuRegister& base,
CHECK_EQ(misalignment, offset & (kMips64DoublewordSize - 1));
}
+void Mips64Assembler::AdjustBaseOffsetAndElementSizeShift(GpuRegister& base,
+ int32_t& offset,
+ int& element_size_shift) {
+ // This method is used to adjust the base register, offset and element_size_shift
+ // for a vector load/store when the offset doesn't fit into allowed number of bits.
+ // MSA ld.df and st.df instructions take signed offsets as arguments, but the maximum
+ // offset depends on the size of the data format df (10-bit offsets for ld.b,
+ // 11-bit for ld.h, 12-bit for ld.w and 13-bit for ld.d).
+ // If element_size_shift is non-negative at entry, it won't be changed, but offset
+ // will be checked for appropriate alignment. If negative at entry, it will be
+ // adjusted based on offset for maximum fit.
+ // It is assumed that the value in `base` is a multiple of 8 (i.e. doubleword-aligned).
+
+ CHECK_NE(base, AT); // Must not overwrite the register `base` while loading `offset`.
+
+ if (element_size_shift >= 0) {
+ CHECK_LE(element_size_shift, TIMES_8);
+ CHECK_GE(JAVASTYLE_CTZ(offset), element_size_shift);
+ } else if (IsAligned<kMips64DoublewordSize>(offset)) {
+ element_size_shift = TIMES_8;
+ } else if (IsAligned<kMips64WordSize>(offset)) {
+ element_size_shift = TIMES_4;
+ } else if (IsAligned<kMips64HalfwordSize>(offset)) {
+ element_size_shift = TIMES_2;
+ } else {
+ element_size_shift = TIMES_1;
+ }
+
+ const int low_len = 10 + element_size_shift; // How many low bits of `offset` ld.df/st.df
+ // will take.
+ int16_t low = offset & ((1 << low_len) - 1); // Isolate these bits.
+ low -= (low & (1 << (low_len - 1))) << 1; // Sign-extend these bits.
+ if (low == offset) {
+ return; // `offset` fits into ld.df/st.df.
+ }
+
+ // First, see if `offset` can be represented as a sum of two signed offsets.
+ // This can save an instruction.
+
+ // Max int16_t that's a multiple of element size.
+ const int32_t kMaxDeltaForSimpleAdjustment = 0x8000 - (1 << element_size_shift);
+ // Max ld.df/st.df offset that's a multiple of element size.
+ const int32_t kMaxLoadStoreOffset = 0x1ff << element_size_shift;
+ const int32_t kMaxOffsetForSimpleAdjustment = kMaxDeltaForSimpleAdjustment + kMaxLoadStoreOffset;
+
+ if (IsInt<16>(offset)) {
+ Daddiu(AT, base, offset);
+ offset = 0;
+ } else if (0 <= offset && offset <= kMaxOffsetForSimpleAdjustment) {
+ Daddiu(AT, base, kMaxDeltaForSimpleAdjustment);
+ offset -= kMaxDeltaForSimpleAdjustment;
+ } else if (-kMaxOffsetForSimpleAdjustment <= offset && offset < 0) {
+ Daddiu(AT, base, -kMaxDeltaForSimpleAdjustment);
+ offset += kMaxDeltaForSimpleAdjustment;
+ } else {
+ // Let's treat `offset` as 64-bit to simplify handling of sign
+ // extensions in the instructions that supply its smaller signed parts.
+ //
+ // 16-bit or smaller parts of `offset`:
+ // |63  top  48|47  hi  32|31  upper  16|15  mid  13-10|12-9  low  0|
+ //
+ // Instructions that supply each part as a signed integer addend:
+ // |dati       |dahi      |daui         |daddiu        |ld.df/st.df |
+ //
+ // `top` is always 0, so dati isn't used.
+ // `hi` is 1 when `offset` is close to +2GB and 0 otherwise.
+ uint64_t tmp = static_cast<uint64_t>(offset) - low; // Exclude `low` from the rest of `offset`
+ // (accounts for sign of `low`).
+ tmp += (tmp & (UINT64_C(1) << 15)) << 1; // Account for sign extension in daddiu.
+ tmp += (tmp & (UINT64_C(1) << 31)) << 1; // Account for sign extension in daui.
+ int16_t mid = Low16Bits(tmp);
+ int16_t upper = High16Bits(tmp);
+ int16_t hi = Low16Bits(High32Bits(tmp));
+ Daui(AT, base, upper);
+ if (hi != 0) {
+ CHECK_EQ(hi, 1);
+ Dahi(AT, hi);
+ }
+ if (mid != 0) {
+ Daddiu(AT, AT, mid);
+ }
+ offset = low;
+ }
+ base = AT;
+ CHECK_GE(JAVASTYLE_CTZ(offset), element_size_shift);
+ CHECK(IsInt<10>(offset >> element_size_shift));
+}
+
void Mips64Assembler::LoadFromOffset(LoadOperandType type,
GpuRegister reg,
GpuRegister base,
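The comment in AdjustBaseOffsetAndElementSizeShift above says an MSA ld.df/st.df byte offset must be element-aligned and fit into a signed (10 + element_size_shift)-bit immediate. The snippet below is a standalone sketch of just that fit check, assuming the same encoding limits; FitsMsaOffset is a hypothetical helper written for illustration and is not part of the assembler.

#include <cstdint>
#include <cstdio>

// Returns whether `offset` can be used directly as the ld.df/st.df immediate:
// it must be a multiple of the element size and fit in (10 + shift) signed bits.
bool FitsMsaOffset(int32_t offset, int element_size_shift) {
  const int low_len = 10 + element_size_shift;
  int32_t low = offset & ((1 << low_len) - 1);   // Isolate the low bits.
  low -= (low & (1 << (low_len - 1))) << 1;      // Sign-extend them.
  bool aligned = (offset & ((1 << element_size_shift) - 1)) == 0;
  return aligned && low == offset;
}

int main() {
  // ld.d (shift 3): signed 13-bit byte offsets that are multiples of 8, i.e. [-4096, 4088].
  std::printf("%d %d %d\n",
              FitsMsaOffset(4088, 3),   // 1: largest positive ld.d offset
              FitsMsaOffset(4096, 3),   // 0: needs a daddiu to adjust the base
              FitsMsaOffset(2, 1));     // 1: small ld.h offset
}

Offsets that fail this check are the ones for which the assembler materialises part of the offset into AT via Daddiu/Daui/Dahi, as the test expectations further below illustrate.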
diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h
index e824791892..c92cf4c048 100644
--- a/compiler/utils/mips64/assembler_mips64.h
+++ b/compiler/utils/mips64/assembler_mips64.h
@@ -278,14 +278,16 @@ enum LoadOperandType {
kLoadUnsignedHalfword,
kLoadWord,
kLoadUnsignedWord,
- kLoadDoubleword
+ kLoadDoubleword,
+ kLoadQuadword
};
enum StoreOperandType {
kStoreByte,
kStoreHalfword,
kStoreWord,
- kStoreDoubleword
+ kStoreDoubleword,
+ kStoreQuadword
};
// Used to test the values returned by ClassS/ClassD.
@@ -901,6 +903,10 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer
void EmitLoad(ManagedRegister m_dst, GpuRegister src_register, int32_t src_offset, size_t size);
void AdjustBaseAndOffset(GpuRegister& base, int32_t& offset, bool is_doubleword);
+ // If element_size_shift is negative at entry, its value will be calculated based on the offset.
+ void AdjustBaseOffsetAndElementSizeShift(GpuRegister& base,
+ int32_t& offset,
+ int& element_size_shift);
private:
// This will be used as an argument for loads/stores
@@ -1024,6 +1030,8 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer
null_checker();
}
break;
+ default:
+ LOG(FATAL) << "UNREACHABLE";
}
if (type != kLoadDoubleword) {
null_checker();
@@ -1036,7 +1044,12 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer
GpuRegister base,
int32_t offset,
ImplicitNullChecker null_checker = NoImplicitNullChecker()) {
- AdjustBaseAndOffset(base, offset, /* is_doubleword */ (type == kLoadDoubleword));
+ int element_size_shift = -1;
+ if (type != kLoadQuadword) {
+ AdjustBaseAndOffset(base, offset, /* is_doubleword */ (type == kLoadDoubleword));
+ } else {
+ AdjustBaseOffsetAndElementSizeShift(base, offset, element_size_shift);
+ }
switch (type) {
case kLoadWord:
@@ -1056,6 +1069,17 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer
null_checker();
}
break;
+ case kLoadQuadword:
+ switch (element_size_shift) {
+ case TIMES_1: LdB(static_cast<VectorRegister>(reg), base, offset); break;
+ case TIMES_2: LdH(static_cast<VectorRegister>(reg), base, offset); break;
+ case TIMES_4: LdW(static_cast<VectorRegister>(reg), base, offset); break;
+ case TIMES_8: LdD(static_cast<VectorRegister>(reg), base, offset); break;
+ default:
+ LOG(FATAL) << "UNREACHABLE";
+ }
+ null_checker();
+ break;
default:
LOG(FATAL) << "UNREACHABLE";
}
@@ -1109,7 +1133,12 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer
GpuRegister base,
int32_t offset,
ImplicitNullChecker null_checker = NoImplicitNullChecker()) {
- AdjustBaseAndOffset(base, offset, /* is_doubleword */ (type == kStoreDoubleword));
+ int element_size_shift = -1;
+ if (type != kStoreQuadword) {
+ AdjustBaseAndOffset(base, offset, /* is_doubleword */ (type == kStoreDoubleword));
+ } else {
+ AdjustBaseOffsetAndElementSizeShift(base, offset, element_size_shift);
+ }
switch (type) {
case kStoreWord:
@@ -1129,6 +1158,17 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer
null_checker();
}
break;
+ case kStoreQuadword:
+ switch (element_size_shift) {
+ case TIMES_1: StB(static_cast<VectorRegister>(reg), base, offset); break;
+ case TIMES_2: StH(static_cast<VectorRegister>(reg), base, offset); break;
+ case TIMES_4: StW(static_cast<VectorRegister>(reg), base, offset); break;
+ case TIMES_8: StD(static_cast<VectorRegister>(reg), base, offset); break;
+ default:
+ LOG(FATAL) << "UNREACHABLE";
+ }
+ null_checker();
+ break;
default:
LOG(FATAL) << "UNREACHABLE";
}
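In the kLoadQuadword/kStoreQuadword paths above, the element size shift derived from the offset's alignment picks which MSA flavour is emitted. A hedged sketch of that selection follows; MsaLoadFor is an invented name, and the real code also performs the base/offset adjustment shown earlier.

#include <cstdint>
#include <cstdio>

// Map an offset's alignment to the MSA load mnemonic that would be used (st.* for stores).
const char* MsaLoadFor(int32_t offset) {
  if ((offset & 7) == 0) return "ld.d";  // TIMES_8
  if ((offset & 3) == 0) return "ld.w";  // TIMES_4
  if ((offset & 1) == 0) return "ld.h";  // TIMES_2
  return "ld.b";                         // TIMES_1
}

int main() {
  // Matches the expectations in the test below: 0 and 8 -> ld.d, 511 -> ld.b,
  // 514 -> ld.h, 516 -> ld.w.
  for (int32_t offset : {0, 8, 511, 514, 516}) {
    std::printf("%5d -> %s\n", offset, MsaLoadFor(offset));
  }
}

This is why, in the test expectations below, offset 0 assembles to ld.d while 511 falls back to ld.b and 514 to ld.h.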
diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc
index 86a8cfe352..fbebe0ce15 100644
--- a/compiler/utils/mips64/assembler_mips64_test.cc
+++ b/compiler/utils/mips64/assembler_mips64_test.cc
@@ -1970,6 +1970,50 @@ TEST_F(AssemblerMIPS64Test, LoadFpuFromOffset) {
__ LoadFpuFromOffset(mips64::kLoadDoubleword, mips64::F0, mips64::A0, -32768);
__ LoadFpuFromOffset(mips64::kLoadDoubleword, mips64::F0, mips64::A0, 0xABCDEF00);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 0);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 1);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 2);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 4);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 8);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 511);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 512);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 513);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 514);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 516);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 1022);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 1024);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 1025);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 1026);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 1028);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 2044);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 2048);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 2049);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 2050);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 2052);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 4088);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 4096);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 4097);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 4098);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 4100);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 4104);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 0x7FFC);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 0x8000);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 0x10000);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 0x12345678);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 0x12350078);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, -256);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, -511);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, -513);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, -1022);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, -1026);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, -2044);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, -2052);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, -4096);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, -4104);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, -32768);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 0xABCDEF00);
+ __ LoadFpuFromOffset(mips64::kLoadQuadword, mips64::F0, mips64::A0, 0x7FFFABCD);
+
const char* expected =
"lwc1 $f0, 0($a0)\n"
"lwc1 $f0, 4($a0)\n"
@@ -2010,7 +2054,78 @@ TEST_F(AssemblerMIPS64Test, LoadFpuFromOffset) {
"ldc1 $f0, -256($a0)\n"
"ldc1 $f0, -32768($a0)\n"
"daui $at, $a0, 0xABCE\n"
- "ldc1 $f0, -0x1100($at) # 0xEF00\n";
+ "ldc1 $f0, -0x1100($at) # 0xEF00\n"
+
+ "ld.d $w0, 0($a0)\n"
+ "ld.b $w0, 1($a0)\n"
+ "ld.h $w0, 2($a0)\n"
+ "ld.w $w0, 4($a0)\n"
+ "ld.d $w0, 8($a0)\n"
+ "ld.b $w0, 511($a0)\n"
+ "ld.d $w0, 512($a0)\n"
+ "daddiu $at, $a0, 513\n"
+ "ld.b $w0, 0($at)\n"
+ "ld.h $w0, 514($a0)\n"
+ "ld.w $w0, 516($a0)\n"
+ "ld.h $w0, 1022($a0)\n"
+ "ld.d $w0, 1024($a0)\n"
+ "daddiu $at, $a0, 1025\n"
+ "ld.b $w0, 0($at)\n"
+ "daddiu $at, $a0, 1026\n"
+ "ld.h $w0, 0($at)\n"
+ "ld.w $w0, 1028($a0)\n"
+ "ld.w $w0, 2044($a0)\n"
+ "ld.d $w0, 2048($a0)\n"
+ "daddiu $at, $a0, 2049\n"
+ "ld.b $w0, 0($at)\n"
+ "daddiu $at, $a0, 2050\n"
+ "ld.h $w0, 0($at)\n"
+ "daddiu $at, $a0, 2052\n"
+ "ld.w $w0, 0($at)\n"
+ "ld.d $w0, 4088($a0)\n"
+ "daddiu $at, $a0, 4096\n"
+ "ld.d $w0, 0($at)\n"
+ "daddiu $at, $a0, 4097\n"
+ "ld.b $w0, 0($at)\n"
+ "daddiu $at, $a0, 4098\n"
+ "ld.h $w0, 0($at)\n"
+ "daddiu $at, $a0, 4100\n"
+ "ld.w $w0, 0($at)\n"
+ "daddiu $at, $a0, 4104\n"
+ "ld.d $w0, 0($at)\n"
+ "daddiu $at, $a0, 0x7FFC\n"
+ "ld.w $w0, 0($at)\n"
+ "daddiu $at, $a0, 0x7FF8\n"
+ "ld.d $w0, 8($at)\n"
+ "daui $at, $a0, 0x1\n"
+ "ld.d $w0, 0($at)\n"
+ "daui $at, $a0, 0x1234\n"
+ "daddiu $at, $at, 0x6000\n"
+ "ld.d $w0, -2440($at) # 0xF678\n"
+ "daui $at, $a0, 0x1235\n"
+ "ld.d $w0, 0x78($at)\n"
+ "ld.d $w0, -256($a0)\n"
+ "ld.b $w0, -511($a0)\n"
+ "daddiu $at, $a0, -513\n"
+ "ld.b $w0, 0($at)\n"
+ "ld.h $w0, -1022($a0)\n"
+ "daddiu $at, $a0, -1026\n"
+ "ld.h $w0, 0($at)\n"
+ "ld.w $w0, -2044($a0)\n"
+ "daddiu $at, $a0, -2052\n"
+ "ld.w $w0, 0($at)\n"
+ "ld.d $w0, -4096($a0)\n"
+ "daddiu $at, $a0, -4104\n"
+ "ld.d $w0, 0($at)\n"
+ "daddiu $at, $a0, -32768\n"
+ "ld.d $w0, 0($at)\n"
+ "daui $at, $a0, 0xABCE\n"
+ "daddiu $at, $at, -8192 # 0xE000\n"
+ "ld.d $w0, 0xF00($at)\n"
+ "daui $at, $a0, 0x8000\n"
+ "dahi $at, $at, 1\n"
+ "daddiu $at, $at, -21504 # 0xAC00\n"
+ "ld.b $w0, -51($at) # 0xFFCD\n";
DriverStr(expected, "LoadFpuFromOffset");
}
@@ -2200,6 +2315,50 @@ TEST_F(AssemblerMIPS64Test, StoreFpuToOffset) {
__ StoreFpuToOffset(mips64::kStoreDoubleword, mips64::F0, mips64::A0, -32768);
__ StoreFpuToOffset(mips64::kStoreDoubleword, mips64::F0, mips64::A0, 0xABCDEF00);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 0);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 1);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 2);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 4);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 8);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 511);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 512);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 513);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 514);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 516);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 1022);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 1024);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 1025);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 1026);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 1028);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 2044);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 2048);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 2049);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 2050);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 2052);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 4088);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 4096);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 4097);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 4098);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 4100);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 4104);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 0x7FFC);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 0x8000);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 0x10000);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 0x12345678);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 0x12350078);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, -256);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, -511);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, -513);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, -1022);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, -1026);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, -2044);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, -2052);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, -4096);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, -4104);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, -32768);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 0xABCDEF00);
+ __ StoreFpuToOffset(mips64::kStoreQuadword, mips64::F0, mips64::A0, 0x7FFFABCD);
+
const char* expected =
"swc1 $f0, 0($a0)\n"
"swc1 $f0, 4($a0)\n"
@@ -2240,7 +2399,78 @@ TEST_F(AssemblerMIPS64Test, StoreFpuToOffset) {
"sdc1 $f0, -256($a0)\n"
"sdc1 $f0, -32768($a0)\n"
"daui $at, $a0, 0xABCE\n"
- "sdc1 $f0, -0x1100($at)\n";
+ "sdc1 $f0, -0x1100($at)\n"
+
+ "st.d $w0, 0($a0)\n"
+ "st.b $w0, 1($a0)\n"
+ "st.h $w0, 2($a0)\n"
+ "st.w $w0, 4($a0)\n"
+ "st.d $w0, 8($a0)\n"
+ "st.b $w0, 511($a0)\n"
+ "st.d $w0, 512($a0)\n"
+ "daddiu $at, $a0, 513\n"
+ "st.b $w0, 0($at)\n"
+ "st.h $w0, 514($a0)\n"
+ "st.w $w0, 516($a0)\n"
+ "st.h $w0, 1022($a0)\n"
+ "st.d $w0, 1024($a0)\n"
+ "daddiu $at, $a0, 1025\n"
+ "st.b $w0, 0($at)\n"
+ "daddiu $at, $a0, 1026\n"
+ "st.h $w0, 0($at)\n"
+ "st.w $w0, 1028($a0)\n"
+ "st.w $w0, 2044($a0)\n"
+ "st.d $w0, 2048($a0)\n"
+ "daddiu $at, $a0, 2049\n"
+ "st.b $w0, 0($at)\n"
+ "daddiu $at, $a0, 2050\n"
+ "st.h $w0, 0($at)\n"
+ "daddiu $at, $a0, 2052\n"
+ "st.w $w0, 0($at)\n"
+ "st.d $w0, 4088($a0)\n"
+ "daddiu $at, $a0, 4096\n"
+ "st.d $w0, 0($at)\n"
+ "daddiu $at, $a0, 4097\n"
+ "st.b $w0, 0($at)\n"
+ "daddiu $at, $a0, 4098\n"
+ "st.h $w0, 0($at)\n"
+ "daddiu $at, $a0, 4100\n"
+ "st.w $w0, 0($at)\n"
+ "daddiu $at, $a0, 4104\n"
+ "st.d $w0, 0($at)\n"
+ "daddiu $at, $a0, 0x7FFC\n"
+ "st.w $w0, 0($at)\n"
+ "daddiu $at, $a0, 0x7FF8\n"
+ "st.d $w0, 8($at)\n"
+ "daui $at, $a0, 0x1\n"
+ "st.d $w0, 0($at)\n"
+ "daui $at, $a0, 0x1234\n"
+ "daddiu $at, $at, 0x6000\n"
+ "st.d $w0, -2440($at) # 0xF678\n"
+ "daui $at, $a0, 0x1235\n"
+ "st.d $w0, 0x78($at)\n"
+ "st.d $w0, -256($a0)\n"
+ "st.b $w0, -511($a0)\n"
+ "daddiu $at, $a0, -513\n"
+ "st.b $w0, 0($at)\n"
+ "st.h $w0, -1022($a0)\n"
+ "daddiu $at, $a0, -1026\n"
+ "st.h $w0, 0($at)\n"
+ "st.w $w0, -2044($a0)\n"
+ "daddiu $at, $a0, -2052\n"
+ "st.w $w0, 0($at)\n"
+ "st.d $w0, -4096($a0)\n"
+ "daddiu $at, $a0, -4104\n"
+ "st.d $w0, 0($at)\n"
+ "daddiu $at, $a0, -32768\n"
+ "st.d $w0, 0($at)\n"
+ "daui $at, $a0, 0xABCE\n"
+ "daddiu $at, $at, -8192 # 0xE000\n"
+ "st.d $w0, 0xF00($at)\n"
+ "daui $at, $a0, 0x8000\n"
+ "dahi $at, $at, 1\n"
+ "daddiu $at, $at, -21504 # 0xAC00\n"
+ "st.b $w0, -51($at) # 0xFFCD\n";
DriverStr(expected, "StoreFpuToOffset");
}